#include <../src/mat/impls/baij/mpi/mpibaij.h>   /*I  "petscmat.h"  I*/
#include <petscblaslapack.h>
#include <petscsf.h>

extern PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
extern PetscErrorCode MatDisAssemble_MPIBAIJ(Mat);
extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []);
extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
extern PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
extern PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar,Vec,Vec);

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ"
PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);}
  ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++) {
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);
  ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

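/*
   Usage sketch (illustrative only, with a hypothetical assembled MPIBAIJ matrix A):
   v receives the largest entry in absolute value of each local row, and the
   optional idx array receives the global column where it occurs.

     Vec      rmax;
     PetscInt *cols,nlocal;
     ierr = MatGetLocalSize(A,&nlocal,NULL);CHKERRQ(ierr);
     ierr = VecCreateMPI(PetscObjectComm((PetscObject)A),nlocal,PETSC_DETERMINE,&rmax);CHKERRQ(ierr);
     ierr = PetscMalloc1(nlocal,&cols);CHKERRQ(ierr);
     ierr = MatGetRowMaxAbs(A,rmax,cols);CHKERRQ(ierr);
     ierr = PetscFree(cols);CHKERRQ(ierr);
     ierr = VecDestroy(&rmax);CHKERRQ(ierr);
*/
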
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPIBAIJ"
PetscErrorCode  MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
PetscErrorCode  MatRetrieveValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

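/*
   Usage sketch (illustrative only): MatStoreValues()/MatRetrieveValues()
   snapshot and restore the numerical values of a matrix whose nonzero pattern
   is reused, e.g. inside a nonlinear iteration. The matrix must disallow new
   nonzero locations before storing:

     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);CHKERRQ(ierr);
     ierr = MatStoreValues(A);CHKERRQ(ierr);
     ... modify the values of A ...
     ierr = MatRetrieveValues(A);CHKERRQ(ierr);   (A again holds the stored values)
*/
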
/*
     Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  This is done in a non-scalable way, since the
   length of colmap equals the number of block columns in the global matrix.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateColmap_MPIBAIJ_Private"
PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *B    = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;

  PetscFunctionBegin;
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
  for (i=0; i<nbs; i++) {
    ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc1((baij->Nbs+1),&baij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}

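/*
   Lookup sketch (illustrative): for each global block column present in the
   off-diagonal part B, colmap stores 1 + the first local point column of that
   block, so 0 (or a failed table lookup) marks an absent block column.
   Translating a hypothetical global point column gcol then looks like:

     #if defined(PETSC_USE_CTABLE)
       ierr = PetscTableFind(baij->colmap,gcol/bs + 1,&lcol);CHKERRQ(ierr);
       lcol--;
     #else
       lcol = baij->colmap[gcol/bs] - 1;
     #endif
       if (lcol >= 0) lcol += gcol % bs;   local point column within B

   This mirrors the lookups in MatSetValues_MPIBAIJ() and MatGetValues_MPIBAIJ()
   below.
*/
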
#define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
  { \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap +  bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value;  \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
a_noinsert:; \
    ailen[brow] = nrow; \
  }

#define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
  { \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap +  bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value;  \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
b_noinsert:; \
    bilen[brow] = nrow; \
  }

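/*
   How the macros above locate a block (a sketch, not compiled here): within a
   block row the block-column indices rp[0..nrow-1] are sorted, so the range is
   narrowed by bisection until it is short, then finished with a linear scan:

     low = 0; high = nrow;
     while (high-low > 3) {            bisect while the range is long
       t = (low+high)/2;
       if (rp[t] > bcol) high = t;
       else              low  = t;
     }
     for (_i=low; _i<high; _i++) {     short linear scan
       if (rp[_i] >= bcol) break;
     }

   If rp[_i] == bcol the bs x bs block already exists and the scalar lands at
   offset bs2*_i + bs*cidx + ridx (column-major within the block); otherwise
   the later entries are shifted up one slot to make room for a new block.
*/
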
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ"
PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  MatScalar      value;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat         A     = baij->A;
  Mat_SeqBAIJ *a    = (Mat_SeqBAIJ*)(A)->data;
  PetscInt    *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar   *aa   =a->a;

  Mat         B     = baij->B;
  Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
  PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar   *ba   =b->a;

  PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar *ap,*bap;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      row = im[i] - rstart_orig;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig) {
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B    = baij->B;
              b    = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba   =b->a;
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
            else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

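/*
   Usage sketch (illustrative, with a hypothetical MPIBAIJ matrix A): values
   destined for rows owned by another process are stashed here and exchanged
   during assembly, so callers need not know the ownership ranges.

     PetscInt    ridx[1] = {0},cidx[2] = {0,1};
     PetscScalar vals[2] = {1.0,2.0};
     ierr = MatSetValues(A,1,ridx,2,cidx,vals,ADD_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/
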
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  const PetscScalar *value;
  MatScalar         *barray     = baij->barray;
  PetscBool         roworiented = baij->roworiented;
  PetscErrorCode    ierr;
  PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
  PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) stepval = (n-1)*bs;
  else stepval = (m-1)*bs;

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If n == 1 (row-oriented) or m == 1 (column-oriented), the block is contiguous in v and no copy is required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + (i*(stepval+bs) + j)*bs;
          } else {
            value = v + (j*(stepval+bs) + i)*bs;
          }
          for (ii=0; ii<bs; ii++,value+=bs+stepval) {
            for (jj=0; jj<bs; jj++) barray[jj] = value[jj];
            barray += bs;
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend) {
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined(PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  =  in[j];
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", bs*im[i], bs*in[j]);
          } else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

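/*
   Usage sketch (illustrative, with hypothetical A, brow, bcol): the blocked
   interface takes block-row/block-column indices and one dense bs x bs block
   of values per index pair. For bs = 2:

     PetscScalar blk[4] = {1.0,2.0,
                           3.0,4.0};   (row-oriented layout by default)
     ierr = MatSetValuesBlocked(A,1,&brow,1,&bcol,blk,INSERT_VALUES);CHKERRQ(ierr);
*/
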
#define HASH_KEY 0.6180339887
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
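/*
   HASH() is multiplicative hashing with the golden-ratio constant (Knuth):
   multiply the key by 0.6180339887, keep the fractional part, and scale it by
   the table size, which spreads consecutive keys across the table. A
   standalone sketch of the same computation:

     static PetscInt BAIJHashSketch(PetscInt size,PetscInt key)
     {
       PetscReal frac = key*HASH_KEY;
       return (PetscInt)(size*(frac - (PetscInt)frac));   keep fractional part only
     }

   Collisions are resolved by linear probing, as seen in
   MatCreateHashTable_MPIBAIJ_Private() and the two _HT setvalues routines below.
*/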
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij       = (Mat_MPIBAIJ*)mat->data;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig  =mat->rmap->rend,Nbs=baij->Nbs;
  PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
  PetscReal      tmp;
  MatScalar      **HD = baij->hd,value;
#if defined(PETSC_USE_DEBUG)
  PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j=0; j<n; j++) {
        col = in[j];
        if (roworiented) value = v[i*n+j];
        else             value = v[i+j*m];
        /* Look up in the hash table */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
#endif
        /* A hash table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else if (!baij->donotstash) {
      if (roworiented) {
        ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
      } else {
        ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct  = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ       *baij       = (Mat_MPIBAIJ*)mat->data;
  PetscBool         roworiented = baij->roworiented;
  PetscErrorCode    ierr;
  PetscInt          i,j,ii,jj,row,col;
  PetscInt          rstart=baij->rstartbs;
  PetscInt          rend  =mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
  PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  PetscReal         tmp;
  MatScalar         **HD = baij->hd,*baij_a;
  const PetscScalar *v_t,*value;
#if defined(PETSC_USE_DEBUG)
  PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;
  if (roworiented) stepval = (n-1)*bs;
  else stepval = (m-1)*bs;

  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
#endif
    row = im[i];
    v_t = v + i*nbs2;
    if (row >= rstart && row < rend) {
      for (j=0; j<n; j++) {
        col = in[j];

        /* Look up in the hash table */
        key = row*Nbs+col+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        total_ct++;
        insert_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
#endif
        baij_a = HD[idx];
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t  += bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] = *value++;
              }
            }
          }
        } else {
          value = v + j*(stepval+bs)*bs + i*bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] = *value++;
              }
            }
          }
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct  = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIBAIJ"
PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
  PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          col  = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

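/*
   Usage sketch (illustrative, with a hypothetical assembled matrix A): unlike
   MatSetValues(), MatGetValues() here can only return entries whose rows are
   owned by the calling process; locations absent from the off-diagonal part
   read back as 0.0 (see the colmap check above).

     PetscInt    row,col = 0,rstart,rend;
     PetscScalar val;
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     row  = rstart;
     ierr = MatGetValues(A,1,&row,1,&col,&val);CHKERRQ(ierr);
*/
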
#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIBAIJ"
PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v  = amat->a;
      nz = amat->nz*bs2;
      for (i=0; i<nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      v  = bmat->a;
      nz = bmat->nz*bs2;
      for (i=0; i<nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *nrm = PetscSqrtReal(*nrm);
    } else if (type == NORM_1) { /* max column sum */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
      ierr = PetscMalloc2(mat->cmap->N,&tmp,mat->cmap->N,&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      v    = amat->a; jj = amat->j;
      for (i=0; i<amat->nz; i++) {
        for (j=0; j<bs; j++) {
          col = bs*(cstart + *jj) + j; /* column index */
          for (row=0; row<bs; row++) {
            tmp[col] += PetscAbsScalar(*v);  v++;
          }
        }
        jj++;
      }
      v = bmat->a; jj = bmat->j;
      for (i=0; i<bmat->nz; i++) {
        for (j=0; j<bs; j++) {
          col = bs*garray[*jj] + j;
          for (row=0; row<bs; row++) {
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *nrm = 0.0;
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row sum */
      PetscReal *sums;
      ierr = PetscMalloc1(bs,&sums);CHKERRQ(ierr);
      sum  = 0.0;
      for (j=0; j<amat->mbs; j++) {
        for (row=0; row<bs; row++) sums[row] = 0.0;
        v  = amat->a + bs2*amat->i[j];
        nz = amat->i[j+1]-amat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        v  = bmat->a + bs2*bmat->i[j];
        nz = bmat->i[j+1]-bmat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        for (row=0; row<bs; row++) {
          if (sums[row] > sum) sum = sums[row];
        }
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscFree(sums);CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for this norm yet");
  }
  PetscFunctionReturn(0);
}

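/*
   The reductions above compute, over the locally stored entries of A and B:

     NORM_FROBENIUS: local sum of |a_ij|^2, MPI_Allreduce with MPIU_SUM, then PetscSqrtReal()
     NORM_1 (max column sum): per-column sums of |a_ij| summed across processes, then the maximum is taken
     NORM_INFINITY (max row sum): local row sums of |a_ij|, then MPI_Allreduce with MPIU_MAX
*/
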
/*
  Creates the hash table and fills it in.
  This table is created only once.
  If new entries need to be added to the matrix,
  the hash table has to be destroyed and
  recreated.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            A     = baij->A,B=baij->B;
  Mat_SeqBAIJ    *a    = (Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)B->data;
  PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
  PetscErrorCode ierr;
  PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
  PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
  PetscInt       *HT,key;
  MatScalar      **HD;
  PetscReal      tmp;
#if defined(PETSC_USE_INFO)
  PetscInt ct=0,max=0;
#endif

  PetscFunctionBegin;
  if (baij->ht) PetscFunctionReturn(0);

  baij->ht_size = (PetscInt)(factor*nz);
  ht_size       = baij->ht_size;

  /* Allocate Memory for Hash Table */
  ierr = PetscCalloc2(ht_size,&baij->hd,ht_size,&baij->ht);CHKERRQ(ierr);
  HD   = baij->hd;
  HT   = baij->ht;

  /* Loop Over A */
  for (i=0; i<a->mbs; i++) {
    for (j=ai[i]; j<ai[i+1]; j++) {
      row = i+rstart;
      col = aj[j]+cstart;

      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++) {
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = a->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }
  /* Loop Over B */
  for (i=0; i<b->mbs; i++) {
    for (j=bi[i]; j<bi[i+1]; j++) {
      row = i+rstart;
      col = garray[bj[j]];
      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++) {
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = b->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }

  /* Print Summary */
#if defined(PETSC_USE_INFO)
  for (i=0,j=0; i<ht_size; i++) {
    if (HT[i]) j++;
  }
  ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
  ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

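/*
   Note on the MPI_BOR reduction above: with the InsertMode enumeration
   (NOT_SET_VALUES = 0, INSERT_VALUES = 1, ADD_VALUES = 2, as defined in the
   PETSc headers), OR-ing the modes of all ranks yields
   ADD_VALUES|INSERT_VALUES exactly when some ranks inserted while others
   added, which is flagged as an error; ranks that set no values contribute 0
   and simply adopt the common mode afterwards.
*/
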
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *a   =(Mat_SeqBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscBool      r1,r2,r3,other_disassembled;
  MatScalar      *val;
  InsertMode     addv = mat->insertmode;
  PetscMPIInt    n;

  PetscFunctionBegin;
  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  if (!baij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column-oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;

    (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);

    baij->roworiented = r1;
    a->roworiented    = r2;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);

    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);

    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);

  baij->rowvalues = 0;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#include <petscdraw.h>
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = baij->size,rank = baij->rank;
  PetscInt          bs   = mat->rmap->bs;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
                                                rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar   *a;

    /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
    ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ*)baij->B->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      /* Set the type name to MATMPIBAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqBAIJ_ASCII() */
      ierr = PetscStrcpy(((PetscObject)((Mat_MPIBAIJ*)(A->data))->A)->type_name,MATMPIBAIJ);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_Binary"
static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *A = (Mat_SeqBAIJ*)a->A->data;
  Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)a->B->data;
  PetscErrorCode ierr;
  PetscInt       i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen;
  PetscInt       *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll;
  int            fd;
  PetscScalar    *column_values;
  FILE           *file;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  PetscInt       message_count,flowcontrolcount;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
  nz   = bs2*(A->nz + B->nz);
  rlen = mat->rmap->n;
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;

    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    range = mat->rmap->range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }

  ierr = PetscMalloc1((rlen/bs),&crow_lens);CHKERRQ(ierr);
  /* compute lengths of each row */
  for (i=0; i<a->mbs; i++) {
    crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }
  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscMalloc1(rlen,&row_lens);CHKERRQ(ierr);
    rlen = (range[1] - range[0])/bs;
    for (i=0; i<rlen; i++) {
      for (j=0; j<bs; j++) {
        row_lens[i*bs+j] = bs*crow_lens[i];
      }
    }
    ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = (range[i+1] - range[i])/bs;
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      for (k=0; k<rlen; k++) {
        for (j=0; j<bs; j++) {
          row_lens[k*bs+j] = bs*crow_lens[k];
        }
      }
      ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
    ierr = PetscFree(row_lens);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(crow_lens);CHKERRQ(ierr);

  /* load up the local column indices. Include them for every point row, not just once per block row, since process 0
     does not have the information needed to expand a block row into its point rows. This does require more communication,
     but still no more than the communication needed for the nonzero values */
  nzmax = nz; /* space the largest processor needs */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  ierr  = PetscMalloc1(nzmax,&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<a->mbs; i++) {
    pcnt = cnt;
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      for (l=0; l<bs; l++) {
        column_indices[cnt++] = bs*col+l;
      }
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      for (l=0; l<bs; l++) {
        column_indices[cnt++] = bs*(A->j[k] + cstart)+l;
      }
    }
    for (; j<B->i[i+1]; j++) {
      for (l=0; l<bs; l++) {
        column_indices[cnt++] = bs*garray[B->j[j]]+l;
      }
    }
    len = cnt - pcnt;
    for (k=1; k<bs; k++) {
      ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr);
      cnt += len;
    }
  }
  if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);

  /* store the columns to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the numerical values */
  ierr = PetscMalloc1(nzmax,&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<a->mbs; i++) {
    rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]);
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      for (l=0; l<bs; l++) {
        for (ll=0; ll<bs; ll++) {
          column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
        }
      }
      cnt += bs;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      for (l=0; l<bs; l++) {
        for (ll=0; ll<bs; ll++) {
          column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll];
        }
      }
      cnt += bs;
    }
    for (; j<B->i[i+1]; j++) {
      for (l=0; l<bs; l++) {
        for (ll=0; ll<bs; ll++) {
          column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
        }
      }
      cnt += bs;
    }
    cnt += (bs-1)*rlen;
  }
  if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);

  /* store the column values to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);

  ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
  if (file) {
    fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
  }
  PetscFunctionReturn(0);
}

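/*
   Layout of the binary file produced above (written by process 0 only):

     header[4]       MAT_FILE_CLASSID, M (global rows), N (global columns), total nz (point entries)
     row lengths     M PetscInt values, nonzeros per point row
     column indices  nz PetscInt values, row by row in global order
     values          nz PetscScalar values, matching the column indices

   In addition, "-matload_block_size <bs>" is appended to the viewer's info
   file (when one exists) so that a later MatLoad() can pick up the block size.
*/
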
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ"
PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii || isdraw || issocket) {
    ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIBAIJ"
PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

1311 #undef __FUNCT__
1312 #define __FUNCT__ "MatMult_MPIBAIJ"
1313 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1314 {
1315   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1316   PetscErrorCode ierr;
1317   PetscInt       nt;
1318 
1319   PetscFunctionBegin;
1320   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1321   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1322   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1323   if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
1324   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1325   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1326   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1327   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1328   PetscFunctionReturn(0);
1329 }
1330 
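/*
   Usage sketch (not part of the library; A is an assumed assembled MATMPIBAIJ
   matrix, x and y hypothetical conforming vectors).  MatMult() dispatches to
   MatMult_MPIBAIJ() above, which overlaps the forward scatter of the ghost
   values of x with the diagonal-block product.

     Vec x,y;
     ierr = MatGetVecs(A,&x,&y);CHKERRQ(ierr);
     ierr = VecSet(x,1.0);CHKERRQ(ierr);
     ierr = MatMult(A,x,y);CHKERRQ(ierr);
     ierr = VecDestroy(&x);CHKERRQ(ierr);
     ierr = VecDestroy(&y);CHKERRQ(ierr);
*/
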
1331 #undef __FUNCT__
1332 #define __FUNCT__ "MatMultAdd_MPIBAIJ"
1333 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1334 {
1335   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1336   PetscErrorCode ierr;
1337 
1338   PetscFunctionBegin;
1339   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1340   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1341   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1342   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1343   PetscFunctionReturn(0);
1344 }
1345 
1346 #undef __FUNCT__
1347 #define __FUNCT__ "MatMultTranspose_MPIBAIJ"
1348 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1349 {
1350   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1351   PetscErrorCode ierr;
1352   PetscBool      merged;
1353 
1354   PetscFunctionBegin;
1355   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1356   /* do nondiagonal part */
1357   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1358   if (!merged) {
1359     /* send it on its way */
1360     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1361     /* do local part */
1362     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1363     /* receive remote parts: note this assumes the values are not actually */
1364     /* inserted in yy until the next line */
1365     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1366   } else {
1367     /* do local part */
1368     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1369     /* send it on its way */
1370     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1371     /* the values were actually received in the Begin(), but we still need to call this no-op End() */
1372     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1373   }
1374   PetscFunctionReturn(0);
1375 }
1376 
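/*
   Usage sketch (assumes an assembled MATMPIBAIJ matrix A with conforming
   vectors x and y): the transpose product never forms A^T explicitly;
   off-process contributions are accumulated with the reverse ADD_VALUES
   scatter shown above.

     ierr = MatMultTranspose(A,x,y);CHKERRQ(ierr);
*/
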
1377 #undef __FUNCT__
1378 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
1379 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1380 {
1381   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1382   PetscErrorCode ierr;
1383 
1384   PetscFunctionBegin;
1385   /* do nondiagonal part */
1386   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1387   /* send it on its way */
1388   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1389   /* do local part */
1390   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1391   /* receive remote parts: note this assumes the values are not actually */
1392   /* inserted in zz until the next line, which is true for this implementation */
1393   /* but may not always be true. */
1394   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1395   PetscFunctionReturn(0);
1396 }
1397 
1398 /*
1399   This only works correctly for square matrices where the subblock A->A is the
1400    diagonal block
1401 */
1402 #undef __FUNCT__
1403 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
1404 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1405 {
1406   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1407   PetscErrorCode ierr;
1408 
1409   PetscFunctionBegin;
1410   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrices where A->A is the diagonal block");
1411   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1412   PetscFunctionReturn(0);
1413 }
1414 
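/*
   Usage sketch (v is a hypothetical vector with the row layout of the assumed
   square MATMPIBAIJ matrix A; only the diagonal block a->A is consulted,
   hence the square-matrix restriction above).

     Vec v;
     ierr = MatGetVecs(A,NULL,&v);CHKERRQ(ierr);
     ierr = MatGetDiagonal(A,v);CHKERRQ(ierr);
     ierr = VecDestroy(&v);CHKERRQ(ierr);
*/
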
1415 #undef __FUNCT__
1416 #define __FUNCT__ "MatScale_MPIBAIJ"
1417 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
1418 {
1419   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1420   PetscErrorCode ierr;
1421 
1422   PetscFunctionBegin;
1423   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1424   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1425   PetscFunctionReturn(0);
1426 }
1427 
1428 #undef __FUNCT__
1429 #define __FUNCT__ "MatGetRow_MPIBAIJ"
1430 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1431 {
1432   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
1433   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1434   PetscErrorCode ierr;
1435   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1436   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
1437   PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;
1438 
1439   PetscFunctionBegin;
1440   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1441   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1442   mat->getrowactive = PETSC_TRUE;
1443 
1444   if (!mat->rowvalues && (idx || v)) {
1445     /*
1446         allocate enough space to hold information from the longest row.
1447     */
1448     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1449     PetscInt    max = 1,mbs = mat->mbs,tmp;
1450     for (i=0; i<mbs; i++) {
1451       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1452       if (max < tmp) max = tmp;
1453     }
1454     ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr);
1455   }
1456   lrow = row - brstart;
1457 
1458   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1459   if (!v)   {pvA = 0; pvB = 0;}
1460   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1461   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1462   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1463   nztot = nzA + nzB;
1464 
1465   cmap = mat->garray;
1466   if (v  || idx) {
1467     if (nztot) {
1468       /* Sort by increasing column numbers, assuming A and B already sorted */
1469       PetscInt imark = -1;
1470       if (v) {
1471         *v = v_p = mat->rowvalues;
1472         for (i=0; i<nzB; i++) {
1473           if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
1474           else break;
1475         }
1476         imark = i;
1477         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1478         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1479       }
1480       if (idx) {
1481         *idx = idx_p = mat->rowindices;
1482         if (imark > -1) {
1483           for (i=0; i<imark; i++) {
1484             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1485           }
1486         } else {
1487           for (i=0; i<nzB; i++) {
1488             if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1489             else break;
1490           }
1491           imark = i;
1492         }
1493         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1494         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1495       }
1496     } else {
1497       if (idx) *idx = 0;
1498       if (v)   *v   = 0;
1499     }
1500   }
1501   *nz  = nztot;
1502   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1503   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1504   PetscFunctionReturn(0);
1505 }
1506 
1507 #undef __FUNCT__
1508 #define __FUNCT__ "MatRestoreRow_MPIBAIJ"
1509 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1510 {
1511   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1512 
1513   PetscFunctionBegin;
1514   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1515   baij->getrowactive = PETSC_FALSE;
1516   PetscFunctionReturn(0);
1517 }
1518 
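/*
   Usage sketch (assumes an assembled MATMPIBAIJ matrix A): only locally owned
   rows may be requested, and each MatGetRow() must be matched by
   MatRestoreRow() before the next row is fetched, since the routine above
   reuses the single rowvalues/rowindices workspace.

     PetscInt          row,ncols,rstart,rend;
     const PetscInt    *cols;
     const PetscScalar *vals;
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     }
*/
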
1519 #undef __FUNCT__
1520 #define __FUNCT__ "MatZeroEntries_MPIBAIJ"
1521 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1522 {
1523   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1524   PetscErrorCode ierr;
1525 
1526   PetscFunctionBegin;
1527   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1528   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1529   PetscFunctionReturn(0);
1530 }
1531 
1532 #undef __FUNCT__
1533 #define __FUNCT__ "MatGetInfo_MPIBAIJ"
1534 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1535 {
1536   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
1537   Mat            A  = a->A,B = a->B;
1538   PetscErrorCode ierr;
1539   PetscReal      isend[5],irecv[5];
1540 
1541   PetscFunctionBegin;
1542   info->block_size = (PetscReal)matin->rmap->bs;
1543 
1544   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1545 
1546   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1547   isend[3] = info->memory;  isend[4] = info->mallocs;
1548 
1549   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1550 
1551   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1552   isend[3] += info->memory;  isend[4] += info->mallocs;
1553 
1554   if (flag == MAT_LOCAL) {
1555     info->nz_used      = isend[0];
1556     info->nz_allocated = isend[1];
1557     info->nz_unneeded  = isend[2];
1558     info->memory       = isend[3];
1559     info->mallocs      = isend[4];
1560   } else if (flag == MAT_GLOBAL_MAX) {
1561     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1562 
1563     info->nz_used      = irecv[0];
1564     info->nz_allocated = irecv[1];
1565     info->nz_unneeded  = irecv[2];
1566     info->memory       = irecv[3];
1567     info->mallocs      = irecv[4];
1568   } else if (flag == MAT_GLOBAL_SUM) {
1569     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1570 
1571     info->nz_used      = irecv[0];
1572     info->nz_allocated = irecv[1];
1573     info->nz_unneeded  = irecv[2];
1574     info->memory       = irecv[3];
1575     info->mallocs      = irecv[4];
1576   } else SETERRQ1(PetscObjectComm((PetscObject)matin),PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1577   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1578   info->fill_ratio_needed = 0;
1579   info->factor_mallocs    = 0;
1580   PetscFunctionReturn(0);
1581 }
1582 
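/*
   Usage sketch: MAT_LOCAL reports this process only, while MAT_GLOBAL_SUM and
   MAT_GLOBAL_MAX reduce the five statistics over the communicator, as in the
   Allreduce calls above.

     MatInfo info;
     ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(PETSC_COMM_WORLD,"nz used %g allocated %g\n",info.nz_used,info.nz_allocated);CHKERRQ(ierr);
*/
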
1583 #undef __FUNCT__
1584 #define __FUNCT__ "MatSetOption_MPIBAIJ"
1585 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg)
1586 {
1587   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1588   PetscErrorCode ierr;
1589 
1590   PetscFunctionBegin;
1591   switch (op) {
1592   case MAT_NEW_NONZERO_LOCATIONS:
1593   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1594   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1595   case MAT_KEEP_NONZERO_PATTERN:
1596   case MAT_NEW_NONZERO_LOCATION_ERR:
1597     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1598     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1599     break;
1600   case MAT_ROW_ORIENTED:
1601     a->roworiented = flg;
1602 
1603     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1604     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1605     break;
1606   case MAT_NEW_DIAGONALS:
1607     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1608     break;
1609   case MAT_IGNORE_OFF_PROC_ENTRIES:
1610     a->donotstash = flg;
1611     break;
1612   case MAT_USE_HASH_TABLE:
1613     a->ht_flag = flg;
1614     break;
1615   case MAT_SYMMETRIC:
1616   case MAT_STRUCTURALLY_SYMMETRIC:
1617   case MAT_HERMITIAN:
1618   case MAT_SYMMETRY_ETERNAL:
1619     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1620     break;
1621   default:
1622     SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"unknown option %d",op);
1623   }
1624   PetscFunctionReturn(0);
1625 }
1626 
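/*
   Usage sketch: options handled above must be set before assembly;
   MAT_USE_HASH_TABLE selects the hash-table insertion path and
   MAT_IGNORE_OFF_PROC_ENTRIES disables stashing of off-process values.

     ierr = MatSetOption(A,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
     ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
*/
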
1627 #undef __FUNCT__
1628 #define __FUNCT__ "MatTranspose_MPIBAIJ"
1629 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
1630 {
1631   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
1632   Mat_SeqBAIJ    *Aloc;
1633   Mat            B;
1634   PetscErrorCode ierr;
1635   PetscInt       M =A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
1636   PetscInt       bs=A->rmap->bs,mbs=baij->mbs;
1637   MatScalar      *a;
1638 
1639   PetscFunctionBegin;
1640   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1641   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1642     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1643     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1644     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1645     /* Do not know preallocation information, but must set block size */
1646     ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,NULL,PETSC_DECIDE,NULL);CHKERRQ(ierr);
1647   } else {
1648     B = *matout;
1649   }
1650 
1651   /* copy over the A part */
1652   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1653   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1654   ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);
1655 
1656   for (i=0; i<mbs; i++) {
1657     rvals[0] = bs*(baij->rstartbs + i);
1658     for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1659     for (j=ai[i]; j<ai[i+1]; j++) {
1660       col = (baij->cstartbs+aj[j])*bs;
1661       for (k=0; k<bs; k++) {
1662         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1663 
1664         col++; a += bs;
1665       }
1666     }
1667   }
1668   /* copy over the B part */
1669   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1670   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1671   for (i=0; i<mbs; i++) {
1672     rvals[0] = bs*(baij->rstartbs + i);
1673     for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1674     for (j=ai[i]; j<ai[i+1]; j++) {
1675       col = baij->garray[aj[j]]*bs;
1676       for (k=0; k<bs; k++) {
1677         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1678         col++;
1679         a += bs;
1680       }
1681     }
1682   }
1683   ierr = PetscFree(rvals);CHKERRQ(ierr);
1684   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1685   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1686 
1687   if (reuse == MAT_INITIAL_MATRIX || *matout != A) *matout = B;
1688   else {
1689     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
1690   }
1691   PetscFunctionReturn(0);
1692 }
1693 
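/*
   Usage sketch: with MAT_INITIAL_MATRIX a new matrix is created (the
   preallocation is unknown, so the insertions above may malloc); in-place
   reuse (passing &A back) is limited to square matrices.

     Mat At;
     ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
     ierr = MatDestroy(&At);CHKERRQ(ierr);
*/
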
1694 #undef __FUNCT__
1695 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
1696 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1697 {
1698   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1699   Mat            a     = baij->A,b = baij->B;
1700   PetscErrorCode ierr;
1701   PetscInt       s1,s2,s3;
1702 
1703   PetscFunctionBegin;
1704   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1705   if (rr) {
1706     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1707     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1708     /* Overlap communication with computation. */
1709     ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1710   }
1711   if (ll) {
1712     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1713     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1714     ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
1715   }
1716   /* scale the diagonal block */
1717   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1718 
1719   if (rr) {
1720     /* Do a scatter end and then right scale the off-diagonal block */
1721     ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1722     ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
1723   }
1724   PetscFunctionReturn(0);
1725 }
1726 
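/*
   Usage sketch (l and r are hypothetical vectors conforming to the row and
   column layouts of A): computes diag(l)*A*diag(r), overlapping the scatter
   of r's ghost values with the left scaling as above.

     ierr = MatDiagonalScale(A,l,r);CHKERRQ(ierr);
*/
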
1727 #undef __FUNCT__
1728 #define __FUNCT__ "MatZeroRows_MPIBAIJ"
1729 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
1730 {
1731   Mat_MPIBAIJ   *l      = (Mat_MPIBAIJ *) A->data;
1732   PetscInt      *owners = A->rmap->range;
1733   PetscInt       n      = A->rmap->n;
1734   PetscMPIInt    size   = l->size;
1735   PetscSF        sf;
1736   PetscInt      *lrows;
1737   PetscSFNode   *rrows;
1738   PetscInt       lastidx = -1, r, p = 0, len = 0;
1739   PetscErrorCode ierr;
1740 
1741   PetscFunctionBegin;
1742   /* Create SF where leaves are input rows and roots are owned rows */
1743   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
1744   for (r = 0; r < n; ++r) lrows[r] = -1;
1745   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
1746   for (r = 0; r < N; ++r) {
1747     const PetscInt idx   = rows[r];
1748     PetscBool      found = PETSC_FALSE;
1749     /* Trick for efficient searching for sorted rows */
1750     if (lastidx > idx) p = 0;
1751     lastidx = idx;
1752     for (; p < size; ++p) {
1753       if (idx >= owners[p] && idx < owners[p+1]) {
1754         rrows[r].rank  = p;
1755         rrows[r].index = rows[r] - owners[p];
1756         found = PETSC_TRUE;
1757         break;
1758       }
1759     }
1760     if (!found) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %d not found in matrix distribution", idx);
1761   }
1762   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
1763   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
1764   /* Collect flags for rows to be zeroed */
1765   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1766   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1767   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1768   /* Compress and put in row numbers */
1769   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
1770   /* fix right hand side if needed */
1771   if (x && b) {
1772     const PetscScalar *xx;
1773     PetscScalar       *bb;
1774 
1775     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
1776     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1777     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
1778     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
1779     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1780   }
1781 
1782   /* actually zap the local rows */
1783   /*
1784         Zero the required rows. If the "diagonal block" of the matrix
1785      is square and the user wishes to set the diagonal, we use separate
1786      code so that MatSetValues() is not called for each diagonal entry,
1787      which would allocate new memory and trigger many mallocs, slowing
1788      things down.
1789   */
1790   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
1791   ierr = MatZeroRows_SeqBAIJ(l->B,len,lrows,0.0,0,0);CHKERRQ(ierr);
1792   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
1793     ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,diag,0,0);CHKERRQ(ierr);
1794   } else if (diag != 0.0) {
1795     ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,0,0);CHKERRQ(ierr);
1796     if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1797        MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1798     for (r = 0; r < len; ++r) {
1799       const PetscInt row = lrows[r] + A->rmap->rstart;
1800       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
1801     }
1802     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1803     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1804   } else {
1805     ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,0,0);CHKERRQ(ierr);
1806   }
1807   ierr = PetscFree(lrows);CHKERRQ(ierr);
1808 
1809   /* only change matrix nonzero state if pattern was allowed to be changed */
1810   if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) {
1811     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1812     ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1813   }
1814   PetscFunctionReturn(0);
1815 }
1816 
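/*
   Usage sketch (rows[] holds global row indices, possibly owned by other
   ranks; x is the intended solution, b the right-hand side): the rows are
   zeroed, diag is placed on the diagonal, and b is adjusted so the zeroed
   equations are satisfied, matching the SF-based ownership resolution above.

     ierr = MatZeroRows(A,nrows,rows,1.0,x,b);CHKERRQ(ierr);
*/
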
1817 #undef __FUNCT__
1818 #define __FUNCT__ "MatZeroRowsColumns_MPIBAIJ"
1819 PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
1820 {
1821   Mat_MPIBAIJ       *l = (Mat_MPIBAIJ*)A->data;
1822   PetscErrorCode    ierr;
1823   PetscMPIInt       size = l->size;
1824   PetscInt          i,j,k,r,p = 0,len = 0,row,col,count,n = A->rmap->n,lastidx = -1;
1825   PetscInt          *lrows,*owners = A->rmap->range;
1826   PetscSFNode       *rrows;
1827   PetscSF           sf;
1828   const PetscScalar *xx;
1829   PetscScalar       *bb,*mask;
1830   Vec               xmask,lmask;
1831   Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ*)l->B->data;
1832   PetscInt           bs = A->rmap->bs, bs2 = baij->bs2;
1833   PetscScalar       *aa;
1834 #if defined(PETSC_DEBUG)
1835   PetscBool found = PETSC_FALSE;
1836 #endif
1837 
1838   PetscFunctionBegin;
1839   /* Create SF where leaves are input rows and roots are owned rows */
1840   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
1841   for (r = 0; r < n; ++r) lrows[r] = -1;
1842   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
1843   for (r = 0; r < N; ++r) {
1844     const PetscInt idx   = rows[r];
1845     PetscBool      found = PETSC_FALSE;
1846     /* Trick for efficient searching for sorted rows */
1847     if (lastidx > idx) p = 0;
1848     lastidx = idx;
1849     for (; p < size; ++p) {
1850       if (idx >= owners[p] && idx < owners[p+1]) {
1851         rrows[r].rank  = p;
1852         rrows[r].index = rows[r] - owners[p];
1853         found = PETSC_TRUE;
1854         break;
1855       }
1856     }
1857     if (!found) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %d not found in matrix distribution", idx);
1858   }
1859   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
1860   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
1861   /* Collect flags for rows to be zeroed */
1862   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1863   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1864   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1865   /* Compress and put in row numbers */
1866   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
1867   /* zero diagonal part of matrix */
1868   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
1869   /* handle off diagonal part of matrix */
1870   ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr);
1871   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
1872   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
1873   for (i=0; i<len; i++) bb[lrows[i]] = 1;
1874   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1875   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1876   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1877   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1878   if (x) {
1879     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1880     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1881     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1882     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1883   }
1884   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1885   /* remove zeroed rows of off diagonal matrix */
1886   for (i = 0; i < len; ++i) {
1887     row   = lrows[i];
1888     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
1889     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
1890     for (k = 0; k < count; ++k) {
1891       aa[0] = 0.0;
1892       aa   += bs;
1893     }
1894   }
1895   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
1896   for (i = 0; i < l->B->rmap->N; ++i) {
1897     row = i/bs;
1898     for (j = baij->i[row]; j < baij->i[row+1]; ++j) {
1899       for (k = 0; k < bs; ++k) {
1900         col = bs*baij->j[j] + k;
1901         if (PetscAbsScalar(mask[col])) {
1902           aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
1903           if (b) bb[i] -= aa[0]*xx[col];
1904           aa[0] = 0.0;
1905         }
1906       }
1907     }
1908   }
1909   if (x) {
1910     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1911     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1912   }
1913   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1914   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1915   ierr = PetscFree(lrows);CHKERRQ(ierr);
1916 
1917   /* only change matrix nonzero state if pattern was allowed to be changed */
1918   if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) {
1919     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1920     ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1921   }
1922   PetscFunctionReturn(0);
1923 }
1924 
1925 #undef __FUNCT__
1926 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
1927 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1928 {
1929   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1930   PetscErrorCode ierr;
1931 
1932   PetscFunctionBegin;
1933   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1934   PetscFunctionReturn(0);
1935 }
1936 
1937 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat*);
1938 
1939 #undef __FUNCT__
1940 #define __FUNCT__ "MatEqual_MPIBAIJ"
1941 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool  *flag)
1942 {
1943   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1944   Mat            a,b,c,d;
1945   PetscBool      flg;
1946   PetscErrorCode ierr;
1947 
1948   PetscFunctionBegin;
1949   a = matA->A; b = matA->B;
1950   c = matB->A; d = matB->B;
1951 
1952   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1953   if (flg) {
1954     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1955   }
1956   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1957   PetscFunctionReturn(0);
1958 }
1959 
1960 #undef __FUNCT__
1961 #define __FUNCT__ "MatCopy_MPIBAIJ"
1962 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
1963 {
1964   PetscErrorCode ierr;
1965   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1966   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
1967 
1968   PetscFunctionBegin;
1969   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1970   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1971     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1972   } else {
1973     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1974     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1975   }
1976   PetscFunctionReturn(0);
1977 }
1978 
1979 #undef __FUNCT__
1980 #define __FUNCT__ "MatSetUp_MPIBAIJ"
1981 PetscErrorCode MatSetUp_MPIBAIJ(Mat A)
1982 {
1983   PetscErrorCode ierr;
1984 
1985   PetscFunctionBegin;
1986   ierr = MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1987   PetscFunctionReturn(0);
1988 }
1989 
1990 #undef __FUNCT__
1991 #define __FUNCT__ "MatAXPY_MPIBAIJ"
1992 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1993 {
1994   PetscErrorCode ierr;
1995   Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ*)X->data,*yy=(Mat_MPIBAIJ*)Y->data;
1996   PetscBLASInt   bnz,one=1;
1997   Mat_SeqBAIJ    *x,*y;
1998 
1999   PetscFunctionBegin;
2000   if (str == SAME_NONZERO_PATTERN) {
2001     PetscScalar alpha = a;
2002     x    = (Mat_SeqBAIJ*)xx->A->data;
2003     y    = (Mat_SeqBAIJ*)yy->A->data;
2004     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2005     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2006     x    = (Mat_SeqBAIJ*)xx->B->data;
2007     y    = (Mat_SeqBAIJ*)yy->B->data;
2008     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2009     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2010   } else {
2011     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2012   }
2013   PetscFunctionReturn(0);
2014 }
2015 
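/*
   Usage sketch: with SAME_NONZERO_PATTERN the update Y += a*X reduces to the
   two BLAS axpy calls on the stored values above; any other MatStructure
   falls back to the slower MatAXPY_Basic().

     ierr = MatAXPY(Y,2.0,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
*/
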
2016 #undef __FUNCT__
2017 #define __FUNCT__ "MatRealPart_MPIBAIJ"
2018 PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
2019 {
2020   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
2021   PetscErrorCode ierr;
2022 
2023   PetscFunctionBegin;
2024   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2025   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2026   PetscFunctionReturn(0);
2027 }
2028 
2029 #undef __FUNCT__
2030 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ"
2031 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
2032 {
2033   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
2034   PetscErrorCode ierr;
2035 
2036   PetscFunctionBegin;
2037   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2038   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2039   PetscFunctionReturn(0);
2040 }
2041 
2042 #undef __FUNCT__
2043 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ"
2044 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
2045 {
2046   PetscErrorCode ierr;
2047   IS             iscol_local;
2048   PetscInt       csize;
2049 
2050   PetscFunctionBegin;
2051   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
2052   if (call == MAT_REUSE_MATRIX) {
2053     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
2054     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2055   } else {
2056     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
2057   }
2058   ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
2059   if (call == MAT_INITIAL_MATRIX) {
2060     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
2061     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
2062   }
2063   PetscFunctionReturn(0);
2064 }
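
/*
   Usage sketch (isrow and iscol are hypothetical index sets of block-aligned
   global indices): the gathered column IS is cached on the result, so
   MAT_REUSE_MATRIX skips the ISAllGather() on later extractions with the
   same pattern.

     Mat sub;
     ierr = MatGetSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);
     ierr = MatGetSubMatrix(A,isrow,iscol,MAT_REUSE_MATRIX,&sub);CHKERRQ(ierr);
     ierr = MatDestroy(&sub);CHKERRQ(ierr);
*/
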
2065 extern PetscErrorCode MatGetSubMatrices_MPIBAIJ_local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,PetscBool*,Mat*);
2066 #undef __FUNCT__
2067 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ_Private"
2068 /*
2069   Not great since it makes two copies of the submatrix: first a SeqBAIJ
2070   locally, and then the end result by concatenating the local matrices.
2071   Writing it directly would be much like MatGetSubMatrices_MPIBAIJ().
2072 */
2073 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2074 {
2075   PetscErrorCode ierr;
2076   PetscMPIInt    rank,size;
2077   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs;
2078   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol,nrow;
2079   Mat            M,Mreuse;
2080   MatScalar      *vwork,*aa;
2081   MPI_Comm       comm;
2082   IS             isrow_new, iscol_new;
2083   PetscBool      idflag,allrows, allcols;
2084   Mat_SeqBAIJ    *aij;
2085 
2086   PetscFunctionBegin;
2087   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2088   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2089   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2090   /* The compression and expansion should be avoided. It does not flag
2091      errors and it might change the indices, hence it is buggy */
2092   ierr = ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new);CHKERRQ(ierr);
2093   ierr = ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new);CHKERRQ(ierr);
2094 
2095   /* Check for special case: each processor gets entire matrix columns */
2096   ierr = ISIdentity(iscol,&idflag);CHKERRQ(ierr);
2097   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
2098   if (idflag && ncol == mat->cmap->N) allcols = PETSC_TRUE;
2099   else allcols = PETSC_FALSE;
2100 
2101   ierr = ISIdentity(isrow,&idflag);CHKERRQ(ierr);
2102   ierr = ISGetLocalSize(isrow,&nrow);CHKERRQ(ierr);
2103   if (idflag && nrow == mat->rmap->N) allrows = PETSC_TRUE;
2104   else allrows = PETSC_FALSE;
2105 
2106   if (call ==  MAT_REUSE_MATRIX) {
2107     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
2108     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2109     ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_REUSE_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr);
2110   } else {
2111     ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_INITIAL_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr);
2112   }
2113   ierr = ISDestroy(&isrow_new);CHKERRQ(ierr);
2114   ierr = ISDestroy(&iscol_new);CHKERRQ(ierr);
2115   /*
2116       m - number of local rows
2117       n - number of columns (same on all processors)
2118       rstart - first row in new global matrix generated
2119   */
2120   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2121   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2122   m    = m/bs;
2123   n    = n/bs;
2124 
2125   if (call == MAT_INITIAL_MATRIX) {
2126     aij = (Mat_SeqBAIJ*)(Mreuse)->data;
2127     ii  = aij->i;
2128     jj  = aij->j;
2129 
2130     /*
2131         Determine the number of non-zeros in the diagonal and off-diagonal
2132         portions of the matrix in order to do correct preallocation
2133     */
2134 
2135     /* first get start and end of "diagonal" columns */
2136     if (csize == PETSC_DECIDE) {
2137       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2138       if (mglobal == n*bs) { /* square matrix */
2139         nlocal = m;
2140       } else {
2141         nlocal = n/size + ((n % size) > rank);
2142       }
2143     } else {
2144       nlocal = csize/bs;
2145     }
2146     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2147     rstart = rend - nlocal;
2148     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2149 
2150     /* next, compute all the lengths */
2151     ierr  = PetscMalloc2(m+1,&dlens,m+1,&olens);CHKERRQ(ierr);
2152     for (i=0; i<m; i++) {
2153       jend = ii[i+1] - ii[i];
2154       olen = 0;
2155       dlen = 0;
2156       for (j=0; j<jend; j++) {
2157         if (*jj < rstart || *jj >= rend) olen++;
2158         else dlen++;
2159         jj++;
2160       }
2161       olens[i] = olen;
2162       dlens[i] = dlen;
2163     }
2164     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
2165     ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr);
2166     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
2167     ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
2168     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
2169   } else {
2170     PetscInt ml,nl;
2171 
2172     M    = *newmat;
2173     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2174     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2175     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2176     /*
2177          The next two lines are needed so we may call MatSetValuesBlocked_MPIBAIJ() below directly,
2178        rather than the slower MatSetValues().
2179     */
2180     M->was_assembled = PETSC_TRUE;
2181     M->assembled     = PETSC_FALSE;
2182   }
2183   ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2184   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2185   aij  = (Mat_SeqBAIJ*)(Mreuse)->data;
2186   ii   = aij->i;
2187   jj   = aij->j;
2188   aa   = aij->a;
2189   for (i=0; i<m; i++) {
2190     row   = rstart/bs + i;
2191     nz    = ii[i+1] - ii[i];
2192     cwork = jj;     jj += nz;
2193     vwork = aa;     aa += nz*bs*bs;
2194     ierr  = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2195   }
2196 
2197   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2198   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2199   *newmat = M;
2200 
2201   /* save submatrix used in processor for next request */
2202   if (call ==  MAT_INITIAL_MATRIX) {
2203     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2204     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2205   }
2206   PetscFunctionReturn(0);
2207 }
2208 
2209 #undef __FUNCT__
2210 #define __FUNCT__ "MatPermute_MPIBAIJ"
2211 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B)
2212 {
2213   MPI_Comm       comm,pcomm;
2214   PetscInt       clocal_size,nrows;
2215   const PetscInt *rows;
2216   PetscMPIInt    size;
2217   IS             crowp,lcolp;
2218   PetscErrorCode ierr;
2219 
2220   PetscFunctionBegin;
2221   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2222   /* make a collective version of 'rowp' */
2223   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
2224   if (pcomm==comm) {
2225     crowp = rowp;
2226   } else {
2227     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
2228     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
2229     ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr);
2230     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
2231   }
2232   ierr = ISSetPermutation(crowp);CHKERRQ(ierr);
2233   /* make a local version of 'colp' */
2234   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
2235   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
2236   if (size==1) {
2237     lcolp = colp;
2238   } else {
2239     ierr = ISAllGather(colp,&lcolp);CHKERRQ(ierr);
2240   }
2241   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
2242   /* now we just get the submatrix */
2243   ierr = MatGetLocalSize(A,NULL,&clocal_size);CHKERRQ(ierr);
2244   ierr = MatGetSubMatrix_MPIBAIJ_Private(A,crowp,lcolp,clocal_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
2245   /* clean up */
2246   if (pcomm!=comm) {
2247     ierr = ISDestroy(&crowp);CHKERRQ(ierr);
2248   }
2249   if (size>1) {
2250     ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
2251   }
2252   PetscFunctionReturn(0);
2253 }
2254 
2255 #undef __FUNCT__
2256 #define __FUNCT__ "MatGetGhosts_MPIBAIJ"
2257 PetscErrorCode  MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
2258 {
2259   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data;
2260   Mat_SeqBAIJ *B    = (Mat_SeqBAIJ*)baij->B->data;
2261 
2262   PetscFunctionBegin;
2263   if (nghosts) *nghosts = B->nbs;
2264   if (ghosts) *ghosts = baij->garray;
2265   PetscFunctionReturn(0);
2266 }
2267 
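/*
   Usage sketch: the ghosts are the global block indices of the off-diagonal
   columns (baij->garray); the array is owned by the matrix and must not be
   freed by the caller.

     PetscInt       nghosts;
     const PetscInt *ghosts;
     ierr = MatGetGhosts(A,&nghosts,&ghosts);CHKERRQ(ierr);
*/
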
2268 #undef __FUNCT__
2269 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIBAIJ"
2270 PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat)
2271 {
2272   Mat            B;
2273   Mat_MPIBAIJ    *a  = (Mat_MPIBAIJ*)A->data;
2274   Mat_SeqBAIJ    *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data;
2275   Mat_SeqAIJ     *b;
2276   PetscErrorCode ierr;
2277   PetscMPIInt    size,rank,*recvcounts = 0,*displs = 0;
2278   PetscInt       sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs;
2279   PetscInt       m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf;
2280 
2281   PetscFunctionBegin;
2282   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
2283   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
2284 
2285   /* ----------------------------------------------------------------
2286      Tell every processor the number of nonzeros per row
2287   */
2288   ierr = PetscMalloc1((A->rmap->N/bs),&lens);CHKERRQ(ierr);
2289   for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) {
2290     lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs];
2291   }
2292   sendcount = A->rmap->rend/bs - A->rmap->rstart/bs;
2293   ierr      = PetscMalloc1(2*size,&recvcounts);CHKERRQ(ierr);
2294   displs    = recvcounts + size;
2295   for (i=0; i<size; i++) {
2296     recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs;
2297     displs[i]     = A->rmap->range[i]/bs;
2298   }
2299 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2300   ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2301 #else
2302   ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2303 #endif
2304   /* ---------------------------------------------------------------
2305      Create the sequential matrix of the same type as the local block diagonal
2306   */
2307   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
2308   ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2309   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2310   ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr);
2311   b    = (Mat_SeqAIJ*)B->data;
2312 
2313   /*--------------------------------------------------------------------
2314     Copy my part of matrix column indices over
2315   */
2316   sendcount  = ad->nz + bd->nz;
2317   jsendbuf   = b->j + b->i[rstarts[rank]/bs];
2318   a_jsendbuf = ad->j;
2319   b_jsendbuf = bd->j;
2320   n          = A->rmap->rend/bs - A->rmap->rstart/bs;
2321   cnt        = 0;
2322   for (i=0; i<n; i++) {
2323 
2324     /* put in lower diagonal portion */
2325     m = bd->i[i+1] - bd->i[i];
2326     while (m > 0) {
2327       /* is it above diagonal (in bd (compressed) numbering) */
2328       if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break;
2329       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2330       m--;
2331     }
2332 
2333     /* put in diagonal portion */
2334     for (j=ad->i[i]; j<ad->i[i+1]; j++) {
2335       jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++;
2336     }
2337 
2338     /* put in upper diagonal portion */
2339     while (m-- > 0) {
2340       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2341     }
2342   }
2343   if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt);
2344 
2345   /*--------------------------------------------------------------------
2346     Gather all column indices to all processors
2347   */
2348   for (i=0; i<size; i++) {
2349     recvcounts[i] = 0;
2350     for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) {
2351       recvcounts[i] += lens[j];
2352     }
2353   }
2354   displs[0] = 0;
2355   for (i=1; i<size; i++) {
2356     displs[i] = displs[i-1] + recvcounts[i-1];
2357   }
2358 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2359   ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2360 #else
2361   ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2362 #endif
2363   /*--------------------------------------------------------------------
2364     Assemble the matrix into usable form (note numerical values not yet set)
2365   */
2366   /* set the b->ilen (length of each row) values */
2367   ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr);
2368   /* set the b->i indices */
2369   b->i[0] = 0;
2370   for (i=1; i<=A->rmap->N/bs; i++) {
2371     b->i[i] = b->i[i-1] + lens[i-1];
2372   }
2373   ierr = PetscFree(lens);CHKERRQ(ierr);
2374   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2375   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2376   ierr = PetscFree(recvcounts);CHKERRQ(ierr);
2377 
2378   if (A->symmetric) {
2379     ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2380   } else if (A->hermitian) {
2381     ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr);
2382   } else if (A->structurally_symmetric) {
2383     ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2384   }
2385   *newmat = B;
2386   PetscFunctionReturn(0);
2387 }
2388 
2389 #undef __FUNCT__
2390 #define __FUNCT__ "MatSOR_MPIBAIJ"
2391 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2392 {
2393   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
2394   PetscErrorCode ierr;
2395   Vec            bb1 = 0;
2396 
2397   PetscFunctionBegin;
2398   if (flag == SOR_APPLY_UPPER) {
2399     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2400     PetscFunctionReturn(0);
2401   }
2402 
2403   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) {
2404     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2405   }
2406 
2407   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2408     if (flag & SOR_ZERO_INITIAL_GUESS) {
2409       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2410       its--;
2411     }
2412 
2413     while (its--) {
2414       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2415       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2416 
2417       /* update rhs: bb1 = bb - B*x */
2418       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2419       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2420 
2421       /* local sweep */
2422       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2423     }
2424   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
2425     if (flag & SOR_ZERO_INITIAL_GUESS) {
2426       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2427       its--;
2428     }
2429     while (its--) {
2430       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2431       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2432 
2433       /* update rhs: bb1 = bb - B*x */
2434       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2435       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2436 
2437       /* local sweep */
2438       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2439     }
2440   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
2441     if (flag & SOR_ZERO_INITIAL_GUESS) {
2442       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2443       its--;
2444     }
2445     while (its--) {
2446       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2447       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2448 
2449       /* update rhs: bb1 = bb - B*x */
2450       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2451       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2452 
2453       /* local sweep */
2454       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2455     }
2456   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Requested parallel version of SOR not supported");
2457 
2458   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2459   PetscFunctionReturn(0);
2460 }
2461 
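/*
   Usage sketch (pc is a hypothetical preconditioner object): MatSOR() is
   normally reached through PCSOR; only the local (processor-block) sweeps
   are supported above, with the ghost values of xx rescattered before each
   outer iteration.

     ierr = PCSetType(pc,PCSOR);CHKERRQ(ierr);
     ierr = PCSORSetSymmetric(pc,SOR_LOCAL_SYMMETRIC_SWEEP);CHKERRQ(ierr);
*/
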
2462 #undef __FUNCT__
2463 #define __FUNCT__ "MatGetColumnNorms_MPIBAIJ"
2464 PetscErrorCode MatGetColumnNorms_MPIBAIJ(Mat A,NormType type,PetscReal *norms)
2465 {
2466   PetscErrorCode ierr;
2467   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)A->data;
2468   PetscInt       N,i,*garray = aij->garray;
2469   PetscInt       ib,jb,bs = A->rmap->bs;
2470   Mat_SeqBAIJ    *a_aij = (Mat_SeqBAIJ*) aij->A->data;
2471   MatScalar      *a_val = a_aij->a;
2472   Mat_SeqBAIJ    *b_aij = (Mat_SeqBAIJ*) aij->B->data;
2473   MatScalar      *b_val = b_aij->a;
2474   PetscReal      *work;
2475 
2476   PetscFunctionBegin;
2477   ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
2478   ierr = PetscCalloc1(N,&work);CHKERRQ(ierr);
2479   if (type == NORM_2) {
2480     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2481       for (jb=0; jb<bs; jb++) {
2482         for (ib=0; ib<bs; ib++) {
2483           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
2484           a_val++;
2485         }
2486       }
2487     }
2488     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2489       for (jb=0; jb<bs; jb++) {
2490         for (ib=0; ib<bs; ib++) {
2491           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val);
2492           b_val++;
2493         }
2494       }
2495     }
2496   } else if (type == NORM_1) {
2497     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2498       for (jb=0; jb<bs; jb++) {
2499         for (ib=0; ib<bs; ib++) {
2500           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
2501           a_val++;
2502         }
2503       }
2504     }
2505     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2506       for (jb=0; jb<bs; jb++) {
2507         for (ib=0; ib<bs; ib++) {
2508           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val);
2509           b_val++;
2510         }
2511       }
2512     }
2513   } else if (type == NORM_INFINITY) {
2514     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2515       for (jb=0; jb<bs; jb++) {
2516         for (ib=0; ib<bs; ib++) {
2517           PetscInt col = A->cmap->rstart + a_aij->j[i] * bs + jb;
2518           work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]);
2519           a_val++;
2520         }
2521       }
2522     }
2523     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2524       for (jb=0; jb<bs; jb++) {
2525         for (ib=0; ib<bs; ib++) {
2526           PetscInt col = garray[b_aij->j[i]] * bs + jb;
2527           work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]);
2528           b_val++;
2529         }
2530       }
2531     }
2532   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
2533   if (type == NORM_INFINITY) {
2534     ierr = MPI_Allreduce(work,norms,N,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2535   } else {
2536     ierr = MPI_Allreduce(work,norms,N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2537   }
2538   ierr = PetscFree(work);CHKERRQ(ierr);
2539   if (type == NORM_2) {
2540     for (i=0; i<N; i++) norms[i] = PetscSqrtReal(norms[i]);
2541   }
2542   PetscFunctionReturn(0);
2543 }
2544 
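/*
   Usage sketch (norms must have the global column length N): each process
   accumulates into a length-N work array that is then reduced, so this is
   not scalable in the number of columns.

     PetscReal *norms;
     PetscInt  N;
     ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
     ierr = PetscMalloc1(N,&norms);CHKERRQ(ierr);
     ierr = MatGetColumnNorms(A,NORM_2,norms);CHKERRQ(ierr);
     ierr = PetscFree(norms);CHKERRQ(ierr);
*/
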
2545 #undef __FUNCT__
2546 #define __FUNCT__ "MatInvertBlockDiagonal_MPIBAIJ"
2547 PetscErrorCode  MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values)
2548 {
2549   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*) A->data;
2550   PetscErrorCode ierr;
2551 
2552   PetscFunctionBegin;
2553   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2554   PetscFunctionReturn(0);
2555 }
2556 
2557 
2558 /* -------------------------------------------------------------------*/
2559 static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ,
2560                                        MatGetRow_MPIBAIJ,
2561                                        MatRestoreRow_MPIBAIJ,
2562                                        MatMult_MPIBAIJ,
2563                                 /* 4*/ MatMultAdd_MPIBAIJ,
2564                                        MatMultTranspose_MPIBAIJ,
2565                                        MatMultTransposeAdd_MPIBAIJ,
2566                                        0,
2567                                        0,
2568                                        0,
2569                                 /*10*/ 0,
2570                                        0,
2571                                        0,
2572                                        MatSOR_MPIBAIJ,
2573                                        MatTranspose_MPIBAIJ,
2574                                 /*15*/ MatGetInfo_MPIBAIJ,
2575                                        MatEqual_MPIBAIJ,
2576                                        MatGetDiagonal_MPIBAIJ,
2577                                        MatDiagonalScale_MPIBAIJ,
2578                                        MatNorm_MPIBAIJ,
2579                                 /*20*/ MatAssemblyBegin_MPIBAIJ,
2580                                        MatAssemblyEnd_MPIBAIJ,
2581                                        MatSetOption_MPIBAIJ,
2582                                        MatZeroEntries_MPIBAIJ,
2583                                 /*24*/ MatZeroRows_MPIBAIJ,
2584                                        0,
2585                                        0,
2586                                        0,
2587                                        0,
2588                                 /*29*/ MatSetUp_MPIBAIJ,
2589                                        0,
2590                                        0,
2591                                        0,
2592                                        0,
2593                                 /*34*/ MatDuplicate_MPIBAIJ,
2594                                        0,
2595                                        0,
2596                                        0,
2597                                        0,
2598                                 /*39*/ MatAXPY_MPIBAIJ,
2599                                        MatGetSubMatrices_MPIBAIJ,
2600                                        MatIncreaseOverlap_MPIBAIJ,
2601                                        MatGetValues_MPIBAIJ,
2602                                        MatCopy_MPIBAIJ,
2603                                 /*44*/ 0,
2604                                        MatScale_MPIBAIJ,
2605                                        0,
2606                                        0,
2607                                        MatZeroRowsColumns_MPIBAIJ,
2608                                 /*49*/ 0,
2609                                        0,
2610                                        0,
2611                                        0,
2612                                        0,
2613                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2614                                        0,
2615                                        MatSetUnfactored_MPIBAIJ,
2616                                        MatPermute_MPIBAIJ,
2617                                        MatSetValuesBlocked_MPIBAIJ,
2618                                 /*59*/ MatGetSubMatrix_MPIBAIJ,
2619                                        MatDestroy_MPIBAIJ,
2620                                        MatView_MPIBAIJ,
2621                                        0,
2622                                        0,
2623                                 /*64*/ 0,
2624                                        0,
2625                                        0,
2626                                        0,
2627                                        0,
2628                                 /*69*/ MatGetRowMaxAbs_MPIBAIJ,
2629                                        0,
2630                                        0,
2631                                        0,
2632                                        0,
2633                                 /*74*/ 0,
2634                                        MatFDColoringApply_BAIJ,
2635                                        0,
2636                                        0,
2637                                        0,
2638                                 /*79*/ 0,
2639                                        0,
2640                                        0,
2641                                        0,
2642                                        MatLoad_MPIBAIJ,
2643                                 /*84*/ 0,
2644                                        0,
2645                                        0,
2646                                        0,
2647                                        0,
2648                                 /*89*/ 0,
2649                                        0,
2650                                        0,
2651                                        0,
2652                                        0,
2653                                 /*94*/ 0,
2654                                        0,
2655                                        0,
2656                                        0,
2657                                        0,
2658                                 /*99*/ 0,
2659                                        0,
2660                                        0,
2661                                        0,
2662                                        0,
2663                                 /*104*/0,
2664                                        MatRealPart_MPIBAIJ,
2665                                        MatImaginaryPart_MPIBAIJ,
2666                                        0,
2667                                        0,
2668                                 /*109*/0,
2669                                        0,
2670                                        0,
2671                                        0,
2672                                        0,
2673                                 /*114*/MatGetSeqNonzeroStructure_MPIBAIJ,
2674                                        0,
2675                                        MatGetGhosts_MPIBAIJ,
2676                                        0,
2677                                        0,
2678                                 /*119*/0,
2679                                        0,
2680                                        0,
2681                                        0,
2682                                        MatGetMultiProcBlock_MPIBAIJ,
2683                                 /*124*/0,
2684                                        MatGetColumnNorms_MPIBAIJ,
2685                                        MatInvertBlockDiagonal_MPIBAIJ,
2686                                        0,
2687                                        0,
2688                                /*129*/ 0,
2689                                        0,
2690                                        0,
2691                                        0,
2692                                        0,
2693                                /*134*/ 0,
2694                                        0,
2695                                        0,
2696                                        0,
2697                                        0,
2698                                /*139*/ 0,
2699                                        0,
2700                                        0,
2701                                        MatFDColoringSetUp_MPIXAIJ
2702 };
2703 
2704 #undef __FUNCT__
2705 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
2706 PetscErrorCode  MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a)
2707 {
2708   PetscFunctionBegin;
2709   *a = ((Mat_MPIBAIJ*)A->data)->A;
2710   PetscFunctionReturn(0);
2711 }
2712 
2713 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*);
2714 
2715 #undef __FUNCT__
2716 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ"
2717 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2718 {
2719   PetscInt       m,rstart,cstart,cend;
2720   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
2721   const PetscInt *JJ    =0;
2722   PetscScalar    *values=0;
2723   PetscBool      roworiented = ((Mat_MPIBAIJ*)B->data)->roworiented;
2724   PetscErrorCode ierr;
2725 
2726   PetscFunctionBegin;
2727   ierr   = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2728   ierr   = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2729   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2730   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2731   ierr   = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2732   m      = B->rmap->n/bs;
2733   rstart = B->rmap->rstart/bs;
2734   cstart = B->cmap->rstart/bs;
2735   cend   = B->cmap->rend/bs;
2736 
2737   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
2738   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
2739   for (i=0; i<m; i++) {
2740     nz = ii[i+1] - ii[i];
2741     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
2742     nz_max = PetscMax(nz_max,nz);
2743     JJ     = jj + ii[i];
2744     for (j=0; j<nz; j++) {
2745       if (*JJ >= cstart) break;
2746       JJ++;
2747     }
2748     d = 0;
2749     for (; j<nz; j++) {
2750       if (*JJ++ >= cend) break;
2751       d++;
2752     }
2753     d_nnz[i] = d;
2754     o_nnz[i] = nz - d;
2755   }
2756   ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2757   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
2758 
2759   values = (PetscScalar*)V;
2760   if (!values) {
2761     ierr = PetscMalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr);
2762     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2763   }
2764   for (i=0; i<m; i++) {
2765     PetscInt          row    = i + rstart;
2766     PetscInt          ncols  = ii[i+1] - ii[i];
2767     const PetscInt    *icols = jj + ii[i];
2768     if (!roworiented) {         /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
2769       const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2770       ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2771     } else {                    /* block ordering does not match so we can only insert one block at a time. */
2772       PetscInt j;
2773       for (j=0; j<ncols; j++) {
2774         const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
2775         ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
2776       }
2777     }
2778   }
2779 
2780   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2781   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2782   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2783   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2784   PetscFunctionReturn(0);
2785 }
2786 
2787 #undef __FUNCT__
2788 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR"
2789 /*@C
2790    MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format
2791    (block compressed row).
2792 
2793    Collective on MPI_Comm
2794 
2795    Input Parameters:
2796 +  A - the matrix
2797 .  bs - the block size
2798 .  i - the indices into j for the start of each local row (starts with zero)
2799 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2800 -  v - optional values in the matrix
2801 
2802    Level: developer
2803 
2804    Notes: The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED.  For example, C programs
2805    may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
2806    over rows within a block and the last index is over columns within a block.  Fortran programs will likely set
2807    MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
2808    block and the second index is over columns within a block.
2809 
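   Example (a minimal sketch, not from the PETSc distribution): with m local block rows of size bs, and
   hypothetical arrays ii, jj, and vv holding the local block rows in CSR form (vv may be NULL to set only
   the nonzero pattern), one might write
.vb
   Mat B;
   MatCreate(PETSC_COMM_WORLD,&B);
   MatSetSizes(B,m*bs,m*bs,PETSC_DETERMINE,PETSC_DETERMINE);
   MatSetType(B,MATMPIBAIJ);
   MatMPIBAIJSetPreallocationCSR(B,bs,ii,jj,vv);
.ve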
2810 .keywords: matrix, aij, compressed row, sparse, parallel
2811 
2812 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, MatCreateMPIBAIJWithArrays(), MPIBAIJ
2813 @*/
2814 PetscErrorCode  MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2815 {
2816   PetscErrorCode ierr;
2817 
2818   PetscFunctionBegin;
2819   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2820   PetscValidType(B,1);
2821   PetscValidLogicalCollectiveInt(B,bs,2);
2822   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
2823   PetscFunctionReturn(0);
2824 }
2825 
2826 #undef __FUNCT__
2827 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
2828 PetscErrorCode  MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
2829 {
2830   Mat_MPIBAIJ    *b;
2831   PetscErrorCode ierr;
2832   PetscInt       i;
2833 
2834   PetscFunctionBegin;
2835   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
2836   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2837   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2838   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2839 
2840   if (d_nnz) {
2841     for (i=0; i<B->rmap->n/bs; i++) {
2842       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]);
2843     }
2844   }
2845   if (o_nnz) {
2846     for (i=0; i<B->rmap->n/bs; i++) {
2847       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]);
2848     }
2849   }
2850 
2851   b      = (Mat_MPIBAIJ*)B->data;
2852   b->bs2 = bs*bs;
2853   b->mbs = B->rmap->n/bs;
2854   b->nbs = B->cmap->n/bs;
2855   b->Mbs = B->rmap->N/bs;
2856   b->Nbs = B->cmap->N/bs;
2857 
2858   for (i=0; i<=b->size; i++) {
2859     b->rangebs[i] = B->rmap->range[i]/bs;
2860   }
2861   b->rstartbs = B->rmap->rstart/bs;
2862   b->rendbs   = B->rmap->rend/bs;
2863   b->cstartbs = B->cmap->rstart/bs;
2864   b->cendbs   = B->cmap->rend/bs;
2865 
2866   if (!B->preallocated) {
2867     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2868     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2869     ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
2870     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2871     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2872     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2873     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
2874     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2875     ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr);
2876   }
2877 
2878   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2879   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
2880   B->preallocated = PETSC_TRUE;
2881   PetscFunctionReturn(0);
2882 }
2883 
2884 extern PetscErrorCode  MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
2885 extern PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
2886 
2887 #undef __FUNCT__
2888 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj"
2889 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype,MatReuse reuse,Mat *adj)
2890 {
2891   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
2892   PetscErrorCode ierr;
2893   Mat_SeqBAIJ    *d  = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data;
2894   PetscInt       M   = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs;
2895   const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;
2896 
2897   PetscFunctionBegin;
2898   ierr  = PetscMalloc1((M+1),&ii);CHKERRQ(ierr);
2899   ii[0] = 0;
2900   for (i=0; i<M; i++) {
2901     if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]);
2902     if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]);
2903     ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i];
2904     /* remove one from the count if the matrix row has a diagonal entry */
2905     for (j=id[i]; j<id[i+1]; j++) {
2906       if (jd[j] == i) {ii[i+1]--;break;}
2907     }
2908   }
2909   ierr = PetscMalloc1(ii[M],&jj);CHKERRQ(ierr);
2910   cnt  = 0;
2911   for (i=0; i<M; i++) {
2912     for (j=io[i]; j<io[i+1]; j++) {
2913       if (garray[jo[j]] > rstart) break;
2914       jj[cnt++] = garray[jo[j]];
2915     }
2916     for (k=id[i]; k<id[i+1]; k++) {
2917       if (jd[k] != i) {
2918         jj[cnt++] = rstart + jd[k];
2919       }
2920     }
2921     for (; j<io[i+1]; j++) {
2922       jj[cnt++] = garray[jo[j]];
2923     }
2924   }
2925   ierr = MatCreateMPIAdj(PetscObjectComm((PetscObject)B),M,B->cmap->N/B->rmap->bs,ii,jj,NULL,adj);CHKERRQ(ierr);
2926   PetscFunctionReturn(0);
2927 }
2928 
2929 #include <../src/mat/impls/aij/mpi/mpiaij.h>
2930 
2931 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,MatReuse,Mat*);
2932 
2933 #undef __FUNCT__
2934 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAIJ"
2935 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
2936 {
2937   PetscErrorCode ierr;
2938   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
2939   Mat            B;
2940   Mat_MPIAIJ     *b;
2941 
2942   PetscFunctionBegin;
2943   if (!A->assembled) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Matrix must be assembled");
2944 
2945   ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2946   ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
2947   ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2948   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
2949   ierr = MatMPIAIJSetPreallocation(B,0,NULL,0,NULL);CHKERRQ(ierr);
2950   b    = (Mat_MPIAIJ*) B->data;
2951 
2952   ierr = MatDestroy(&b->A);CHKERRQ(ierr);
2953   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2954   ierr = MatDisAssemble_MPIBAIJ(A);CHKERRQ(ierr);
2955   ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr);
2956   ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr);
2957   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2958   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2959   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2960   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2961   if (reuse == MAT_REUSE_MATRIX) {
2962     ierr = MatHeaderReplace(A,B);CHKERRQ(ierr);
2963   } else {
2964    *newmat = B;
2965   }
2966   PetscFunctionReturn(0);
2967 }
2968 
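/*
    Note: callers normally reach the conversion routine below through MatConvert(); a minimal sketch
    (assuming A is an assembled MATMPIBAIJ matrix):

       ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
*/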
2969 #if defined(PETSC_HAVE_MUMPS)
2970 PETSC_EXTERN PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*);
2971 #endif
2972 
2973 /*MC
2974    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
2975 
2976    Options Database Keys:
2977 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
2978 . -mat_block_size <bs> - set the blocksize used to store the matrix
2979 - -mat_use_hash_table <fact> - use a hash table during matrix assembly to save memory (fact controls the table size)
2980 
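  Example: a hypothetical program ./ex1 that calls MatSetFromOptions() could select this format at run time with
.vb
   mpiexec -n 4 ./ex1 -mat_type mpibaij -mat_block_size 3
.ve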
2981   Level: beginner
2982 
2983 .seealso: MatCreateBAIJ()
2984 M*/
2985 
2986 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,MatType,MatReuse,Mat*);
2987 
2988 #undef __FUNCT__
2989 #define __FUNCT__ "MatCreate_MPIBAIJ"
2990 PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B)
2991 {
2992   Mat_MPIBAIJ    *b;
2993   PetscErrorCode ierr;
2994   PetscBool      flg;
2995 
2996   PetscFunctionBegin;
2997   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
2998   B->data = (void*)b;
2999 
3000   ierr         = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3001   B->assembled = PETSC_FALSE;
3002 
3003   B->insertmode = NOT_SET_VALUES;
3004   ierr          = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
3005   ierr          = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr);
3006 
3007   /* build local table of row and column ownerships */
3008   ierr = PetscMalloc1((b->size+1),&b->rangebs);CHKERRQ(ierr);
3009 
3010   /* build cache for off array entries formed */
3011   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
3012 
3013   b->donotstash  = PETSC_FALSE;
3014   b->colmap      = NULL;
3015   b->garray      = NULL;
3016   b->roworiented = PETSC_TRUE;
3017 
3018   /* stuff used in block assembly */
3019   b->barray = 0;
3020 
3021   /* stuff used for matrix vector multiply */
3022   b->lvec  = 0;
3023   b->Mvctx = 0;
3024 
3025   /* stuff for MatGetRow() */
3026   b->rowindices   = 0;
3027   b->rowvalues    = 0;
3028   b->getrowactive = PETSC_FALSE;
3029 
3030   /* hash table stuff */
3031   b->ht           = 0;
3032   b->hd           = 0;
3033   b->ht_size      = 0;
3034   b->ht_flag      = PETSC_FALSE;
3035   b->ht_fact      = 0;
3036   b->ht_total_ct  = 0;
3037   b->ht_insert_ct = 0;
3038 
3039   /* stuff for MatGetSubMatrices_MPIBAIJ_local() */
3040   b->ijonly = PETSC_FALSE;
3041 
3042   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr);
3043   ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,NULL);CHKERRQ(ierr);
3044   if (flg) {
3045     PetscReal fact = 1.39;
3046     ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
3047     ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
3048     if (fact <= 1.0) fact = 1.39;
3049     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
3050     ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
3051   }
3052   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3053 
3054 #if defined(PETSC_HAVE_MUMPS)
3055   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_baij_mumps);CHKERRQ(ierr);
3056 #endif
3057   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr);
3058   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr);
3059   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C",MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr);
3060   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
3061   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
3062   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
3063   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
3064   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
3065   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
3066   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSetHashTableFactor_C",MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
3067   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpibstrm_C",MatConvert_MPIBAIJ_MPIBSTRM);CHKERRQ(ierr);
3068   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);
3069   PetscFunctionReturn(0);
3070 }
3071 
3072 /*MC
3073    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
3074 
3075    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
3076    and MATMPIBAIJ otherwise.
3077 
3078    Options Database Keys:
3079 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
3080 
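  Example: a hypothetical program ./ex1 that calls MatSetFromOptions() gets a MATSEQBAIJ matrix on one
  process and a MATMPIBAIJ matrix on several, from the same option:
.vb
   mpiexec -n 1 ./ex1 -mat_type baij
   mpiexec -n 4 ./ex1 -mat_type baij
.ve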
3081   Level: beginner
3082 
3083 .seealso: MatCreateBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3084 M*/
3085 
3086 #undef __FUNCT__
3087 #define __FUNCT__ "MatMPIBAIJSetPreallocation"
3088 /*@C
3089    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
3090    (block compressed row).  For good matrix assembly performance
3091    the user should preallocate the matrix storage by setting the parameters
3092    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3093    performance can be increased by more than a factor of 50.
3094 
3095    Collective on Mat
3096 
3097    Input Parameters:
3098 +  A - the matrix
3099 .  bs   - size of block
3100 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
3101            submatrix  (same for all local rows)
3102 .  d_nnz - array containing the number of block nonzeros in the various block rows
3103            of the diagonal portion of the local submatrix (possibly different for each block
3104            row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry and
3105            set it even if it is zero.
3106 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of the local
3107            submatrix (same for all local rows).
3108 -  o_nnz - array containing the number of block nonzeros in the various block rows of the
3109            off-diagonal portion of the local submatrix (possibly different for
3110            each block row) or NULL.
3111 
3112    If the *_nnz parameter is given then the *_nz parameter is ignored
3113 
3114    Options Database Keys:
3115 +   -mat_block_size - size of the blocks to use
3116 -   -mat_use_hash_table <fact> - use a hash table during matrix assembly to save memory (fact controls the table size)
3117 
3118    Notes:
3119    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
3120    then it must be used on all processors that share the object for that argument.
3121 
3122    Storage Information:
3123    For a square global matrix we define each processor's diagonal portion
3124    to be its local rows and the corresponding columns (a square submatrix);
3125    each processor's off-diagonal portion encompasses the remainder of the
3126    local matrix (a rectangular submatrix).
3127 
3128    The user can specify preallocated storage for the diagonal part of
3129    the local submatrix with either d_nz or d_nnz (not both).  Set
3130    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3131    memory allocation.  Likewise, specify preallocated storage for the
3132    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3133 
3134    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3135    the figure below we depict these three local rows and all columns (0-11).
3136 
3137 .vb
3138            0 1 2 3 4 5 6 7 8 9 10 11
3139           --------------------------
3140    row 3  |o o o d d d o o o o  o  o
3141    row 4  |o o o d d d o o o o  o  o
3142    row 5  |o o o d d d o o o o  o  o
3143           --------------------------
3144 .ve
3145 
3146    Thus, any entries in the d locations are stored in the d (diagonal)
3147    submatrix, and any entries in the o locations are stored in the
3148    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3149    stored simply in the MATSEQBAIJ format for compressed row storage.
3150 
3151    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3152    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3153    In general, for PDE problems in which most nonzeros are near the diagonal,
3154    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3155    or you will get TERRIBLE performance; see the users' manual chapter on
3156    matrices.
3157 
3158    You can call MatGetInfo() to get information on how effective the preallocation was;
3159    for example, check the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
3160    You can also run with the option -info and look for messages with the string
3161    malloc in them to see if additional memory allocation was needed.
3162 
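   For instance, a minimal sketch (error checking elided; the hypothetical arrays dnnz and onnz hold the
   caller's per-block-row estimates and m is the number of local block rows):
.vb
   Mat A;
   MatCreate(PETSC_COMM_WORLD,&A);
   MatSetSizes(A,m*bs,m*bs,PETSC_DETERMINE,PETSC_DETERMINE);
   MatSetType(A,MATMPIBAIJ);
   MatMPIBAIJSetPreallocation(A,bs,0,dnnz,0,onnz);
.ve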
3163    Level: intermediate
3164 
3165 .keywords: matrix, block, aij, compressed row, sparse, parallel
3166 
3167 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocationCSR(), PetscSplitOwnership()
3168 @*/
3169 PetscErrorCode  MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3170 {
3171   PetscErrorCode ierr;
3172 
3173   PetscFunctionBegin;
3174   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3175   PetscValidType(B,1);
3176   PetscValidLogicalCollectiveInt(B,bs,2);
3177   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3178   PetscFunctionReturn(0);
3179 }
3180 
3181 #undef __FUNCT__
3182 #define __FUNCT__ "MatCreateBAIJ"
3183 /*@C
3184    MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format
3185    (block compressed row).  For good matrix assembly performance
3186    the user should preallocate the matrix storage by setting the parameters
3187    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3188    performance can be increased by more than a factor of 50.
3189 
3190    Collective on MPI_Comm
3191 
3192    Input Parameters:
3193 +  comm - MPI communicator
3194 .  bs   - size of block
3195 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3196            This value should be the same as the local size used in creating the
3197            y vector for the matrix-vector product y = Ax.
3198 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
3199            This value should be the same as the local size used in creating the
3200            x vector for the matrix-vector product y = Ax.
3201 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3202 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3203 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
3204            submatrix  (same for all local rows)
3205 .  d_nnz - array containing the number of nonzero blocks in the various block rows
3206            of the diagonal portion of the local submatrix (possibly different for each block
3207            row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry
3208            and set it even if it is zero.
3209 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of the local
3210            submatrix (same for all local rows).
3211 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
3212            off-diagonal portion of the local submatrix (possibly different for
3213            each block row) or NULL.
3214 
3215    Output Parameter:
3216 .  A - the matrix
3217 
3218    Options Database Keys:
3219 +   -mat_block_size - size of the blocks to use
3220 -   -mat_use_hash_table <fact> - use a hash table during matrix assembly to save memory (fact controls the table size)
3221 
3222    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3223    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3224    [MatXXXXSetPreallocation() is, for example, MatSeqBAIJSetPreallocation]
3225 
3226    Notes:
3227    If the *_nnz parameter is given then the *_nz parameter is ignored
3228 
3229    A nonzero block is any block that has 1 or more nonzeros in it
3230 
3231    The user MUST specify either the local or global matrix dimensions
3232    (possibly both).
3233 
3234    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
3235    then it must be used on all processors that share the object for that argument.
3236 
3237    Storage Information:
3238    For a square global matrix we define each processor's diagonal portion
3239    to be its local rows and the corresponding columns (a square submatrix);
3240    each processor's off-diagonal portion encompasses the remainder of the
3241    local matrix (a rectangular submatrix).
3242 
3243    The user can specify preallocated storage for the diagonal part of
3244    the local submatrix with either d_nz or d_nnz (not both).  Set
3245    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3246    memory allocation.  Likewise, specify preallocated storage for the
3247    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3248 
3249    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3250    the figure below we depict these three local rows and all columns (0-11).
3251 
3252 .vb
3253            0 1 2 3 4 5 6 7 8 9 10 11
3254           --------------------------
3255    row 3  |o o o d d d o o o o  o  o
3256    row 4  |o o o d d d o o o o  o  o
3257    row 5  |o o o d d d o o o o  o  o
3258           --------------------------
3259 .ve
3260 
3261    Thus, any entries in the d locations are stored in the d (diagonal)
3262    submatrix, and any entries in the o locations are stored in the
3263    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3264    stored simply in the MATSEQBAIJ format for compressed row storage.
3265 
3266    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3267    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3268    In general, for PDE problems in which most nonzeros are near the diagonal,
3269    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3270    or you will get TERRIBLE performance; see the users' manual chapter on
3271    matrices.
3272 
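   A minimal sketch of the one-call interface (error checking elided; dnnz and onnz are hypothetical
   per-block-row estimates and may be NULL, in which case the scalar d_nz and o_nz values are used):
.vb
   Mat A;
   MatCreateBAIJ(PETSC_COMM_WORLD,bs,m*bs,m*bs,PETSC_DETERMINE,PETSC_DETERMINE,0,dnnz,0,onnz,&A);
.ve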
3273    Level: intermediate
3274 
3275 .keywords: matrix, block, aij, compressed row, sparse, parallel
3276 
3277 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3278 @*/
3279 PetscErrorCode  MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3280 {
3281   PetscErrorCode ierr;
3282   PetscMPIInt    size;
3283 
3284   PetscFunctionBegin;
3285   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3286   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3287   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3288   if (size > 1) {
3289     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
3290     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3291   } else {
3292     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3293     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3294   }
3295   PetscFunctionReturn(0);
3296 }
3297 
3298 #undef __FUNCT__
3299 #define __FUNCT__ "MatDuplicate_MPIBAIJ"
3300 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3301 {
3302   Mat            mat;
3303   Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
3304   PetscErrorCode ierr;
3305   PetscInt       len=0;
3306 
3307   PetscFunctionBegin;
3308   *newmat = 0;
3309   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
3310   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3311   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3312   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3313 
3314   mat->factortype   = matin->factortype;
3315   mat->preallocated = PETSC_TRUE;
3316   mat->assembled    = PETSC_TRUE;
3317   mat->insertmode   = NOT_SET_VALUES;
3318 
3319   a             = (Mat_MPIBAIJ*)mat->data;
3320   mat->rmap->bs = matin->rmap->bs;
3321   a->bs2        = oldmat->bs2;
3322   a->mbs        = oldmat->mbs;
3323   a->nbs        = oldmat->nbs;
3324   a->Mbs        = oldmat->Mbs;
3325   a->Nbs        = oldmat->Nbs;
3326 
3327   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3328   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3329 
3330   a->size         = oldmat->size;
3331   a->rank         = oldmat->rank;
3332   a->donotstash   = oldmat->donotstash;
3333   a->roworiented  = oldmat->roworiented;
3334   a->rowindices   = 0;
3335   a->rowvalues    = 0;
3336   a->getrowactive = PETSC_FALSE;
3337   a->barray       = 0;
3338   a->rstartbs     = oldmat->rstartbs;
3339   a->rendbs       = oldmat->rendbs;
3340   a->cstartbs     = oldmat->cstartbs;
3341   a->cendbs       = oldmat->cendbs;
3342 
3343   /* hash table stuff */
3344   a->ht           = 0;
3345   a->hd           = 0;
3346   a->ht_size      = 0;
3347   a->ht_flag      = oldmat->ht_flag;
3348   a->ht_fact      = oldmat->ht_fact;
3349   a->ht_total_ct  = 0;
3350   a->ht_insert_ct = 0;
3351 
3352   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
3353   if (oldmat->colmap) {
3354 #if defined(PETSC_USE_CTABLE)
3355     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3356 #else
3357     ierr = PetscMalloc1((a->Nbs),&a->colmap);CHKERRQ(ierr);
3358     ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3359     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3360 #endif
3361   } else a->colmap = 0;
3362 
3363   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
3364     ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr);
3365     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3366     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
3367   } else a->garray = 0;
3368 
3369   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
3370   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3371   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
3372   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3373   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
3374 
3375   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3376   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
3377   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3378   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
3379   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3380   *newmat = mat;
3381   PetscFunctionReturn(0);
3382 }
3383 
3384 #undef __FUNCT__
3385 #define __FUNCT__ "MatLoad_MPIBAIJ"
3386 PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer)
3387 {
3388   PetscErrorCode ierr;
3389   int            fd;
3390   PetscInt       i,nz,j,rstart,rend;
3391   PetscScalar    *vals,*buf;
3392   MPI_Comm       comm;
3393   MPI_Status     status;
3394   PetscMPIInt    rank,size,maxnz;
3395   PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
3396   PetscInt       *locrowlens = NULL,*procsnz = NULL,*browners = NULL;
3397   PetscInt       jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax;
3398   PetscMPIInt    tag    = ((PetscObject)viewer)->tag;
3399   PetscInt       *dlens = NULL,*odlens = NULL,*mask = NULL,*masked1 = NULL,*masked2 = NULL,rowcount,odcount;
3400   PetscInt       dcount,kmax,k,nzcount,tmp,mend,sizesset=1,grows,gcols;
3401 
3402   PetscFunctionBegin;
3403   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3404   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr);
3405   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3406   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3407 
3408   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3409   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3410   if (!rank) {
3411     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3412     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
3413     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3414   }
3415 
3416   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
3417 
3418   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3419   M    = header[1]; N = header[2];
3420 
3421   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3422   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
3423   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
3424 
3425   /* If global sizes are set, check if they are consistent with that given in the file */
3426   if (sizesset) {
3427     ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
3428   }
3429   if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",M,grows);
3430   if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",N,gcols);
3431 
3432   if (M != N) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Can only do square matrices");
3433 
3434   /*
3435      This code adds extra rows to make sure the number of rows is
3436      divisible by the blocksize
3437   */
3438   Mbs        = M/bs;
3439   extra_rows = bs - M + bs*Mbs;
3440   if (extra_rows == bs) extra_rows = 0;
3441   else                  Mbs++;
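  /* Worked example (illustrative only): M = 10, bs = 4 gives Mbs = 2 and extra_rows = 4 - 10 + 8 = 2,
     so the matrix is padded to 12 rows and Mbs becomes 3; M = 8, bs = 4 gives extra_rows = 4, which is
     reset to 0 since no padding is needed. */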
3442   if (extra_rows && !rank) {
3443     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3444   }
3445 
3446   /* determine ownership of all rows */
3447   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
3448     mbs = Mbs/size + ((Mbs % size) > rank);
3449     m   = mbs*bs;
3450   } else { /* User set */
3451     m   = newmat->rmap->n;
3452     mbs = m/bs;
3453   }
3454   ierr = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr);
3455   ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3456 
3457   /* process 0 needs enough room for process with most rows */
3458   if (!rank) {
3459     mmax = rowners[1];
3460     for (i=2; i<=size; i++) {
3461       mmax = PetscMax(mmax,rowners[i]);
3462     }
3463     mmax*=bs;
3464   } else mmax = -1;             /* unused, but compiler warns anyway */
3465 
3466   rowners[0] = 0;
3467   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
3468   for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
3469   rstart = rowners[rank];
3470   rend   = rowners[rank+1];
3471 
3472   /* distribute row lengths to all processors */
3473   ierr = PetscMalloc1(m,&locrowlens);CHKERRQ(ierr);
3474   if (!rank) {
3475     mend = m;
3476     if (size == 1) mend = mend - extra_rows;
3477     ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr);
3478     for (j=mend; j<m; j++) locrowlens[j] = 1;
3479     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
3480     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
3481     for (j=0; j<m; j++) {
3482       procsnz[0] += locrowlens[j];
3483     }
3484     for (i=1; i<size; i++) {
3485       mend = browners[i+1] - browners[i];
3486       if (i == size-1) mend = mend - extra_rows;
3487       ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr);
3488       for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
3489       /* calculate the number of nonzeros on each processor */
3490       for (j=0; j<browners[i+1]-browners[i]; j++) {
3491         procsnz[i] += rowlengths[j];
3492       }
3493       ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3494     }
3495     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3496   } else {
3497     ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3498   }
3499 
3500   if (!rank) {
3501     /* determine max buffer needed and allocate it */
3502     maxnz = procsnz[0];
3503     for (i=1; i<size; i++) {
3504       maxnz = PetscMax(maxnz,procsnz[i]);
3505     }
3506     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
3507 
3508     /* read in my part of the matrix column indices  */
3509     nz     = procsnz[0];
3510     ierr   = PetscMalloc1((nz+1),&ibuf);CHKERRQ(ierr);
3511     mycols = ibuf;
3512     if (size == 1) nz -= extra_rows;
3513     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3514     if (size == 1) {
3515       for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i;
3516     }
3517 
3518     /* read in the column indices for the other procs (except the last) and ship them off */
3519     for (i=1; i<size-1; i++) {
3520       nz   = procsnz[i];
3521       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3522       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3523     }
3524     /* read in the stuff for the last proc */
3525     if (size != 1) {
3526       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
3527       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3528       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
3529       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
3530     }
3531     ierr = PetscFree(cols);CHKERRQ(ierr);
3532   } else {
3533     /* determine buffer space needed for message */
3534     nz = 0;
3535     for (i=0; i<m; i++) {
3536       nz += locrowlens[i];
3537     }
3538     ierr   = PetscMalloc1((nz+1),&ibuf);CHKERRQ(ierr);
3539     mycols = ibuf;
3540     /* receive message of column indices */
3541     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3542     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
3543     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
3544   }
3545 
3546   /* loop over local rows, determining number of off diagonal entries */
3547   ierr     = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr);
3548   ierr     = PetscCalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr);
3549   rowcount = 0; nzcount = 0;
3550   for (i=0; i<mbs; i++) {
3551     dcount  = 0;
3552     odcount = 0;
3553     for (j=0; j<bs; j++) {
3554       kmax = locrowlens[rowcount];
3555       for (k=0; k<kmax; k++) {
3556         tmp = mycols[nzcount++]/bs;
3557         if (!mask[tmp]) {
3558           mask[tmp] = 1;
3559           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
3560           else masked1[dcount++] = tmp;
3561         }
3562       }
3563       rowcount++;
3564     }
3565 
3566     dlens[i]  = dcount;
3567     odlens[i] = odcount;
3568 
3569     /* zero out the mask elements we set */
3570     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
3571     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
3572   }
3573 
3574 
3575   if (!sizesset) {
3576     ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3577   }
3578   ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
3579 
3580   if (!rank) {
3581     ierr = PetscMalloc1((maxnz+1),&buf);CHKERRQ(ierr);
3582     /* read in my part of the matrix numerical values  */
3583     nz     = procsnz[0];
3584     vals   = buf;
3585     mycols = ibuf;
3586     if (size == 1) nz -= extra_rows;
3587     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3588     if (size == 1) {
3589       for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0;
3590     }
3591 
3592     /* insert into matrix */
3593     jj = rstart*bs;
3594     for (i=0; i<m; i++) {
3595       ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3596       mycols += locrowlens[i];
3597       vals   += locrowlens[i];
3598       jj++;
3599     }
3600     /* read in other processors (except the last one) and ship out */
3601     for (i=1; i<size-1; i++) {
3602       nz   = procsnz[i];
3603       vals = buf;
3604       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3605       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3606     }
3607     /* the last proc */
3608     if (size != 1) {
3609       nz   = procsnz[i] - extra_rows;
3610       vals = buf;
3611       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3612       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
3613       ierr = MPIULong_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3614     }
3615     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3616   } else {
3617     /* receive numeric values */
3618     ierr = PetscMalloc1((nz+1),&buf);CHKERRQ(ierr);
3619 
3620     /* receive message of values */
3621     vals   = buf;
3622     mycols = ibuf;
3623     ierr   = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3624 
3625     /* insert into matrix */
3626     jj = rstart*bs;
3627     for (i=0; i<m; i++) {
3628       ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3629       mycols += locrowlens[i];
3630       vals   += locrowlens[i];
3631       jj++;
3632     }
3633   }
3634   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
3635   ierr = PetscFree(buf);CHKERRQ(ierr);
3636   ierr = PetscFree(ibuf);CHKERRQ(ierr);
3637   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
3638   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
3639   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
3640   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3641   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3642   PetscFunctionReturn(0);
3643 }
3644 
3645 #undef __FUNCT__
3646 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
3647 /*@
3648    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
3649 
3650    Input Parameters:
3651 +  mat  - the matrix
3652 -  fact - factor
3653 
3654    Not Collective, each process can use a different factor
3655 
3656    Level: advanced
3657 
3658   Notes:
3659    This can also be set by the command line option: -mat_use_hash_table <fact>
3660 
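   A minimal sketch (assuming A is a MATMPIBAIJ matrix that has not yet been assembled):
.vb
   MatSetOption(A,MAT_USE_HASH_TABLE,PETSC_TRUE);
   MatMPIBAIJSetHashTableFactor(A,1.6);
.ve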
3661 .keywords: matrix, hashtable, factor, HT
3662 
3663 .seealso: MatSetOption()
3664 @*/
3665 PetscErrorCode  MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
3666 {
3667   PetscErrorCode ierr;
3668 
3669   PetscFunctionBegin;
3670   ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr);
3671   PetscFunctionReturn(0);
3672 }
3673 
3674 #undef __FUNCT__
3675 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
3676 PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
3677 {
3678   Mat_MPIBAIJ *baij;
3679 
3680   PetscFunctionBegin;
3681   baij          = (Mat_MPIBAIJ*)mat->data;
3682   baij->ht_fact = fact;
3683   PetscFunctionReturn(0);
3684 }
3685 
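/*
    MatMPIBAIJGetSeqBAIJ - a convenience accessor: returns the "diagonal" (Ad) and "off-diagonal" (Ao)
  SeqBAIJ pieces of a MATMPIBAIJ matrix, plus the garray map from Ao's compressed block columns to
  global block columns.
*/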
3686 #undef __FUNCT__
3687 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
3688 PetscErrorCode  MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
3689 {
3690   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
3691 
3692   PetscFunctionBegin;
3693   *Ad     = a->A;
3694   *Ao     = a->B;
3695   *colmap = a->garray;
3696   PetscFunctionReturn(0);
3697 }
3698 
3699 /*
3700     Special version for direct calls from Fortran (to eliminate two function call overheads)
3701 */
3702 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3703 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3704 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3705 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3706 #endif
3707 
3708 #undef __FUNCT__
3709 #define __FUNCT__ "matmpibaijsetvaluesblocked"
3710 /*@C
3711   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked()
3712 
3713   Collective on Mat
3714 
3715   Input Parameters:
3716 + mat - the matrix
3717 . min - number of input rows
3718 . im - input rows
3719 . nin - number of input columns
3720 . in - input columns
3721 . v - numerical values input
3722 - addvin - INSERT_VALUES or ADD_VALUES
3723 
3724   Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse.
3725 
3726   Level: advanced
3727 
3728 .seealso:   MatSetValuesBlocked()
3729 @*/
3730 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
3731 {
3732   /* convert input arguments to C version */
3733   Mat        mat  = *matin;
3734   PetscInt   m    = *min, n = *nin;
3735   InsertMode addv = *addvin;
3736 
3737   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
3738   const MatScalar *value;
3739   MatScalar       *barray     = baij->barray;
3740   PetscBool       roworiented = baij->roworiented;
3741   PetscErrorCode  ierr;
3742   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
3743   PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
3744   PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
3745 
3746   PetscFunctionBegin;
3747   /* tasks normally handled by MatSetValuesBlocked() */
3748   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
3749 #if defined(PETSC_USE_DEBUG)
3750   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
3751   if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
3752 #endif
3753   if (mat->assembled) {
3754     mat->was_assembled = PETSC_TRUE;
3755     mat->assembled     = PETSC_FALSE;
3756   }
3757   ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
3758 
3759 
3760   if (!barray) {
3761     ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
3762     baij->barray = barray;
3763   }
3764 
3765   if (roworiented) stepval = (n-1)*bs;
3766   else stepval = (m-1)*bs;
3767 
3768   for (i=0; i<m; i++) {
3769     if (im[i] < 0) continue;
3770 #if defined(PETSC_USE_DEBUG)
3771     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
3772 #endif
3773     if (im[i] >= rstart && im[i] < rend) {
3774       row = im[i] - rstart;
3775       for (j=0; j<n; j++) {
3776         /* If NumCol = 1 then a copy is not required */
3777         if ((roworiented) && (n == 1)) {
3778           barray = (MatScalar*)v + i*bs2;
3779         } else if ((!roworiented) && (m == 1)) {
3780           barray = (MatScalar*)v + j*bs2;
3781         } else { /* Here a copy is required */
3782           if (roworiented) {
3783             value = v + i*(stepval+bs)*bs + j*bs;
3784           } else {
3785             value = v + j*(stepval+bs)*bs + i*bs;
3786           }
3787           for (ii=0; ii<bs; ii++,value+=stepval) {
3788             for (jj=0; jj<bs; jj++) {
3789               *barray++ = *value++;
3790             }
3791           }
3792           barray -=bs2;
3793         }
3794 
3795         if (in[j] >= cstart && in[j] < cend) {
3796           col  = in[j] - cstart;
3797           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
3798         } else if (in[j] < 0) continue;
3799 #if defined(PETSC_USE_DEBUG)
3800         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
3801 #endif
3802         else {
3803           if (mat->was_assembled) {
3804             if (!baij->colmap) {
3805               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
3806             }
3807 
3808 #if defined(PETSC_USE_DEBUG)
3809 #if defined(PETSC_USE_CTABLE)
3810             { PetscInt data;
3811               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
3812               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
3813             }
3814 #else
3815             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
3816 #endif
3817 #endif
3818 #if defined(PETSC_USE_CTABLE)
3819             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
3820             col  = (col - 1)/bs;
3821 #else
3822             col = (baij->colmap[in[j]] - 1)/bs;
3823 #endif
3824             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
3825               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
3826               col  =  in[j];
3827             }
3828           } else col = in[j];
3829           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
3830         }
3831       }
3832     } else {
3833       if (!baij->donotstash) {
3834         if (roworiented) {
3835           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
3836         } else {
3837           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
3838         }
3839       }
3840     }
3841   }
3842 
3843   /* task normally handled by MatSetValuesBlocked() */
3844   ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
3845   PetscFunctionReturn(0);
3846 }
3847 
3848 #undef __FUNCT__
3849 #define __FUNCT__ "MatCreateMPIBAIJWithArrays"
3850 /*@
3851      MatCreateMPIBAIJWithArrays - creates an MPI BAIJ matrix using arrays that contain the local rows in standard
3852          CSR format.
3853 
3854    Collective on MPI_Comm
3855 
3856    Input Parameters:
3857 +  comm - MPI communicator
3858 .  bs - the block size, only a block size of 1 is supported
3859 .  m - number of local rows (Cannot be PETSC_DECIDE)
3860 .  n - This value should be the same as the local size used in creating the
3861        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3862        calculated if N is given) For square matrices n is almost always m.
3863 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3864 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3865 .   i - row indices
3866 .   j - column indices
3867 -   a - matrix values
3868 
3869    Output Parameter:
3870 .   mat - the matrix
3871 
3872    Level: intermediate
3873 
3874    Notes:
3875        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3876      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3877      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3878 
3879      The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3880      the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3881      block, followed by the second column of the first block, and so on.  That is, the blocks are contiguous in memory
3882      with column-major ordering within blocks.
3883 
3884        The i and j indices are 0 based, and the i entries are offsets into the local j array.
3885 
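   A minimal sketch for bs = 1 (error checking elided; ii, jj, and aa are hypothetical arrays holding the
   local rows in CSR form):
.vb
   Mat A;
   MatCreateMPIBAIJWithArrays(PETSC_COMM_WORLD,1,m,n,PETSC_DETERMINE,PETSC_DETERMINE,ii,jj,aa,&A);
.ve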
3886 .keywords: matrix, aij, compressed row, sparse, parallel
3887 
3888 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3889           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
3890 @*/
3891 PetscErrorCode  MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3892 {
3893   PetscErrorCode ierr;
3894 
3895   PetscFunctionBegin;
3896   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3897   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3898   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3899   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3900   ierr = MatSetType(*mat,MATMPIBAIJ);CHKERRQ(ierr);
3901   ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
3902   ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
3903   ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
3904   PetscFunctionReturn(0);
3905 }
3906