xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 0c8c14c502e79aaeb7ec26e1cd1e476322d9010e)

#include <../src/mat/impls/baij/mpi/mpibaij.h>   /*I  "petscmat.h"  I*/
#include <petscblaslapack.h>
#include <petscsf.h>

extern PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
extern PetscErrorCode MatDisAssemble_MPIBAIJ(Mat);
extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar[]);
extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
extern PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
extern PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar,Vec,Vec);

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ"
PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = NULL;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);}
  ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++) {
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);
  ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

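/*
   Illustrative sketch (not part of the library build): how an off-diagonal
   index returned by MatGetRowMaxAbs() on the B part is mapped back to a
   global column, mirroring the expression above.  garray[] maps local block
   columns of B to global block columns; bs is the block size.  The helper
   name and the numbers in the trailing comment are hypothetical.
*/
#if 0
static PetscInt ExampleMapOffDiagIndex(const PetscInt *garray,PetscInt bs,PetscInt idxb_i)
{
  PetscInt lbcol = idxb_i/bs;      /* local block column in B */
  PetscInt comp  = idxb_i % bs;    /* component within the block */
  return bs*garray[lbcol] + comp;  /* global point column index */
}
/* e.g. bs = 2, garray = {3,7}, idxb_i = 3: block 1, component 1 -> 2*7 + 1 = 15 */
#endif
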
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPIBAIJ"
PetscErrorCode  MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
PetscErrorCode  MatRetrieveValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
     Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  This is done in a non-scalable way, since the
   length of colmap equals the global number of block columns.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateColmap_MPIBAIJ_Private"
PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *B    = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;

  PetscFunctionBegin;
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
  for (i=0; i<nbs; i++) {
    ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc1(baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}

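/*
   Illustrative sketch (not part of the library build): translating a global
   point column index gcol through the colmap built above.  Entries are stored
   shifted by +1 so that 0 can mean "block column not present"; the array
   branch is indexed by global block column.  Hypothetical helper, non-CTABLE
   branch only.
*/
#if 0
static PetscInt ExampleColmapLookup(const PetscInt *colmap,PetscInt bs,PetscInt gcol)
{
  PetscInt lcol = colmap[gcol/bs] - 1;  /* local point offset of the block column, or -1 */
  if (lcol < 0) return -1;              /* not present in the off-diagonal part */
  return lcol + gcol % bs;              /* add the component within the block */
}
#endif
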
#define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
  { \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value;  \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
a_noinsert:; \
    ailen[brow] = nrow; \
  }

#define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
  { \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value;  \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
b_noinsert:; \
    bilen[brow] = nrow; \
  }

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ"
PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  MatScalar      value;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;

  /* Some variables required by the macros */
  Mat         A     = baij->A;
  Mat_SeqBAIJ *a    = (Mat_SeqBAIJ*)(A)->data;
  PetscInt    *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar   *aa   =a->a;

  Mat         B     = baij->B;
  Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
  PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar   *ba   =b->a;

  PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar *ap,*bap;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      row = im[i] - rstart_orig;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig) {
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B    = baij->B;
              b    = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba   =b->a;
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
            else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

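/*
   Usage sketch (standard PETSc API; matrix and indices are hypothetical):
   values for locally owned rows go straight into the diagonal (A) or
   off-diagonal (B) block through the macros above, while rows owned by other
   processes are stashed and communicated during assembly.
*/
#if 0
PetscErrorCode ExampleSetValues(Mat mat)
{
  PetscErrorCode ierr;
  PetscInt       row = 4,cols[2] = {4,5};
  PetscScalar    vals[2] = {2.0,-1.0};

  ierr = MatSetValues(mat,1,&row,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
  /* any off-process entries are sent to their owners during assembly */
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  return 0;
}
#endif
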
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  const PetscScalar *value;
  MatScalar         *barray     = baij->barray;
  PetscBool         roworiented = baij->roworiented;
  PetscErrorCode    ierr;
  PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
  PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) stepval = (n-1)*bs;
  else stepval = (m-1)*bs;

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If n == 1 (row-oriented) or m == 1 (column-oriented), a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + (i*(stepval+bs) + j)*bs;
          } else {
            value = v + (j*(stepval+bs) + i)*bs;
          }
          for (ii=0; ii<bs; ii++,value+=bs+stepval) {
            for (jj=0; jj<bs; jj++) barray[jj] = value[jj];
            barray += bs;
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend) {
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined(PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", bs*im[i], bs*in[j]);
          } else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

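/*
   Illustrative sketch (hypothetical matrix and indices): the layout of v
   expected by MatSetValuesBlocked() with row-oriented storage.  For bs = 2,
   one block row (m = 1) and two block columns (n = 2), each of the bs point
   rows of the logical 2 x 4 strip is stored contiguously, so
   stepval = (n-1)*bs = 2 skips the other block's columns when one block is
   copied into barray.
*/
#if 0
PetscErrorCode ExampleSetValuesBlocked(Mat mat)
{
  PetscErrorCode ierr;
  PetscInt       brow = 0,bcols[2] = {0,1};
  PetscScalar    v[8] = {1,2, 5,6,   /* point row 0: block 0, then block 1 */
                         3,4, 7,8};  /* point row 1: block 0, then block 1 */

  /* inserts the blocks [1 2; 3 4] and [5 6; 7 8] at block columns 0 and 1 */
  ierr = MatSetValuesBlocked(mat,1,&brow,2,bcols,v,INSERT_VALUES);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  return 0;
}
#endif
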
#define HASH_KEY 0.6180339887
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
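/*
   Worked example: HASH() is Knuth's multiplicative hash with the golden-ratio
   constant; it scales the fractional part of key*HASH_KEY to a slot in
   [0,size).  For size = 100 and key = 37: 37*0.6180339887 = 22.867...,
   fractional part 0.867..., so the slot is 86.  Collisions are resolved by
   the linear-probing loops in the routines below.  Hypothetical helper:
*/
#if 0
static PetscInt ExampleHash(PetscInt size,PetscInt key)
{
  PetscReal tmp;
  return HASH(size,key,tmp);  /* ExampleHash(100,37) == 86 */
}
#endif
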
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij       = (Mat_MPIBAIJ*)mat->data;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig  =mat->rmap->rend,Nbs=baij->Nbs;
  PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
  PetscReal      tmp;
  MatScalar      **HD = baij->hd,value;
#if defined(PETSC_USE_DEBUG)
  PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j=0; j<n; j++) {
        col = in[j];
        if (roworiented) value = v[i*n+j];
        else             value = v[i+j*m];
        /* Look up in the hash table */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
#endif
        /* A hash table entry was found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else if (!baij->donotstash) {
      if (roworiented) {
        ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
      } else {
        ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct  = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ       *baij       = (Mat_MPIBAIJ*)mat->data;
  PetscBool         roworiented = baij->roworiented;
  PetscErrorCode    ierr;
  PetscInt          i,j,ii,jj,row,col;
  PetscInt          rstart=baij->rstartbs;
  PetscInt          rend  =mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
  PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  PetscReal         tmp;
  MatScalar         **HD = baij->hd,*baij_a;
  const PetscScalar *v_t,*value;
#if defined(PETSC_USE_DEBUG)
  PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;
  if (roworiented) stepval = (n-1)*bs;
  else stepval = (m-1)*bs;

  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
#endif
    row = im[i];
    v_t = v + i*nbs2;
    if (row >= rstart && row < rend) {
      for (j=0; j<n; j++) {
        col = in[j];

        /* Look up in the hash table */
        key = row*Nbs+col+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        total_ct++;
        insert_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
#endif
        baij_a = HD[idx];
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t  += bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] = *value++;
              }
            }
          }
        } else {
          value = v + j*(stepval+bs)*bs + i*bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] = *value++;
              }
            }
          }
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct  = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIBAIJ"
PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
  PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          col  = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

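/*
   Usage sketch (standard PETSc API; matrix and indices are hypothetical):
   MatGetValues() on an MPIBAIJ matrix supports only locally owned rows, so
   queries should be restricted with MatGetOwnershipRange().
*/
#if 0
PetscErrorCode ExampleGetLocalValue(Mat mat,PetscInt grow,PetscInt gcol,PetscScalar *val)
{
  PetscErrorCode ierr;
  PetscInt       rstart,rend;

  ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
  if (grow >= rstart && grow < rend) {
    ierr = MatGetValues(mat,1,&grow,1,&gcol,val);CHKERRQ(ierr);
  } else *val = 0.0; /* the row lives on another process; fetching it would error */
  return 0;
}
#endif
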
#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIBAIJ"
PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v  = amat->a;
      nz = amat->nz*bs2;
      for (i=0; i<nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      v  = bmat->a;
      nz = bmat->nz*bs2;
      for (i=0; i<nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *nrm = PetscSqrtReal(*nrm);
    } else if (type == NORM_1) { /* max column sum */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
      ierr = PetscMalloc2(mat->cmap->N,&tmp,mat->cmap->N,&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      v    = amat->a; jj = amat->j;
      for (i=0; i<amat->nz; i++) {
        for (j=0; j<bs; j++) {
          col = bs*(cstart + *jj) + j; /* column index */
          for (row=0; row<bs; row++) {
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      v = bmat->a; jj = bmat->j;
      for (i=0; i<bmat->nz; i++) {
        for (j=0; j<bs; j++) {
          col = bs*garray[*jj] + j;
          for (row=0; row<bs; row++) {
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *nrm = 0.0;
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row sum */
      PetscReal *sums;
      ierr = PetscMalloc1(bs,&sums);CHKERRQ(ierr);
      sum  = 0.0;
      for (j=0; j<amat->mbs; j++) {
        for (row=0; row<bs; row++) sums[row] = 0.0;
        v  = amat->a + bs2*amat->i[j];
        nz = amat->i[j+1]-amat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        v  = bmat->a + bs2*bmat->i[j];
        nz = bmat->i[j+1]-bmat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        for (row=0; row<bs; row++) {
          if (sums[row] > sum) sum = sums[row];
        }
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscFree(sums);CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for this norm yet");
  }
  PetscFunctionReturn(0);
}

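/*
   Worked example for the three norms above: for the 2 x 2 matrix [1 -2; 3 4],
     NORM_FROBENIUS = sqrt(1 + 4 + 9 + 16) = sqrt(30),
     NORM_1         = max column sum of |a_ij| = max(4,6) = 6,
     NORM_INFINITY  = max row sum of |a_ij|    = max(3,7) = 7.
   In parallel the partial sums are combined with MPIU_SUM (Frobenius, 1-norm)
   and the partial row maxima with MPIU_MAX (infinity norm).  Hypothetical
   usage:
*/
#if 0
PetscErrorCode ExampleNorms(Mat mat)
{
  PetscErrorCode ierr;
  PetscReal      fro,one,inf;

  ierr = MatNorm(mat,NORM_FROBENIUS,&fro);CHKERRQ(ierr);
  ierr = MatNorm(mat,NORM_1,&one);CHKERRQ(ierr);
  ierr = MatNorm(mat,NORM_INFINITY,&inf);CHKERRQ(ierr);
  return 0;
}
#endif
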
/*
  Creates and populates the hash table.
  The table is created only once.
  If new entries need to be added to the matrix,
  then the hash table has to be destroyed and
  recreated.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            A     = baij->A,B=baij->B;
  Mat_SeqBAIJ    *a    = (Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)B->data;
  PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
  PetscErrorCode ierr;
  PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
  PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
  PetscInt       *HT,key;
  MatScalar      **HD;
  PetscReal      tmp;
#if defined(PETSC_USE_INFO)
  PetscInt ct=0,max=0;
#endif

  PetscFunctionBegin;
  if (baij->ht) PetscFunctionReturn(0);

  baij->ht_size = (PetscInt)(factor*nz);
  ht_size       = baij->ht_size;

  /* Allocate Memory for Hash Table */
  ierr = PetscCalloc2(ht_size,&baij->hd,ht_size,&baij->ht);CHKERRQ(ierr);
  HD   = baij->hd;
  HT   = baij->ht;

  /* Loop Over A */
  for (i=0; i<a->mbs; i++) {
    for (j=ai[i]; j<ai[i+1]; j++) {
      row = i+rstart;
      col = aj[j]+cstart;

      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++) {
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = a->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }
  /* Loop Over B */
  for (i=0; i<b->mbs; i++) {
    for (j=bi[i]; j<bi[i+1]; j++) {
      row = i+rstart;
      col = garray[bj[j]];
      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++) {
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = b->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }

  /* Print Summary */
#if defined(PETSC_USE_INFO)
  for (i=0,j=0; i<ht_size; i++) {
    if (HT[i]) j++;
  }
  ierr = PetscInfo2(mat,"Average Search = %5.2f, max search = %D\n",(!j) ? 0.0 : ((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  /* make sure all processes are using either INSERT_VALUES or ADD_VALUES */
  ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted, others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

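/*
   Why MPI_BOR works above (explanatory sketch): InsertMode has
   NOT_SET_VALUES = 0, INSERT_VALUES = 1 and ADD_VALUES = 2, so a bitwise OR
   across all ranks yields 1 (everyone inserted), 2 (everyone added), or
   3 == (ADD_VALUES|INSERT_VALUES) when the ranks disagree, which is exactly
   the mixed-mode error detected there.  Hypothetical standalone check:
*/
#if 0
static PetscErrorCode ExampleCheckInsertMode(MPI_Comm comm,InsertMode local,InsertMode *global)
{
  PetscErrorCode ierr;

  ierr = MPI_Allreduce((PetscEnum*)&local,(PetscEnum*)global,1,MPIU_ENUM,MPI_BOR,comm);CHKERRQ(ierr);
  if (*global == (ADD_VALUES|INSERT_VALUES)) SETERRQ(comm,PETSC_ERR_ARG_WRONGSTATE,"Some processes inserted, others added");
  return 0;
}
#endif
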
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *a   =(Mat_SeqBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscBool      r1,r2,r3,other_disassembled;
  MatScalar      *val;
  InsertMode     addv = mat->insertmode;
  PetscMPIInt    n;

  PetscFunctionBegin;
  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  if (!baij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       clear the roworiented flags, and restore the original flags after
       MatSetValues() */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);

    baij->roworiented = r1;
    a->roworiented    = r2;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of the submatrix B cannot change, then we know
     that no processor disassembled, so we can skip this step
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);

    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);

    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);

  baij->rowvalues = NULL;

  /* if no new nonzero locations are allowed in the matrix, then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatView_SeqBAIJ(Mat,PetscViewer);
#include <petscdraw.h>
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = baij->rank;
  PetscInt          bs   = mat->rmap->bs;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
                                                rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto the first processor. */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar   *a;

    /* Here we are creating a temporary matrix, so we will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
    ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ*)baij->B->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call this to draw the matrix, since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatView_SeqBAIJ(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_Binary"
static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *A = (Mat_SeqBAIJ*)a->A->data;
  Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)a->B->data;
  PetscErrorCode ierr;
  PetscInt       i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen;
  PetscInt       *range=NULL,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll;
  int            fd;
  PetscScalar    *column_values;
  FILE           *file;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  PetscInt       message_count,flowcontrolcount;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
  nz   = bs2*(A->nz + B->nz);
  rlen = mat->rmap->n;
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;

    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    range = mat->rmap->range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }

  ierr = PetscMalloc1(rlen/bs,&crow_lens);CHKERRQ(ierr);
  /* compute lengths of each row */
  for (i=0; i<a->mbs; i++) {
    crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }
  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscMalloc1(rlen,&row_lens);CHKERRQ(ierr);
    rlen = (range[1] - range[0])/bs;
    for (i=0; i<rlen; i++) {
      for (j=0; j<bs; j++) {
        row_lens[i*bs+j] = bs*crow_lens[i];
      }
    }
    ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = (range[i+1] - range[i])/bs;
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      for (k=0; k<rlen; k++) {
        for (j=0; j<bs; j++) {
          row_lens[k*bs+j] = bs*crow_lens[k];
        }
      }
      ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
    ierr = PetscFree(row_lens);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(crow_lens);CHKERRQ(ierr);

  /* load up the local column indices. Include them for all rows, not just one for
     each block row, since process 0 does not have the information needed to expand
     them for each row from a block row. This does require more communication, but
     still no more than the communication needed for the nonzero values */
  nzmax = nz; /* space the largest processor needs */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  ierr  = PetscMalloc1(nzmax,&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<a->mbs; i++) {
    pcnt = cnt;
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      for (l=0; l<bs; l++) {
        column_indices[cnt++] = bs*col+l;
      }
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      for (l=0; l<bs; l++) {
        column_indices[cnt++] = bs*(A->j[k] + cstart)+l;
      }
    }
    for (; j<B->i[i+1]; j++) {
      for (l=0; l<bs; l++) {
        column_indices[cnt++] = bs*garray[B->j[j]]+l;
      }
    }
    len = cnt - pcnt;
    for (k=1; k<bs; k++) {
      ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr);
      cnt += len;
    }
  }
  if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);

  /* store the columns to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the numerical values */
  ierr = PetscMalloc1(nzmax,&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<a->mbs; i++) {
    rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]);
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      for (l=0; l<bs; l++) {
        for (ll=0; ll<bs; ll++) {
          column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
        }
      }
      cnt += bs;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      for (l=0; l<bs; l++) {
        for (ll=0; ll<bs; ll++) {
          column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll];
        }
      }
      cnt += bs;
    }
    for (; j<B->i[i+1]; j++) {
      for (l=0; l<bs; l++) {
        for (ll=0; ll<bs; ll++) {
          column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
        }
      }
      cnt += bs;
    }
    cnt += (bs-1)*rlen;
  }
  if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);

  /* store the column values to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);

  ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
  if (file) {
    fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
  }
  PetscFunctionReturn(0);
}

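/*
   Usage sketch (standard PETSc API; the file name is hypothetical): saving an
   MPIBAIJ matrix with a binary viewer and reading it back.  The info file
   written above supplies -matload_block_size so that MatLoad() can recover
   the block size.
*/
#if 0
PetscErrorCode ExampleBinarySaveLoad(Mat mat,MPI_Comm comm)
{
  PetscErrorCode ierr;
  PetscViewer    viewer;
  Mat            B;

  ierr = PetscViewerBinaryOpen(comm,"baij.dat",FILE_MODE_WRITE,&viewer);CHKERRQ(ierr);
  ierr = MatView(mat,viewer);CHKERRQ(ierr);
  ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);

  ierr = PetscViewerBinaryOpen(comm,"baij.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
  ierr = MatCreate(comm,&B);CHKERRQ(ierr);
  ierr = MatSetType(B,MATMPIBAIJ);CHKERRQ(ierr);
  ierr = MatLoad(B,viewer);CHKERRQ(ierr);
  ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
  ierr = MatDestroy(&B);CHKERRQ(ierr);
  return 0;
}
#endif
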
1244 #undef __FUNCT__
1245 #define __FUNCT__ "MatView_MPIBAIJ"
1246 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
1247 {
1248   PetscErrorCode ierr;
1249   PetscBool      iascii,isdraw,issocket,isbinary;
1250 
1251   PetscFunctionBegin;
1252   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1253   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1254   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1255   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1256   if (iascii || isdraw || issocket) {
1257     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1258   } else if (isbinary) {
1259     ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1260   }
1261   PetscFunctionReturn(0);
1262 }
1263 
1264 #undef __FUNCT__
1265 #define __FUNCT__ "MatDestroy_MPIBAIJ"
1266 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
1267 {
1268   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1269   PetscErrorCode ierr;
1270 
1271   PetscFunctionBegin;
1272 #if defined(PETSC_USE_LOG)
1273   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
1274 #endif
1275   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1276   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
1277   ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
1278   ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
1279 #if defined(PETSC_USE_CTABLE)
1280   ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
1281 #else
1282   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
1283 #endif
1284   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
1285   ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
1286   ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
1287   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
1288   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
1289   ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);
1290   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
1291   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1292 
1293   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1294   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1295   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1296   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
1297   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1298   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1299   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1300   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C",NULL);CHKERRQ(ierr);
1301   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1302   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C",NULL);CHKERRQ(ierr);
1303   PetscFunctionReturn(0);
1304 }
1305 
1306 #undef __FUNCT__
1307 #define __FUNCT__ "MatMult_MPIBAIJ"
1308 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1309 {
1310   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1311   PetscErrorCode ierr;
1312   PetscInt       nt;
1313 
1314   PetscFunctionBegin;
1315   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1316   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1317   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1318   if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
1319   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1320   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1321   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1322   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1323   PetscFunctionReturn(0);
1324 }
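
/*
   Editor's sketch (addition, not part of the original source): minimal use of
   MatMult() on an MPIBAIJ matrix, which dispatches to MatMult_MPIBAIJ() above.
   MatGetVecs() creates vectors whose layouts match the matrix, so the local-size
   checks above pass.

     Vec x,y;
     ierr = MatGetVecs(A,&x,&y);CHKERRQ(ierr);    x: column layout, y: row layout
     ierr = VecSet(x,1.0);CHKERRQ(ierr);
     ierr = MatMult(A,x,y);CHKERRQ(ierr);         y = A*x; the ghost scatter is overlapped with the a->A multiply
     ierr = VecDestroy(&x);CHKERRQ(ierr);
     ierr = VecDestroy(&y);CHKERRQ(ierr);
*/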
1325 
1326 #undef __FUNCT__
1327 #define __FUNCT__ "MatMultAdd_MPIBAIJ"
1328 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1329 {
1330   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1331   PetscErrorCode ierr;
1332 
1333   PetscFunctionBegin;
1334   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1335   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1336   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1337   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1338   PetscFunctionReturn(0);
1339 }
1340 
1341 #undef __FUNCT__
1342 #define __FUNCT__ "MatMultTranspose_MPIBAIJ"
1343 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1344 {
1345   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1346   PetscErrorCode ierr;
1347   PetscBool      merged;
1348 
1349   PetscFunctionBegin;
1350   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1351   /* do nondiagonal part */
1352   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1353   if (!merged) {
1354     /* send it on its way */
1355     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1356     /* do local part */
1357     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1358     /* receive remote parts: note this assumes the values are not actually */
1359     /* inserted in yy until the next line */
1360     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1361   } else {
1362     /* do local part */
1363     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1364     /* send it on its way */
1365     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1366     /* values actually were received in the Begin() but we need to call this nop */
1367     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1368   }
1369   PetscFunctionReturn(0);
1370 }
1371 
1372 #undef __FUNCT__
1373 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
1374 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1375 {
1376   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1377   PetscErrorCode ierr;
1378 
1379   PetscFunctionBegin;
1380   /* do nondiagonal part */
1381   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1382   /* send it on its way */
1383   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1384   /* do local part */
1385   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1386   /* receive remote parts: note this assumes the values are not actually */
1387   /* added to zz until VecScatterEnd() below, which is true for this */
1388   /* implementation but may not always be true. */
1389   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1390   PetscFunctionReturn(0);
1391 }
1392 
1393 /*
1394   This only works correctly for square matrices where the subblock A->A is the
1395    diagonal block
1396 */
1397 #undef __FUNCT__
1398 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
1399 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1400 {
1401   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1402   PetscErrorCode ierr;
1403 
1404   PetscFunctionBegin;
1405   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1406   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1407   PetscFunctionReturn(0);
1408 }
1409 
1410 #undef __FUNCT__
1411 #define __FUNCT__ "MatScale_MPIBAIJ"
1412 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
1413 {
1414   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1415   PetscErrorCode ierr;
1416 
1417   PetscFunctionBegin;
1418   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1419   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1420   PetscFunctionReturn(0);
1421 }
1422 
1423 #undef __FUNCT__
1424 #define __FUNCT__ "MatGetRow_MPIBAIJ"
1425 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1426 {
1427   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
1428   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1429   PetscErrorCode ierr;
1430   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1431   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
1432   PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;
1433 
1434   PetscFunctionBegin;
1435   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1436   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1437   mat->getrowactive = PETSC_TRUE;
1438 
1439   if (!mat->rowvalues && (idx || v)) {
1440     /*
1441         allocate enough space to hold information from the longest row.
1442     */
1443     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1444     PetscInt    max = 1,mbs = mat->mbs,tmp;
1445     for (i=0; i<mbs; i++) {
1446       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1447       if (max < tmp) max = tmp;
1448     }
1449     ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr);
1450   }
1451   lrow = row - brstart;
1452 
1453   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1454   if (!v)   {pvA = 0; pvB = 0;}
1455   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1456   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1457   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1458   nztot = nzA + nzB;
1459 
1460   cmap = mat->garray;
1461   if (v  || idx) {
1462     if (nztot) {
1463       /* Sort by increasing column numbers, assuming A and B already sorted */
1464       PetscInt imark = -1;
1465       if (v) {
1466         *v = v_p = mat->rowvalues;
1467         for (i=0; i<nzB; i++) {
1468           if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
1469           else break;
1470         }
1471         imark = i;
1472         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1473         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1474       }
1475       if (idx) {
1476         *idx = idx_p = mat->rowindices;
1477         if (imark > -1) {
1478           for (i=0; i<imark; i++) {
1479             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1480           }
1481         } else {
1482           for (i=0; i<nzB; i++) {
1483             if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1484             else break;
1485           }
1486           imark = i;
1487         }
1488         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1489         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1490       }
1491     } else {
1492       if (idx) *idx = 0;
1493       if (v)   *v   = 0;
1494     }
1495   }
1496   *nz  = nztot;
1497   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1498   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1499   PetscFunctionReturn(0);
1500 }
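
/*
   Editor's sketch (addition, not part of the original source): the standard
   MatGetRow()/MatRestoreRow() pattern served by the routine above. Only locally
   owned rows may be requested, and only one row may be active at a time.

     PetscInt          row,rstart,rend,ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       ... cols[] are sorted global column numbers, vals[] the matching entries ...
       ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     }
*/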
1501 
1502 #undef __FUNCT__
1503 #define __FUNCT__ "MatRestoreRow_MPIBAIJ"
1504 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1505 {
1506   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1507 
1508   PetscFunctionBegin;
1509   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1510   baij->getrowactive = PETSC_FALSE;
1511   PetscFunctionReturn(0);
1512 }
1513 
1514 #undef __FUNCT__
1515 #define __FUNCT__ "MatZeroEntries_MPIBAIJ"
1516 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1517 {
1518   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1519   PetscErrorCode ierr;
1520 
1521   PetscFunctionBegin;
1522   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1523   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1524   PetscFunctionReturn(0);
1525 }
1526 
1527 #undef __FUNCT__
1528 #define __FUNCT__ "MatGetInfo_MPIBAIJ"
1529 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1530 {
1531   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
1532   Mat            A  = a->A,B = a->B;
1533   PetscErrorCode ierr;
1534   PetscReal      isend[5],irecv[5];
1535 
1536   PetscFunctionBegin;
1537   info->block_size = (PetscReal)matin->rmap->bs;
1538 
1539   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1540 
1541   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1542   isend[3] = info->memory;  isend[4] = info->mallocs;
1543 
1544   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1545 
1546   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1547   isend[3] += info->memory;  isend[4] += info->mallocs;
1548 
1549   if (flag == MAT_LOCAL) {
1550     info->nz_used      = isend[0];
1551     info->nz_allocated = isend[1];
1552     info->nz_unneeded  = isend[2];
1553     info->memory       = isend[3];
1554     info->mallocs      = isend[4];
1555   } else if (flag == MAT_GLOBAL_MAX) {
1556     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1557 
1558     info->nz_used      = irecv[0];
1559     info->nz_allocated = irecv[1];
1560     info->nz_unneeded  = irecv[2];
1561     info->memory       = irecv[3];
1562     info->mallocs      = irecv[4];
1563   } else if (flag == MAT_GLOBAL_SUM) {
1564     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1565 
1566     info->nz_used      = irecv[0];
1567     info->nz_allocated = irecv[1];
1568     info->nz_unneeded  = irecv[2];
1569     info->memory       = irecv[3];
1570     info->mallocs      = irecv[4];
1571   } else SETERRQ1(PetscObjectComm((PetscObject)matin),PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1572   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1573   info->fill_ratio_needed = 0;
1574   info->factor_mallocs    = 0;
1575   PetscFunctionReturn(0);
1576 }
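
/*
   Editor's sketch (addition, not part of the original source): querying the
   statistics reduced above. MAT_LOCAL reports this process only; MAT_GLOBAL_MAX
   and MAT_GLOBAL_SUM reduce over the communicator.

     MatInfo info;
     ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(PETSC_COMM_WORLD,"nz used %g, allocated %g\n",info.nz_used,info.nz_allocated);CHKERRQ(ierr);
*/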
1577 
1578 #undef __FUNCT__
1579 #define __FUNCT__ "MatSetOption_MPIBAIJ"
1580 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg)
1581 {
1582   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1583   PetscErrorCode ierr;
1584 
1585   PetscFunctionBegin;
1586   switch (op) {
1587   case MAT_NEW_NONZERO_LOCATIONS:
1588   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1589   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1590   case MAT_KEEP_NONZERO_PATTERN:
1591   case MAT_NEW_NONZERO_LOCATION_ERR:
1592     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1593     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1594     break;
1595   case MAT_ROW_ORIENTED:
1596     a->roworiented = flg;
1597 
1598     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1599     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1600     break;
1601   case MAT_NEW_DIAGONALS:
1602     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1603     break;
1604   case MAT_IGNORE_OFF_PROC_ENTRIES:
1605     a->donotstash = flg;
1606     break;
1607   case MAT_USE_HASH_TABLE:
1608     a->ht_flag = flg;
1609     break;
1610   case MAT_SYMMETRIC:
1611   case MAT_STRUCTURALLY_SYMMETRIC:
1612   case MAT_HERMITIAN:
1613   case MAT_SYMMETRY_ETERNAL:
1614     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1615     break;
1616   default:
1617     SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"unknown option %d",op);
1618   }
1619   PetscFunctionReturn(0);
1620 }
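
/*
   Editor's sketch (addition, not part of the original source): options commonly
   toggled through the switch above; MAT_USE_HASH_TABLE is specific to the BAIJ
   family and enables hash-based insertion during assembly.

     ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);   drop nonlocal entries
     ierr = MatSetOption(A,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
     ierr = MatSetOption(A,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);                 forwarded to a->A only
*/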
1621 
1622 #undef __FUNCT__
1623 #define __FUNCT__ "MatTranspose_MPIBAIJ"
1624 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
1625 {
1626   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
1627   Mat_SeqBAIJ    *Aloc;
1628   Mat            B;
1629   PetscErrorCode ierr;
1630   PetscInt       M =A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
1631   PetscInt       bs=A->rmap->bs,mbs=baij->mbs;
1632   MatScalar      *a;
1633 
1634   PetscFunctionBegin;
1635   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1636   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1637     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1638     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1639     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1640     /* Do not know preallocation information, but must set block size */
1641     ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,NULL,PETSC_DECIDE,NULL);CHKERRQ(ierr);
1642   } else {
1643     B = *matout;
1644   }
1645 
1646   /* copy over the A part */
1647   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1648   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1649   ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);
1650 
1651   for (i=0; i<mbs; i++) {
1652     rvals[0] = bs*(baij->rstartbs + i);
1653     for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1654     for (j=ai[i]; j<ai[i+1]; j++) {
1655       col = (baij->cstartbs+aj[j])*bs;
1656       for (k=0; k<bs; k++) {
1657         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1658 
1659         col++; a += bs;
1660       }
1661     }
1662   }
1663   /* copy over the B part */
1664   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1665   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1666   for (i=0; i<mbs; i++) {
1667     rvals[0] = bs*(baij->rstartbs + i);
1668     for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1669     for (j=ai[i]; j<ai[i+1]; j++) {
1670       col = baij->garray[aj[j]]*bs;
1671       for (k=0; k<bs; k++) {
1672         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1673         col++;
1674         a += bs;
1675       }
1676     }
1677   }
1678   ierr = PetscFree(rvals);CHKERRQ(ierr);
1679   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1680   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1681 
1682   if (reuse == MAT_INITIAL_MATRIX || *matout != A) *matout = B;
1683   else {
1684     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
1685   }
1686   PetscFunctionReturn(0);
1687 }
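
/*
   Editor's sketch (addition, not part of the original source): the two reuse
   modes handled above.

     Mat At;
     ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);   new matrix At = transpose of A
     ierr = MatDestroy(&At);CHKERRQ(ierr);
     ierr = MatTranspose(A,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);      in-place; square matrices only
*/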
1688 
1689 #undef __FUNCT__
1690 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
1691 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1692 {
1693   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1694   Mat            a     = baij->A,b = baij->B;
1695   PetscErrorCode ierr;
1696   PetscInt       s1,s2,s3;
1697 
1698   PetscFunctionBegin;
1699   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1700   if (rr) {
1701     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1702     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1703     /* Overlap communication with computation. */
1704     ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1705   }
1706   if (ll) {
1707     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1708     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1709     ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
1710   }
1711   /* scale the diagonal block */
1712   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1713 
1714   if (rr) {
1715     /* Do a scatter end and then right scale the off-diagonal block */
1716     ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1717     ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
1718   }
1719   PetscFunctionReturn(0);
1720 }
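
/*
   Editor's sketch (addition, not part of the original source): computes
   A = diag(ll) * A * diag(rr); the vectors must conform to the row and column
   layouts respectively, as checked above.

     Vec l,r;
     ierr = MatGetVecs(A,&r,&l);CHKERRQ(ierr);   r: column layout, l: row layout
     ierr = VecSet(l,2.0);CHKERRQ(ierr);
     ierr = VecSet(r,0.5);CHKERRQ(ierr);
     ierr = MatDiagonalScale(A,l,r);CHKERRQ(ierr);
     ierr = VecDestroy(&l);CHKERRQ(ierr);
     ierr = VecDestroy(&r);CHKERRQ(ierr);
*/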
1721 
1722 #undef __FUNCT__
1723 #define __FUNCT__ "MatZeroRows_MPIBAIJ"
1724 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
1725 {
1726   Mat_MPIBAIJ   *l      = (Mat_MPIBAIJ *) A->data;
1727   PetscInt      *owners = A->rmap->range;
1728   PetscInt       n      = A->rmap->n;
1729   PetscMPIInt    size   = l->size;
1730   PetscSF        sf;
1731   PetscInt      *lrows;
1732   PetscSFNode   *rrows;
1733   PetscInt       lastidx = -1, r, p = 0, len = 0;
1734   PetscErrorCode ierr;
1735 
1736   PetscFunctionBegin;
1737   /* Create SF where leaves are input rows and roots are owned rows */
1738   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
1739   for (r = 0; r < n; ++r) lrows[r] = -1;
1740   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
1741   for (r = 0; r < N; ++r) {
1742     const PetscInt idx   = rows[r];
1743     PetscBool      found = PETSC_FALSE;
1744     /* Trick for efficient searching for sorted rows */
1745     if (lastidx > idx) p = 0;
1746     lastidx = idx;
1747     for (; p < size; ++p) {
1748       if (idx >= owners[p] && idx < owners[p+1]) {
1749         rrows[r].rank  = p;
1750         rrows[r].index = rows[r] - owners[p];
1751         found = PETSC_TRUE;
1752         break;
1753       }
1754     }
1755     if (!found) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %d not found in matrix distribution", idx);
1756   }
1757   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
1758   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
1759   /* Collect flags for rows to be zeroed */
1760   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1761   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1762   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1763   /* Compress and put in row numbers */
1764   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
1765   /* fix right hand side if needed */
1766   if (x && b) {
1767     const PetscScalar *xx;
1768     PetscScalar       *bb;
1769 
1770     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
1771     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1772     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
1773     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
1774     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1775   }
1776 
1777   /* actually zap the local rows */
1778   /*
1779         Zero the required rows. If the "diagonal block" of the matrix
1780      is square and the user wishes to set the diagonal, we use separate
1781      code so that MatSetValues() is not called for each diagonal entry,
1782      which would allocate new memory and trigger many mallocs, slowing
1783      things down.
1784   */
1785   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
1786   ierr = MatZeroRows_SeqBAIJ(l->B,len,lrows,0.0,0,0);CHKERRQ(ierr);
1787   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
1788     ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,diag,0,0);CHKERRQ(ierr);
1789   } else if (diag != 0.0) {
1790     ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,0,0);CHKERRQ(ierr);
1791     if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1792        MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1793     for (r = 0; r < len; ++r) {
1794       const PetscInt row = lrows[r] + A->rmap->rstart;
1795       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
1796     }
1797     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1798     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1799   } else {
1800     ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,0,0);CHKERRQ(ierr);
1801   }
1802   ierr = PetscFree(lrows);CHKERRQ(ierr);
1803 
1804   /* only change matrix nonzero state if pattern was allowed to be changed */
1805   if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) {
1806     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1807     ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1808   }
1809   PetscFunctionReturn(0);
1810 }
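
/*
   Editor's sketch (addition, not part of the original source): the typical
   boundary-condition use of MatZeroRows() served by the routine above. Each
   process may pass any global rows; the PetscSF routes them to their owners.
   Passing x and b also fixes the right-hand side so b[row] = diag*x[row].

     PetscInt rows[] = {0,1};   hypothetical global (point, not block) row numbers
     ierr = MatZeroRows(A,2,rows,1.0,x,b);CHKERRQ(ierr);
*/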
1811 
1812 #undef __FUNCT__
1813 #define __FUNCT__ "MatZeroRowsColumns_MPIBAIJ"
1814 PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
1815 {
1816   Mat_MPIBAIJ       *l = (Mat_MPIBAIJ*)A->data;
1817   PetscErrorCode    ierr;
1818   PetscMPIInt       size = l->size;
1819   PetscInt          i,j,k,r,p = 0,len = 0,row,col,count,n = A->rmap->n,lastidx = -1;
1820   PetscInt          *lrows,*owners = A->rmap->range;
1821   PetscSFNode       *rrows;
1822   PetscSF           sf;
1823   const PetscScalar *xx;
1824   PetscScalar       *bb,*mask;
1825   Vec               xmask,lmask;
1826   Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ*)l->B->data;
1827   PetscInt           bs = A->rmap->bs, bs2 = baij->bs2;
1828   PetscScalar       *aa;
1829 #if defined(PETSC_DEBUG)
1830   PetscBool found = PETSC_FALSE;
1831 #endif
1832 
1833   PetscFunctionBegin;
1834   /* Create SF where leaves are input rows and roots are owned rows */
1835   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
1836   for (r = 0; r < n; ++r) lrows[r] = -1;
1837   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
1838   for (r = 0; r < N; ++r) {
1839     const PetscInt idx   = rows[r];
1840     PetscBool      found = PETSC_FALSE;
1841     /* Trick for efficient searching for sorted rows */
1842     if (lastidx > idx) p = 0;
1843     lastidx = idx;
1844     for (; p < size; ++p) {
1845       if (idx >= owners[p] && idx < owners[p+1]) {
1846         rrows[r].rank  = p;
1847         rrows[r].index = rows[r] - owners[p];
1848         found = PETSC_TRUE;
1849         break;
1850       }
1851     }
1852     if (!found) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %d not found in matrix distribution", idx);
1853   }
1854   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
1855   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
1856   /* Collect flags for rows to be zeroed */
1857   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1858   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1859   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1860   /* Compress and put in row numbers */
1861   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
1862   /* zero diagonal part of matrix */
1863   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
1864   /* handle the off-diagonal part of the matrix */
1865   ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr);
1866   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
1867   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
1868   for (i=0; i<len; i++) bb[lrows[i]] = 1;
1869   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1870   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1871   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1872   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1873   if (x) {
1874     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1875     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1876     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1877     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1878   }
1879   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1880   /* remove zeroed rows of the off-diagonal matrix */
1881   for (i = 0; i < len; ++i) {
1882     row   = lrows[i];
1883     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
1884     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
1885     for (k = 0; k < count; ++k) {
1886       aa[0] = 0.0;
1887       aa   += bs;
1888     }
1889   }
1890   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
1891   for (i = 0; i < l->B->rmap->N; ++i) {
1892     row = i/bs;
1893     for (j = baij->i[row]; j < baij->i[row+1]; ++j) {
1894       for (k = 0; k < bs; ++k) {
1895         col = bs*baij->j[j] + k;
1896         if (PetscAbsScalar(mask[col])) {
1897           aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
1898           if (b) bb[i] -= aa[0]*xx[col];
1899           aa[0] = 0.0;
1900         }
1901       }
1902     }
1903   }
1904   if (x) {
1905     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1906     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1907   }
1908   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1909   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1910   ierr = PetscFree(lrows);CHKERRQ(ierr);
1911 
1912   /* only change matrix nonzero state if pattern was allowed to be changed */
1913   if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) {
1914     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1915     ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1916   }
1917   PetscFunctionReturn(0);
1918 }
1919 
1920 #undef __FUNCT__
1921 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
1922 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1923 {
1924   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1925   PetscErrorCode ierr;
1926 
1927   PetscFunctionBegin;
1928   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1929   PetscFunctionReturn(0);
1930 }
1931 
1932 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat*);
1933 
1934 #undef __FUNCT__
1935 #define __FUNCT__ "MatEqual_MPIBAIJ"
1936 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool  *flag)
1937 {
1938   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1939   Mat            a,b,c,d;
1940   PetscBool      flg;
1941   PetscErrorCode ierr;
1942 
1943   PetscFunctionBegin;
1944   a = matA->A; b = matA->B;
1945   c = matB->A; d = matB->B;
1946 
1947   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1948   if (flg) {
1949     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1950   }
1951   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1952   PetscFunctionReturn(0);
1953 }
1954 
1955 #undef __FUNCT__
1956 #define __FUNCT__ "MatCopy_MPIBAIJ"
1957 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
1958 {
1959   PetscErrorCode ierr;
1960   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1961   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
1962 
1963   PetscFunctionBegin;
1964   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1965   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1966     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1967   } else {
1968     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1969     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1970   }
1971   PetscFunctionReturn(0);
1972 }
1973 
1974 #undef __FUNCT__
1975 #define __FUNCT__ "MatSetUp_MPIBAIJ"
1976 PetscErrorCode MatSetUp_MPIBAIJ(Mat A)
1977 {
1978   PetscErrorCode ierr;
1979 
1980   PetscFunctionBegin;
1981   ierr = MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1982   PetscFunctionReturn(0);
1983 }
1984 
1985 #undef __FUNCT__
1986 #define __FUNCT__ "MatAXPY_MPIBAIJ"
1987 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1988 {
1989   PetscErrorCode ierr;
1990   Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ*)X->data,*yy=(Mat_MPIBAIJ*)Y->data;
1991   PetscBLASInt   bnz,one=1;
1992   Mat_SeqBAIJ    *x,*y;
1993 
1994   PetscFunctionBegin;
1995   if (str == SAME_NONZERO_PATTERN) {
1996     PetscScalar alpha = a;
1997     x    = (Mat_SeqBAIJ*)xx->A->data;
1998     y    = (Mat_SeqBAIJ*)yy->A->data;
1999     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2000     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2001     x    = (Mat_SeqBAIJ*)xx->B->data;
2002     y    = (Mat_SeqBAIJ*)yy->B->data;
2003     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2004     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2005   } else {
2006     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2007   }
2008   PetscFunctionReturn(0);
2009 }
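
/*
   Editor's sketch (addition, not part of the original source): Y = Y + a*X.
   SAME_NONZERO_PATTERN lets the code above use one BLAS axpy per local part;
   with any other pattern the generic MatAXPY_Basic() path is taken.

     ierr = MatAXPY(Y,2.0,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
*/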
2010 
2011 #undef __FUNCT__
2012 #define __FUNCT__ "MatRealPart_MPIBAIJ"
2013 PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
2014 {
2015   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
2016   PetscErrorCode ierr;
2017 
2018   PetscFunctionBegin;
2019   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2020   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2021   PetscFunctionReturn(0);
2022 }
2023 
2024 #undef __FUNCT__
2025 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ"
2026 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
2027 {
2028   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
2029   PetscErrorCode ierr;
2030 
2031   PetscFunctionBegin;
2032   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2033   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2034   PetscFunctionReturn(0);
2035 }
2036 
2037 #undef __FUNCT__
2038 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ"
2039 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
2040 {
2041   PetscErrorCode ierr;
2042   IS             iscol_local;
2043   PetscInt       csize;
2044 
2045   PetscFunctionBegin;
2046   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
2047   if (call == MAT_REUSE_MATRIX) {
2048     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
2049     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2050   } else {
2051     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
2052   }
2053   ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
2054   if (call == MAT_INITIAL_MATRIX) {
2055     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
2056     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
2057   }
2058   PetscFunctionReturn(0);
2059 }
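
/*
   Editor's sketch (addition, not part of the original source): extracting a
   parallel submatrix via the routine above. nloc and first are hypothetical;
   for BAIJ the index sets must select whole blocks. MAT_REUSE_MATRIX recycles
   the "ISAllGather" object composed above, so the same index sets must be used.

     IS  isrow,iscol;
     Mat S;
     ierr = ISCreateStride(PetscObjectComm((PetscObject)A),nloc,first,1,&isrow);CHKERRQ(ierr);
     ierr = ISCreateStride(PetscObjectComm((PetscObject)A),nloc,first,1,&iscol);CHKERRQ(ierr);
     ierr = MatGetSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&S);CHKERRQ(ierr);
     ierr = MatGetSubMatrix(A,isrow,iscol,MAT_REUSE_MATRIX,&S);CHKERRQ(ierr);    later, same index sets
*/
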
2060 extern PetscErrorCode MatGetSubMatrices_MPIBAIJ_local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,PetscBool*,Mat*);
2061 #undef __FUNCT__
2062 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ_Private"
2063 /*
2064   Not great since it makes two copies of the submatrix: first a local SeqBAIJ
2065   matrix, and then the end result formed by concatenating the local matrices.
2066   Writing it directly would be much like MatGetSubMatrices_MPIBAIJ().
2067 */
2068 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2069 {
2070   PetscErrorCode ierr;
2071   PetscMPIInt    rank,size;
2072   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs;
2073   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol,nrow;
2074   Mat            M,Mreuse;
2075   MatScalar      *vwork,*aa;
2076   MPI_Comm       comm;
2077   IS             isrow_new, iscol_new;
2078   PetscBool      idflag,allrows, allcols;
2079   Mat_SeqBAIJ    *aij;
2080 
2081   PetscFunctionBegin;
2082   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2083   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2084   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2085   /* The compression and expansion should be avoided. It does not point
2086      out errors and might change the indices; hence it is buggy */
2087   ierr = ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new);CHKERRQ(ierr);
2088   ierr = ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new);CHKERRQ(ierr);
2089 
2090   /* Check for special case: each processor gets entire matrix columns */
2091   ierr = ISIdentity(iscol,&idflag);CHKERRQ(ierr);
2092   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
2093   if (idflag && ncol == mat->cmap->N) allcols = PETSC_TRUE;
2094   else allcols = PETSC_FALSE;
2095 
2096   ierr = ISIdentity(isrow,&idflag);CHKERRQ(ierr);
2097   ierr = ISGetLocalSize(isrow,&nrow);CHKERRQ(ierr);
2098   if (idflag && nrow == mat->rmap->N) allrows = PETSC_TRUE;
2099   else allrows = PETSC_FALSE;
2100 
2101   if (call ==  MAT_REUSE_MATRIX) {
2102     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
2103     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2104     ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_REUSE_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr);
2105   } else {
2106     ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_INITIAL_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr);
2107   }
2108   ierr = ISDestroy(&isrow_new);CHKERRQ(ierr);
2109   ierr = ISDestroy(&iscol_new);CHKERRQ(ierr);
2110   /*
2111       m - number of local rows
2112       n - number of columns (same on all processors)
2113       rstart - first row in new global matrix generated
2114   */
2115   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2116   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2117   m    = m/bs;
2118   n    = n/bs;
2119 
2120   if (call == MAT_INITIAL_MATRIX) {
2121     aij = (Mat_SeqBAIJ*)(Mreuse)->data;
2122     ii  = aij->i;
2123     jj  = aij->j;
2124 
2125     /*
2126         Determine the number of non-zeros in the diagonal and off-diagonal
2127         portions of the matrix in order to do correct preallocation
2128     */
2129 
2130     /* first get start and end of "diagonal" columns */
2131     if (csize == PETSC_DECIDE) {
2132       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2133       if (mglobal == n*bs) { /* square matrix */
2134         nlocal = m;
2135       } else {
2136         nlocal = n/size + ((n % size) > rank);
2137       }
2138     } else {
2139       nlocal = csize/bs;
2140     }
2141     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2142     rstart = rend - nlocal;
2143     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2144 
2145     /* next, compute all the lengths */
2146     ierr  = PetscMalloc2(m+1,&dlens,m+1,&olens);CHKERRQ(ierr);
2147     for (i=0; i<m; i++) {
2148       jend = ii[i+1] - ii[i];
2149       olen = 0;
2150       dlen = 0;
2151       for (j=0; j<jend; j++) {
2152         if (*jj < rstart || *jj >= rend) olen++;
2153         else dlen++;
2154         jj++;
2155       }
2156       olens[i] = olen;
2157       dlens[i] = dlen;
2158     }
2159     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
2160     ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr);
2161     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
2162     ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
2163     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
2164   } else {
2165     PetscInt ml,nl;
2166 
2167     M    = *newmat;
2168     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2169     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2170     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2171     /*
2172          The next two lines are needed so we may call MatSetValuesBlocked_MPIBAIJ() below directly,
2173        rather than the slower MatSetValues().
2174     */
2175     M->was_assembled = PETSC_TRUE;
2176     M->assembled     = PETSC_FALSE;
2177   }
2178   ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2179   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2180   aij  = (Mat_SeqBAIJ*)(Mreuse)->data;
2181   ii   = aij->i;
2182   jj   = aij->j;
2183   aa   = aij->a;
2184   for (i=0; i<m; i++) {
2185     row   = rstart/bs + i;
2186     nz    = ii[i+1] - ii[i];
2187     cwork = jj;     jj += nz;
2188     vwork = aa;     aa += nz*bs*bs;
2189     ierr  = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2190   }
2191 
2192   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2193   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2194   *newmat = M;
2195 
2196   /* save submatrix used in processor for next request */
2197   if (call ==  MAT_INITIAL_MATRIX) {
2198     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2199     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2200   }
2201   PetscFunctionReturn(0);
2202 }
2203 
2204 #undef __FUNCT__
2205 #define __FUNCT__ "MatPermute_MPIBAIJ"
2206 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B)
2207 {
2208   MPI_Comm       comm,pcomm;
2209   PetscInt       clocal_size,nrows;
2210   const PetscInt *rows;
2211   PetscMPIInt    size;
2212   IS             crowp,lcolp;
2213   PetscErrorCode ierr;
2214 
2215   PetscFunctionBegin;
2216   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2217   /* make a collective version of 'rowp' */
2218   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
2219   if (pcomm==comm) {
2220     crowp = rowp;
2221   } else {
2222     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
2223     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
2224     ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr);
2225     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
2226   }
2227   ierr = ISSetPermutation(crowp);CHKERRQ(ierr);
2228   /* make a local version of 'colp' */
2229   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
2230   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
2231   if (size==1) {
2232     lcolp = colp;
2233   } else {
2234     ierr = ISAllGather(colp,&lcolp);CHKERRQ(ierr);
2235   }
2236   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
2237   /* now we just get the submatrix */
2238   ierr = MatGetLocalSize(A,NULL,&clocal_size);CHKERRQ(ierr);
2239   ierr = MatGetSubMatrix_MPIBAIJ_Private(A,crowp,lcolp,clocal_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
2240   /* clean up */
2241   if (pcomm!=comm) {
2242     ierr = ISDestroy(&crowp);CHKERRQ(ierr);
2243   }
2244   if (size>1) {
2245     ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
2246   }
2247   PetscFunctionReturn(0);
2248 }
2249 
2250 #undef __FUNCT__
2251 #define __FUNCT__ "MatGetGhosts_MPIBAIJ"
2252 PetscErrorCode  MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
2253 {
2254   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data;
2255   Mat_SeqBAIJ *B    = (Mat_SeqBAIJ*)baij->B->data;
2256 
2257   PetscFunctionBegin;
2258   if (nghosts) *nghosts = B->nbs;
2259   if (ghosts) *ghosts = baij->garray;
2260   PetscFunctionReturn(0);
2261 }
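
/*
   Editor's sketch (addition, not part of the original source): the ghosts
   returned are the global block (not point) column indices this process needs
   from others, i.e. baij->garray; the array is owned by the matrix.

     PetscInt       nghosts;
     const PetscInt *ghosts;
     ierr = MatGetGhosts(A,&nghosts,&ghosts);CHKERRQ(ierr);
*/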
2262 
2263 #undef __FUNCT__
2264 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIBAIJ"
2265 PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat)
2266 {
2267   Mat            B;
2268   Mat_MPIBAIJ    *a  = (Mat_MPIBAIJ*)A->data;
2269   Mat_SeqBAIJ    *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data;
2270   Mat_SeqAIJ     *b;
2271   PetscErrorCode ierr;
2272   PetscMPIInt    size,rank,*recvcounts = 0,*displs = 0;
2273   PetscInt       sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs;
2274   PetscInt       m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf;
2275 
2276   PetscFunctionBegin;
2277   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
2278   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
2279 
2280   /* ----------------------------------------------------------------
2281      Tell every processor the number of nonzeros per row
2282   */
2283   ierr = PetscMalloc1((A->rmap->N/bs),&lens);CHKERRQ(ierr);
2284   for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) {
2285     lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs];
2286   }
2287   sendcount = A->rmap->rend/bs - A->rmap->rstart/bs;
2288   ierr      = PetscMalloc1(2*size,&recvcounts);CHKERRQ(ierr);
2289   displs    = recvcounts + size;
2290   for (i=0; i<size; i++) {
2291     recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs;
2292     displs[i]     = A->rmap->range[i]/bs;
2293   }
2294 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2295   ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2296 #else
2297   ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2298 #endif
2299   /* ---------------------------------------------------------------
2300      Create the sequential matrix of the same type as the local block diagonal
2301   */
2302   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
2303   ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2304   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2305   ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr);
2306   b    = (Mat_SeqAIJ*)B->data;
2307 
2308   /*--------------------------------------------------------------------
2309     Copy my part of matrix column indices over
2310   */
2311   sendcount  = ad->nz + bd->nz;
2312   jsendbuf   = b->j + b->i[rstarts[rank]/bs];
2313   a_jsendbuf = ad->j;
2314   b_jsendbuf = bd->j;
2315   n          = A->rmap->rend/bs - A->rmap->rstart/bs;
2316   cnt        = 0;
2317   for (i=0; i<n; i++) {
2318 
2319     /* put in lower diagonal portion */
2320     m = bd->i[i+1] - bd->i[i];
2321     while (m > 0) {
2322       /* is it above diagonal (in bd (compressed) numbering) */
2323       if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break;
2324       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2325       m--;
2326     }
2327 
2328     /* put in diagonal portion */
2329     for (j=ad->i[i]; j<ad->i[i+1]; j++) {
2330       jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++;
2331     }
2332 
2333     /* put in upper diagonal portion */
2334     while (m-- > 0) {
2335       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2336     }
2337   }
2338   if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt);
2339 
2340   /*--------------------------------------------------------------------
2341     Gather all column indices to all processors
2342   */
2343   for (i=0; i<size; i++) {
2344     recvcounts[i] = 0;
2345     for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) {
2346       recvcounts[i] += lens[j];
2347     }
2348   }
2349   displs[0] = 0;
2350   for (i=1; i<size; i++) {
2351     displs[i] = displs[i-1] + recvcounts[i-1];
2352   }
2353 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2354   ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2355 #else
2356   ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2357 #endif
2358   /*--------------------------------------------------------------------
2359     Assemble the matrix into usable form (note numerical values not yet set)
2360   */
2361   /* set the b->ilen (length of each row) values */
2362   ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr);
2363   /* set the b->i indices */
2364   b->i[0] = 0;
2365   for (i=1; i<=A->rmap->N/bs; i++) {
2366     b->i[i] = b->i[i-1] + lens[i-1];
2367   }
2368   ierr = PetscFree(lens);CHKERRQ(ierr);
2369   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2370   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2371   ierr = PetscFree(recvcounts);CHKERRQ(ierr);
2372 
2373   if (A->symmetric) {
2374     ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2375   } else if (A->hermitian) {
2376     ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr);
2377   } else if (A->structurally_symmetric) {
2378     ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2379   }
2380   *newmat = B;
2381   PetscFunctionReturn(0);
2382 }
2383 
2384 #undef __FUNCT__
2385 #define __FUNCT__ "MatSOR_MPIBAIJ"
2386 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2387 {
2388   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
2389   PetscErrorCode ierr;
2390   Vec            bb1 = 0;
2391 
2392   PetscFunctionBegin;
2393   if (flag == SOR_APPLY_UPPER) {
2394     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2395     PetscFunctionReturn(0);
2396   }
2397 
2398   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) { /* need a work vector unless a single zero-initial-guess sweep suffices */
2399     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2400   }
2401 
2402   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2403     if (flag & SOR_ZERO_INITIAL_GUESS) {
2404       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2405       its--;
2406     }
2407 
2408     while (its--) {
2409       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2410       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2411 
2412       /* update rhs: bb1 = bb - B*x */
2413       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2414       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2415 
2416       /* local sweep */
2417       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2418     }
2419   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
2420     if (flag & SOR_ZERO_INITIAL_GUESS) {
2421       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2422       its--;
2423     }
2424     while (its--) {
2425       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2426       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2427 
2428       /* update rhs: bb1 = bb - B*x */
2429       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2430       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2431 
2432       /* local sweep */
2433       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2434     }
2435   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
2436     if (flag & SOR_ZERO_INITIAL_GUESS) {
2437       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2438       its--;
2439     }
2440     while (its--) {
2441       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2442       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2443 
2444       /* update rhs: bb1 = bb - B*x */
2445       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2446       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2447 
2448       /* local sweep */
2449       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2450     }
2451   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Requested parallel SOR variant not supported");
2452 
2453   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2454   PetscFunctionReturn(0);
2455 }
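
/*
   Editor's sketch (addition, not part of the original source): MatSOR() is
   usually reached through PCSOR; the direct call equivalent to one local
   symmetric sweep with zero initial guess is

     ierr = MatSOR(A,b,1.0,(MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS),0.0,1,1,x);CHKERRQ(ierr);

   or, from the options database, -pc_type sor -pc_sor_local_symmetric.
*/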
2456 
2457 #undef __FUNCT__
2458 #define __FUNCT__ "MatGetColumnNorms_MPIBAIJ"
2459 PetscErrorCode MatGetColumnNorms_MPIBAIJ(Mat A,NormType type,PetscReal *norms)
2460 {
2461   PetscErrorCode ierr;
2462   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)A->data;
2463   PetscInt       N,i,*garray = aij->garray;
2464   PetscInt       ib,jb,bs = A->rmap->bs;
2465   Mat_SeqBAIJ    *a_aij = (Mat_SeqBAIJ*) aij->A->data;
2466   MatScalar      *a_val = a_aij->a;
2467   Mat_SeqBAIJ    *b_aij = (Mat_SeqBAIJ*) aij->B->data;
2468   MatScalar      *b_val = b_aij->a;
2469   PetscReal      *work;
2470 
2471   PetscFunctionBegin;
2472   ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
2473   ierr = PetscCalloc1(N,&work);CHKERRQ(ierr);
2474   if (type == NORM_2) {
2475     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2476       for (jb=0; jb<bs; jb++) {
2477         for (ib=0; ib<bs; ib++) {
2478           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
2479           a_val++;
2480         }
2481       }
2482     }
2483     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2484       for (jb=0; jb<bs; jb++) {
2485         for (ib=0; ib<bs; ib++) {
2486           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val);
2487           b_val++;
2488         }
2489       }
2490     }
2491   } else if (type == NORM_1) {
2492     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2493       for (jb=0; jb<bs; jb++) {
2494         for (ib=0; ib<bs; ib++) {
2495           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
2496           a_val++;
2497         }
2498       }
2499     }
2500     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2501       for (jb=0; jb<bs; jb++) {
2502         for (ib=0; ib<bs; ib++) {
2503           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val);
2504           b_val++;
2505         }
2506       }
2507     }
2508   } else if (type == NORM_INFINITY) {
2509     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2510       for (jb=0; jb<bs; jb++) {
2511         for (ib=0; ib<bs; ib++) {
2512           PetscInt col = A->cmap->rstart + a_aij->j[i] * bs + jb;
2513           work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]);
2514           a_val++;
2515         }
2516       }
2517     }
2518     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2519       for (jb=0; jb<bs; jb++) {
2520         for (ib=0; ib<bs; ib++) {
2521           PetscInt col = garray[b_aij->j[i]] * bs + jb;
2522           work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]);
2523           b_val++;
2524         }
2525       }
2526     }
2527   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
2528   if (type == NORM_INFINITY) {
2529     ierr = MPI_Allreduce(work,norms,N,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2530   } else {
2531     ierr = MPI_Allreduce(work,norms,N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2532   }
2533   ierr = PetscFree(work);CHKERRQ(ierr);
2534   if (type == NORM_2) {
2535     for (i=0; i<N; i++) norms[i] = PetscSqrtReal(norms[i]);
2536   }
2537   PetscFunctionReturn(0);
2538 }
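
/*
   Editor's sketch (addition, not part of the original source): every process
   receives the norms of all global columns, so the result array must have
   length equal to the global column count.

     PetscReal *norms;
     PetscInt  N;
     ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
     ierr = PetscMalloc1(N,&norms);CHKERRQ(ierr);
     ierr = MatGetColumnNorms(A,NORM_2,norms);CHKERRQ(ierr);
     ierr = PetscFree(norms);CHKERRQ(ierr);
*/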
2539 
2540 #undef __FUNCT__
2541 #define __FUNCT__ "MatInvertBlockDiagonal_MPIBAIJ"
2542 PetscErrorCode  MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values)
2543 {
2544   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*) A->data;
2545   PetscErrorCode ierr;
2546 
2547   PetscFunctionBegin;
2548   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2549   PetscFunctionReturn(0);
2550 }
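
/*
   Editor's sketch (addition, not part of the original source): the inverted
   diagonal blocks are returned as a read-only array of bs*bs blocks owned by
   the matrix (do not free it); this is what point-block Jacobi consumes.

     const PetscScalar *idiag;
     ierr = MatInvertBlockDiagonal(A,&idiag);CHKERRQ(ierr);
*/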
2551 
2552 
2553 /* -------------------------------------------------------------------*/
2554 static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ,
2555                                        MatGetRow_MPIBAIJ,
2556                                        MatRestoreRow_MPIBAIJ,
2557                                        MatMult_MPIBAIJ,
2558                                 /* 4*/ MatMultAdd_MPIBAIJ,
2559                                        MatMultTranspose_MPIBAIJ,
2560                                        MatMultTransposeAdd_MPIBAIJ,
2561                                        0,
2562                                        0,
2563                                        0,
2564                                 /*10*/ 0,
2565                                        0,
2566                                        0,
2567                                        MatSOR_MPIBAIJ,
2568                                        MatTranspose_MPIBAIJ,
2569                                 /*15*/ MatGetInfo_MPIBAIJ,
2570                                        MatEqual_MPIBAIJ,
2571                                        MatGetDiagonal_MPIBAIJ,
2572                                        MatDiagonalScale_MPIBAIJ,
2573                                        MatNorm_MPIBAIJ,
2574                                 /*20*/ MatAssemblyBegin_MPIBAIJ,
2575                                        MatAssemblyEnd_MPIBAIJ,
2576                                        MatSetOption_MPIBAIJ,
2577                                        MatZeroEntries_MPIBAIJ,
2578                                 /*24*/ MatZeroRows_MPIBAIJ,
2579                                        0,
2580                                        0,
2581                                        0,
2582                                        0,
2583                                 /*29*/ MatSetUp_MPIBAIJ,
2584                                        0,
2585                                        0,
2586                                        0,
2587                                        0,
2588                                 /*34*/ MatDuplicate_MPIBAIJ,
2589                                        0,
2590                                        0,
2591                                        0,
2592                                        0,
2593                                 /*39*/ MatAXPY_MPIBAIJ,
2594                                        MatGetSubMatrices_MPIBAIJ,
2595                                        MatIncreaseOverlap_MPIBAIJ,
2596                                        MatGetValues_MPIBAIJ,
2597                                        MatCopy_MPIBAIJ,
2598                                 /*44*/ 0,
2599                                        MatScale_MPIBAIJ,
2600                                        0,
2601                                        0,
2602                                        MatZeroRowsColumns_MPIBAIJ,
2603                                 /*49*/ 0,
2604                                        0,
2605                                        0,
2606                                        0,
2607                                        0,
2608                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2609                                        0,
2610                                        MatSetUnfactored_MPIBAIJ,
2611                                        MatPermute_MPIBAIJ,
2612                                        MatSetValuesBlocked_MPIBAIJ,
2613                                 /*59*/ MatGetSubMatrix_MPIBAIJ,
2614                                        MatDestroy_MPIBAIJ,
2615                                        MatView_MPIBAIJ,
2616                                        0,
2617                                        0,
2618                                 /*64*/ 0,
2619                                        0,
2620                                        0,
2621                                        0,
2622                                        0,
2623                                 /*69*/ MatGetRowMaxAbs_MPIBAIJ,
2624                                        0,
2625                                        0,
2626                                        0,
2627                                        0,
2628                                 /*74*/ 0,
2629                                        MatFDColoringApply_BAIJ,
2630                                        0,
2631                                        0,
2632                                        0,
2633                                 /*79*/ 0,
2634                                        0,
2635                                        0,
2636                                        0,
2637                                        MatLoad_MPIBAIJ,
2638                                 /*84*/ 0,
2639                                        0,
2640                                        0,
2641                                        0,
2642                                        0,
2643                                 /*89*/ 0,
2644                                        0,
2645                                        0,
2646                                        0,
2647                                        0,
2648                                 /*94*/ 0,
2649                                        0,
2650                                        0,
2651                                        0,
2652                                        0,
2653                                 /*99*/ 0,
2654                                        0,
2655                                        0,
2656                                        0,
2657                                        0,
2658                                 /*104*/0,
2659                                        MatRealPart_MPIBAIJ,
2660                                        MatImaginaryPart_MPIBAIJ,
2661                                        0,
2662                                        0,
2663                                 /*109*/0,
2664                                        0,
2665                                        0,
2666                                        0,
2667                                        0,
2668                                 /*114*/MatGetSeqNonzeroStructure_MPIBAIJ,
2669                                        0,
2670                                        MatGetGhosts_MPIBAIJ,
2671                                        0,
2672                                        0,
2673                                 /*119*/0,
2674                                        0,
2675                                        0,
2676                                        0,
2677                                        MatGetMultiProcBlock_MPIBAIJ,
2678                                 /*124*/0,
2679                                        MatGetColumnNorms_MPIBAIJ,
2680                                        MatInvertBlockDiagonal_MPIBAIJ,
2681                                        0,
2682                                        0,
2683                                /*129*/ 0,
2684                                        0,
2685                                        0,
2686                                        0,
2687                                        0,
2688                                /*134*/ 0,
2689                                        0,
2690                                        0,
2691                                        0,
2692                                        0,
2693                                /*139*/ 0,
2694                                        0,
2695                                        0,
2696                                        MatFDColoringSetUp_MPIXAIJ
2697 };
2698 
2699 #undef __FUNCT__
2700 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
2701 PetscErrorCode  MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a)
2702 {
2703   PetscFunctionBegin;
2704   *a = ((Mat_MPIBAIJ*)A->data)->A;
2705   PetscFunctionReturn(0);
2706 }
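
/*
   A minimal usage sketch for the routine above (not the author's example): callers reach it
   through MatGetDiagonalBlock(), which returns, without copying, the sequential matrix that
   holds this process's diagonal block; the caller should not destroy the result.

      Mat Ad;
      MatGetDiagonalBlock(A,&Ad);
*/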
2707 
2708 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*);
2709 
2710 #undef __FUNCT__
2711 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ"
2712 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2713 {
2714   PetscInt       m,rstart,cstart,cend;
2715   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
2716   const PetscInt *JJ    =0;
2717   PetscScalar    *values=0;
2718   PetscBool      roworiented = ((Mat_MPIBAIJ*)B->data)->roworiented;
2719   PetscErrorCode ierr;
2720 
2721   PetscFunctionBegin;
2722   ierr   = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2723   ierr   = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2724   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2725   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2726   ierr   = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2727   m      = B->rmap->n/bs;
2728   rstart = B->rmap->rstart/bs;
2729   cstart = B->cmap->rstart/bs;
2730   cend   = B->cmap->rend/bs;
2731 
2732   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
2733   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
2734   for (i=0; i<m; i++) {
2735     nz = ii[i+1] - ii[i];
2736     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
2737     nz_max = PetscMax(nz_max,nz);
2738     JJ     = jj + ii[i];
2739     for (j=0; j<nz; j++) {
2740       if (*JJ >= cstart) break;
2741       JJ++;
2742     }
2743     d = 0;
2744     for (; j<nz; j++) {
2745       if (*JJ++ >= cend) break;
2746       d++;
2747     }
2748     d_nnz[i] = d;
2749     o_nnz[i] = nz - d;
2750   }
2751   ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2752   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
2753 
2754   values = (PetscScalar*)V;
2755   if (!values) {
2756     ierr = PetscMalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr);
2757     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2758   }
2759   for (i=0; i<m; i++) {
2760     PetscInt          row    = i + rstart;
2761     PetscInt          ncols  = ii[i+1] - ii[i];
2762     const PetscInt    *icols = jj + ii[i];
2763     if (!roworiented) {         /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
2764       const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2765       ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2766     } else {                    /* block ordering does not match so we can only insert one block at a time. */
2767       PetscInt j;
2768       for (j=0; j<ncols; j++) {
2769         const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
2770         ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
2771       }
2772     }
2773   }
2774 
2775   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2776   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2777   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2778   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2779   PetscFunctionReturn(0);
2780 }
2781 
2782 #undef __FUNCT__
2783 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR"
2784 /*@C
2785    MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format
2786    (the default parallel PETSc format).
2787 
2788    Collective on MPI_Comm
2789 
2790    Input Parameters:
2791 +  A - the matrix
2792 .  bs - the block size
2793 .  i - the indices into j for the start of each local row (starts with zero)
2794 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2795 -  v - optional values in the matrix
2796 
2797    Level: developer
2798 
2799    Notes: The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED.  For example, C programs
2800    may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
2801    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
2802    MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
2803    block column and the second index is over columns within a block.
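
   Example (a minimal sketch, assuming A has already been created as MATMPIBAIJ with two local
   block rows and block size 2): block row 0 has blocks in global block columns 0 and 2 and
   block row 1 has a single block in block column 1, so one could call

.vb
   PetscInt i[] = {0,2,3};      /* row starts: two blocks in row 0, one in row 1 */
   PetscInt j[] = {0,2,1};      /* block column indices, sorted within each row */
   MatMPIBAIJSetPreallocationCSR(A,2,i,j,NULL);
.ve

   Passing NULL for v preallocates the pattern and inserts zero blocks; a non-NULL v supplies
   the bs*bs dense block entries in the order described above.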
2804 
2805 .keywords: matrix, aij, compressed row, sparse, parallel
2806 
2807 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, MatCreateMPIBAIJWithArrays(), MPIBAIJ
2808 @*/
2809 PetscErrorCode  MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2810 {
2811   PetscErrorCode ierr;
2812 
2813   PetscFunctionBegin;
2814   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2815   PetscValidType(B,1);
2816   PetscValidLogicalCollectiveInt(B,bs,2);
2817   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
2818   PetscFunctionReturn(0);
2819 }
2820 
2821 #undef __FUNCT__
2822 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
2823 PetscErrorCode  MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
2824 {
2825   Mat_MPIBAIJ    *b;
2826   PetscErrorCode ierr;
2827   PetscInt       i;
2828 
2829   PetscFunctionBegin;
2830   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
2831   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2832   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2833   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2834 
2835   if (d_nnz) {
2836     for (i=0; i<B->rmap->n/bs; i++) {
2837       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]);
2838     }
2839   }
2840   if (o_nnz) {
2841     for (i=0; i<B->rmap->n/bs; i++) {
2842       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]);
2843     }
2844   }
2845 
2846   b      = (Mat_MPIBAIJ*)B->data;
2847   b->bs2 = bs*bs;
2848   b->mbs = B->rmap->n/bs;
2849   b->nbs = B->cmap->n/bs;
2850   b->Mbs = B->rmap->N/bs;
2851   b->Nbs = B->cmap->N/bs;
2852 
2853   for (i=0; i<=b->size; i++) {
2854     b->rangebs[i] = B->rmap->range[i]/bs;
2855   }
2856   b->rstartbs = B->rmap->rstart/bs;
2857   b->rendbs   = B->rmap->rend/bs;
2858   b->cstartbs = B->cmap->rstart/bs;
2859   b->cendbs   = B->cmap->rend/bs;
2860 
2861   if (!B->preallocated) {
2862     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2863     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2864     ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
2865     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2866     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2867     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2868     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
2869     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2870     ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr);
2871   }
2872 
2873   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2874   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
2875   B->preallocated = PETSC_TRUE;
2876   PetscFunctionReturn(0);
2877 }
2878 
2879 extern PetscErrorCode  MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
2880 extern PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
2881 
2882 #undef __FUNCT__
2883 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj"
2884 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype,MatReuse reuse,Mat *adj)
2885 {
2886   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
2887   PetscErrorCode ierr;
2888   Mat_SeqBAIJ    *d  = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data;
2889   PetscInt       M   = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs;
2890   const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;
2891 
2892   PetscFunctionBegin;
2893   ierr  = PetscMalloc1((M+1),&ii);CHKERRQ(ierr);
2894   ii[0] = 0;
2895   for (i=0; i<M; i++) {
2896     if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]);
2897     if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]);
2898     ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i];
2899     /* remove one from the count if the matrix has a diagonal entry */
2900     for (j=id[i]; j<id[i+1]; j++) {
2901       if (jd[j] == i) {ii[i+1]--;break;}
2902     }
2903   }
2904   ierr = PetscMalloc1(ii[M],&jj);CHKERRQ(ierr);
2905   cnt  = 0;
2906   for (i=0; i<M; i++) {
2907     for (j=io[i]; j<io[i+1]; j++) {
2908       if (garray[jo[j]] > rstart) break;
2909       jj[cnt++] = garray[jo[j]];
2910     }
2911     for (k=id[i]; k<id[i+1]; k++) {
2912       if (jd[k] != i) {
2913         jj[cnt++] = rstart + jd[k];
2914       }
2915     }
2916     for (; j<io[i+1]; j++) {
2917       jj[cnt++] = garray[jo[j]];
2918     }
2919   }
2920   ierr = MatCreateMPIAdj(PetscObjectComm((PetscObject)B),M,B->cmap->N/B->rmap->bs,ii,jj,NULL,adj);CHKERRQ(ierr);
2921   PetscFunctionReturn(0);
2922 }
2923 
2924 #include <../src/mat/impls/aij/mpi/mpiaij.h>
2925 
2926 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,MatReuse,Mat*);
2927 
2928 #undef __FUNCT__
2929 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAIJ"
2930 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
2931 {
2932   PetscErrorCode ierr;
2933   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
2934   Mat            B;
2935   Mat_MPIAIJ     *b;
2936 
2937   PetscFunctionBegin;
2938   if (!A->assembled) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Matrix must be assembled");
2939 
2940   ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2941   ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
2942   ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2943   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
2944   ierr = MatMPIAIJSetPreallocation(B,0,NULL,0,NULL);CHKERRQ(ierr);
2945   b    = (Mat_MPIAIJ*) B->data;
2946 
2947   ierr = MatDestroy(&b->A);CHKERRQ(ierr);
2948   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2949   ierr = MatDisAssemble_MPIBAIJ(A);CHKERRQ(ierr);
2950   ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr);
2951   ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr);
2952   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2953   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2954   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2955   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2956   if (reuse == MAT_REUSE_MATRIX) {
2957     ierr = MatHeaderReplace(A,B);CHKERRQ(ierr);
2958   } else {
2959    *newmat = B;
2960   }
2961   PetscFunctionReturn(0);
2962 }
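
/*
   The converter above is normally reached through MatConvert(); a minimal usage sketch
   (error checking elided) is

      Mat Aij;
      MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&Aij);

   The matrix must already be assembled, since the conversion temporarily disassembles
   and then reassembles A.
*/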
2963 
2964 #if defined(PETSC_HAVE_MUMPS)
2965 PETSC_EXTERN PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*);
2966 #endif
2967 
2968 /*MC
2969    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
2970 
2971    Options Database Keys:
2972 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
2973 . -mat_block_size <bs> - set the blocksize used to store the matrix
2974 - -mat_use_hash_table <fact> - use a hash table to save memory during matrix assembly; <fact> scales the hash table size
2975 
2976   Level: beginner
2977 
2978 .seealso: MatCreateBAIJ()
2979 M*/
2980 
2981 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,MatType,MatReuse,Mat*);
2982 
2983 #undef __FUNCT__
2984 #define __FUNCT__ "MatCreate_MPIBAIJ"
2985 PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B)
2986 {
2987   Mat_MPIBAIJ    *b;
2988   PetscErrorCode ierr;
2989   PetscBool      flg;
2990 
2991   PetscFunctionBegin;
2992   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
2993   B->data = (void*)b;
2994 
2995   ierr         = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2996   B->assembled = PETSC_FALSE;
2997 
2998   B->insertmode = NOT_SET_VALUES;
2999   ierr          = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
3000   ierr          = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr);
3001 
3002   /* build local table of row and column ownerships */
3003   ierr = PetscMalloc1((b->size+1),&b->rangebs);CHKERRQ(ierr);
3004 
3005   /* build cache for off-process entries formed during assembly */
3006   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
3007 
3008   b->donotstash  = PETSC_FALSE;
3009   b->colmap      = NULL;
3010   b->garray      = NULL;
3011   b->roworiented = PETSC_TRUE;
3012 
3013   /* stuff used in block assembly */
3014   b->barray = 0;
3015 
3016   /* stuff used for matrix vector multiply */
3017   b->lvec  = 0;
3018   b->Mvctx = 0;
3019 
3020   /* stuff for MatGetRow() */
3021   b->rowindices   = 0;
3022   b->rowvalues    = 0;
3023   b->getrowactive = PETSC_FALSE;
3024 
3025   /* hash table stuff */
3026   b->ht           = 0;
3027   b->hd           = 0;
3028   b->ht_size      = 0;
3029   b->ht_flag      = PETSC_FALSE;
3030   b->ht_fact      = 0;
3031   b->ht_total_ct  = 0;
3032   b->ht_insert_ct = 0;
3033 
3034   /* stuff for MatGetSubMatrices_MPIBAIJ_local() */
3035   b->ijonly = PETSC_FALSE;
3036 
3037   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr);
3038   ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,NULL);CHKERRQ(ierr);
3039   if (flg) {
3040     PetscReal fact = 1.39;
3041     ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
3042     ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
3043     if (fact <= 1.0) fact = 1.39;
3044     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
3045     ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
3046   }
3047   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3048 
3049 #if defined(PETSC_HAVE_MUMPS)
3050   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_baij_mumps);CHKERRQ(ierr);
3051 #endif
3052   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr);
3053   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr);
3054   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C",MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr);
3055   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
3056   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
3057   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
3058   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
3059   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
3060   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
3061   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSetHashTableFactor_C",MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
3062   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpibstrm_C",MatConvert_MPIBAIJ_MPIBSTRM);CHKERRQ(ierr);
3063   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);
3064   PetscFunctionReturn(0);
3065 }
3066 
3067 /*MC
3068    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
3069 
3070    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
3071    and MATMPIBAIJ otherwise.
3072 
3073    Options Database Keys:
3074 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
3075 
3076   Level: beginner
3077 
3078 .seealso: MatCreateBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3079 M*/
3080 
3081 #undef __FUNCT__
3082 #define __FUNCT__ "MatMPIBAIJSetPreallocation"
3083 /*@C
3084    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
3085    (block compressed row).  For good matrix assembly performance
3086    the user should preallocate the matrix storage by setting the parameters
3087    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3088    performance can be increased by more than a factor of 50.
3089 
3090    Collective on Mat
3091 
3092    Input Parameters:
3093 +  A - the matrix
3094 .  bs   - size of block
3095 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
3096            submatrix  (same for all local rows)
3097 .  d_nnz - array containing the number of block nonzeros in the various block rows
3098            of the diagonal portion of the local submatrix (possibly different for each
3099            block row) or NULL.  If you plan to factor the matrix you must leave room for
3100            the diagonal entry and set it even if it is zero.
3101 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
3102            submatrix (same for all local rows).
3103 -  o_nnz - array containing the number of block nonzeros in the various block rows of the
3104            off-diagonal portion of the local submatrix (possibly different for
3105            each block row) or NULL.
3106 
3107    If the *_nnz parameter is given then the *_nz parameter is ignored
3108 
3109    Options Database Keys:
3110 +   -mat_block_size - size of the blocks to use
3111 -   -mat_use_hash_table <fact>
3112 
3113    Notes:
3114    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
3115    than it must be used on all processors that share the object for that argument.
3116 
3117    Storage Information:
3118    For a square global matrix we define each processor's diagonal portion
3119    to be its local rows and the corresponding columns (a square submatrix);
3120    each processor's off-diagonal portion encompasses the remainder of the
3121    local matrix (a rectangular submatrix).
3122 
3123    The user can specify preallocated storage for the diagonal part of
3124    the local submatrix with either d_nz or d_nnz (not both).  Set
3125    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3126    memory allocation.  Likewise, specify preallocated storage for the
3127    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3128 
3129    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3130    the figure below we depict these three local rows and all columns (0-11).
3131 
3132 .vb
3133            0 1 2 3 4 5 6 7 8 9 10 11
3134           --------------------------
3135    row 3  |o o o d d d o o o o  o  o
3136    row 4  |o o o d d d o o o o  o  o
3137    row 5  |o o o d d d o o o o  o  o
3138           --------------------------
3139 .ve
3140 
3141    Thus, any entries in the d locations are stored in the d (diagonal)
3142    submatrix, and any entries in the o locations are stored in the
3143    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3144    stored simply in the MATSEQBAIJ format for compressed row storage.
3145 
3146    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3147    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3148    In general, for PDE problems in which most nonzeros are near the diagonal,
3149    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3150    or you will get TERRIBLE performance; see the users' manual chapter on
3151    matrices.
3152 
3153    You can call MatGetInfo() to get information on how effective the preallocation was,
3154    for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
3155    You can also run with the option -info and look for messages with the string
3156    malloc in them to see if additional memory allocation was needed.
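
   For example (a minimal sketch), with bs=3 the three rows depicted above form a single
   block row with one diagonal block and three off-diagonal blocks, so the depicted process
   could preallocate its part with

.vb
   MatMPIBAIJSetPreallocation(A,3,1,NULL,3,NULL);
.ve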
3157 
3158    Level: intermediate
3159 
3160 .keywords: matrix, block, aij, compressed row, sparse, parallel
3161 
3162 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocationCSR(), PetscSplitOwnership()
3163 @*/
3164 PetscErrorCode  MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3165 {
3166   PetscErrorCode ierr;
3167 
3168   PetscFunctionBegin;
3169   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3170   PetscValidType(B,1);
3171   PetscValidLogicalCollectiveInt(B,bs,2);
3172   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3173   PetscFunctionReturn(0);
3174 }
3175 
3176 #undef __FUNCT__
3177 #define __FUNCT__ "MatCreateBAIJ"
3178 /*@C
3179    MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format
3180    (block compressed row).  For good matrix assembly performance
3181    the user should preallocate the matrix storage by setting the parameters
3182    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3183    performance can be increased by more than a factor of 50.
3184 
3185    Collective on MPI_Comm
3186 
3187    Input Parameters:
3188 +  comm - MPI communicator
3189 .  bs   - size of block
3190 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3191            This value should be the same as the local size used in creating the
3192            y vector for the matrix-vector product y = Ax.
3193 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
3194            This value should be the same as the local size used in creating the
3195            x vector for the matrix-vector product y = Ax.
3196 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3197 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3198 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
3199            submatrix  (same for all local rows)
3200 .  d_nnz - array containing the number of nonzero blocks in the various block rows
3201            of the diagonal portion of the local submatrix (possibly different for each
3202            block row) or NULL.  If you plan to factor the matrix you must leave room for
3203            the diagonal entry and set it even if it is zero.
3204 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3205            submatrix (same for all local rows).
3206 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
3207            off-diagonal portion of the local submatrix (possibly different for
3208            each block row) or NULL.
3209 
3210    Output Parameter:
3211 .  A - the matrix
3212 
3213    Options Database Keys:
3214 +   -mat_block_size - size of the blocks to use
3215 -   -mat_use_hash_table <fact> - use a hash table to save memory during matrix assembly; <fact> scales the hash table size
3216 
3217    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3218    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3219    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3220 
3221    Notes:
3222    If the *_nnz parameter is given then the *_nz parameter is ignored
3223 
3224    A nonzero block is any block that has 1 or more nonzeros in it
3225 
3226    The user MUST specify either the local or global matrix dimensions
3227    (possibly both).
3228 
3229    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
3230    then it must be used on all processors that share the object for that argument.
3231 
3232    Storage Information:
3233    For a square global matrix we define each processor's diagonal portion
3234    to be its local rows and the corresponding columns (a square submatrix);
3235    each processor's off-diagonal portion encompasses the remainder of the
3236    local matrix (a rectangular submatrix).
3237 
3238    The user can specify preallocated storage for the diagonal part of
3239    the local submatrix with either d_nz or d_nnz (not both).  Set
3240    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3241    memory allocation.  Likewise, specify preallocated storage for the
3242    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3243 
3244    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3245    the figure below we depict these three local rows and all columns (0-11).
3246 
3247 .vb
3248            0 1 2 3 4 5 6 7 8 9 10 11
3249           --------------------------
3250    row 3  |o o o d d d o o o o  o  o
3251    row 4  |o o o d d d o o o o  o  o
3252    row 5  |o o o d d d o o o o  o  o
3253           --------------------------
3254 .ve
3255 
3256    Thus, any entries in the d locations are stored in the d (diagonal)
3257    submatrix, and any entries in the o locations are stored in the
3258    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3259    stored simply in the MATSEQBAIJ format for compressed row storage.
3260 
3261    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3262    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3263    In general, for PDE problems in which most nonzeros are near the diagonal,
3264    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3265    or you will get TERRIBLE performance; see the users' manual chapter on
3266    matrices.
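
   For example (a minimal sketch), a matrix with block size 3 whose block rows each have at
   most one diagonal block and three off-diagonal blocks, as in the figure above, could be
   created with

.vb
   MatCreateBAIJ(comm,3,PETSC_DECIDE,PETSC_DECIDE,M,N,1,NULL,3,NULL,&A);
.ve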
3267 
3268    Level: intermediate
3269 
3270 .keywords: matrix, block, aij, compressed row, sparse, parallel
3271 
3272 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3273 @*/
3274 PetscErrorCode  MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3275 {
3276   PetscErrorCode ierr;
3277   PetscMPIInt    size;
3278 
3279   PetscFunctionBegin;
3280   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3281   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3282   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3283   if (size > 1) {
3284     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
3285     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3286   } else {
3287     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3288     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3289   }
3290   PetscFunctionReturn(0);
3291 }
3292 
3293 #undef __FUNCT__
3294 #define __FUNCT__ "MatDuplicate_MPIBAIJ"
3295 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3296 {
3297   Mat            mat;
3298   Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
3299   PetscErrorCode ierr;
3300   PetscInt       len=0;
3301 
3302   PetscFunctionBegin;
3303   *newmat = 0;
3304   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
3305   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3306   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3307   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3308 
3309   mat->factortype   = matin->factortype;
3310   mat->preallocated = PETSC_TRUE;
3311   mat->assembled    = PETSC_TRUE;
3312   mat->insertmode   = NOT_SET_VALUES;
3313 
3314   a             = (Mat_MPIBAIJ*)mat->data;
3315   mat->rmap->bs = matin->rmap->bs;
3316   a->bs2        = oldmat->bs2;
3317   a->mbs        = oldmat->mbs;
3318   a->nbs        = oldmat->nbs;
3319   a->Mbs        = oldmat->Mbs;
3320   a->Nbs        = oldmat->Nbs;
3321 
3322   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3323   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3324 
3325   a->size         = oldmat->size;
3326   a->rank         = oldmat->rank;
3327   a->donotstash   = oldmat->donotstash;
3328   a->roworiented  = oldmat->roworiented;
3329   a->rowindices   = 0;
3330   a->rowvalues    = 0;
3331   a->getrowactive = PETSC_FALSE;
3332   a->barray       = 0;
3333   a->rstartbs     = oldmat->rstartbs;
3334   a->rendbs       = oldmat->rendbs;
3335   a->cstartbs     = oldmat->cstartbs;
3336   a->cendbs       = oldmat->cendbs;
3337 
3338   /* hash table stuff */
3339   a->ht           = 0;
3340   a->hd           = 0;
3341   a->ht_size      = 0;
3342   a->ht_flag      = oldmat->ht_flag;
3343   a->ht_fact      = oldmat->ht_fact;
3344   a->ht_total_ct  = 0;
3345   a->ht_insert_ct = 0;
3346 
3347   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
3348   if (oldmat->colmap) {
3349 #if defined(PETSC_USE_CTABLE)
3350     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3351 #else
3352     ierr = PetscMalloc1((a->Nbs),&a->colmap);CHKERRQ(ierr);
3353     ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3354     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3355 #endif
3356   } else a->colmap = 0;
3357 
3358   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
3359     ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr);
3360     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3361     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
3362   } else a->garray = 0;
3363 
3364   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
3365   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3366   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
3367   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3368   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
3369 
3370   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3371   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
3372   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3373   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
3374   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3375   *newmat = mat;
3376   PetscFunctionReturn(0);
3377 }
3378 
3379 #undef __FUNCT__
3380 #define __FUNCT__ "MatLoad_MPIBAIJ"
3381 PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer)
3382 {
3383   PetscErrorCode ierr;
3384   int            fd;
3385   PetscInt       i,nz,j,rstart,rend;
3386   PetscScalar    *vals,*buf;
3387   MPI_Comm       comm;
3388   MPI_Status     status;
3389   PetscMPIInt    rank,size,maxnz;
3390   PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
3391   PetscInt       *locrowlens = NULL,*procsnz = NULL,*browners = NULL;
3392   PetscInt       jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax;
3393   PetscMPIInt    tag    = ((PetscObject)viewer)->tag;
3394   PetscInt       *dlens = NULL,*odlens = NULL,*mask = NULL,*masked1 = NULL,*masked2 = NULL,rowcount,odcount;
3395   PetscInt       dcount,kmax,k,nzcount,tmp,mend,sizesset=1,grows,gcols;
3396 
3397   PetscFunctionBegin;
3398   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3399   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr);
3400   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3401   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3402 
3403   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3404   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3405   if (!rank) {
3406     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3407     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
3408     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3409   }
3410 
3411   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
3412 
3413   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3414   M    = header[1]; N = header[2];
3415 
3416   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3417   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
3418   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
3419 
3420   /* If global sizes are set, check if they are consistent with that given in the file */
3421   if (sizesset) {
3422     ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
3423   }
3424   if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",M,grows);
3425   if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",N,gcols);
3426 
3427   if (M != N) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Can only do square matrices");
3428 
3429   /*
3430      This code adds extra rows to make sure the number of rows is
3431      divisible by the blocksize
3432   */
3433   Mbs        = M/bs;
3434   extra_rows = bs - M + bs*Mbs;
3435   if (extra_rows == bs) extra_rows = 0;
3436   else                  Mbs++;
3437   if (extra_rows && !rank) {
3438     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3439   }
3440 
3441   /* determine ownership of all rows */
3442   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
3443     mbs = Mbs/size + ((Mbs % size) > rank);
3444     m   = mbs*bs;
3445   } else { /* User set */
3446     m   = newmat->rmap->n;
3447     mbs = m/bs;
3448   }
3449   ierr = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr);
3450   ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3451 
3452   /* process 0 needs enough room for process with most rows */
3453   if (!rank) {
3454     mmax = rowners[1];
3455     for (i=2; i<=size; i++) {
3456       mmax = PetscMax(mmax,rowners[i]);
3457     }
3458     mmax*=bs;
3459   } else mmax = -1;             /* unused, but compiler warns anyway */
3460 
3461   rowners[0] = 0;
3462   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
3463   for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
3464   rstart = rowners[rank];
3465   rend   = rowners[rank+1];
3466 
3467   /* distribute row lengths to all processors */
3468   ierr = PetscMalloc1(m,&locrowlens);CHKERRQ(ierr);
3469   if (!rank) {
3470     mend = m;
3471     if (size == 1) mend = mend - extra_rows;
3472     ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr);
3473     for (j=mend; j<m; j++) locrowlens[j] = 1;
3474     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
3475     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
3476     for (j=0; j<m; j++) {
3477       procsnz[0] += locrowlens[j];
3478     }
3479     for (i=1; i<size; i++) {
3480       mend = browners[i+1] - browners[i];
3481       if (i == size-1) mend = mend - extra_rows;
3482       ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr);
3483       for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
3484       /* calculate the number of nonzeros on each processor */
3485       for (j=0; j<browners[i+1]-browners[i]; j++) {
3486         procsnz[i] += rowlengths[j];
3487       }
3488       ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3489     }
3490     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3491   } else {
3492     ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3493   }
3494 
3495   if (!rank) {
3496     /* determine max buffer needed and allocate it */
3497     maxnz = procsnz[0];
3498     for (i=1; i<size; i++) {
3499       maxnz = PetscMax(maxnz,procsnz[i]);
3500     }
3501     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
3502 
3503     /* read in my part of the matrix column indices  */
3504     nz     = procsnz[0];
3505     ierr   = PetscMalloc1((nz+1),&ibuf);CHKERRQ(ierr);
3506     mycols = ibuf;
3507     if (size == 1) nz -= extra_rows;
3508     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3509     if (size == 1) {
3510       for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i;
3511     }
3512 
3513     /* read in the parts for the other processes (except the last) and ship them off */
3514     for (i=1; i<size-1; i++) {
3515       nz   = procsnz[i];
3516       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3517       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3518     }
3519     /* read in the stuff for the last proc */
3520     if (size != 1) {
3521       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
3522       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3523       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
3524       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
3525     }
3526     ierr = PetscFree(cols);CHKERRQ(ierr);
3527   } else {
3528     /* determine buffer space needed for message */
3529     nz = 0;
3530     for (i=0; i<m; i++) {
3531       nz += locrowlens[i];
3532     }
3533     ierr   = PetscMalloc1((nz+1),&ibuf);CHKERRQ(ierr);
3534     mycols = ibuf;
3535     /* receive message of column indices */
3536     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3537     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
3538     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
3539   }
3540 
3541   /* loop over local rows, determining number of off diagonal entries */
3542   ierr     = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr);
3543   ierr     = PetscCalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr);
3544   rowcount = 0; nzcount = 0;
3545   for (i=0; i<mbs; i++) {
3546     dcount  = 0;
3547     odcount = 0;
3548     for (j=0; j<bs; j++) {
3549       kmax = locrowlens[rowcount];
3550       for (k=0; k<kmax; k++) {
3551         tmp = mycols[nzcount++]/bs;
3552         if (!mask[tmp]) {
3553           mask[tmp] = 1;
3554           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
3555           else masked1[dcount++] = tmp;
3556         }
3557       }
3558       rowcount++;
3559     }
3560 
3561     dlens[i]  = dcount;
3562     odlens[i] = odcount;
3563 
3564     /* zero out the mask elements we set */
3565     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
3566     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
3567   }
3568 
3569 
3570   if (!sizesset) {
3571     ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3572   }
3573   ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
3574 
3575   if (!rank) {
3576     ierr = PetscMalloc1((maxnz+1),&buf);CHKERRQ(ierr);
3577     /* read in my part of the matrix numerical values  */
3578     nz     = procsnz[0];
3579     vals   = buf;
3580     mycols = ibuf;
3581     if (size == 1) nz -= extra_rows;
3582     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3583     if (size == 1) {
3584       for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0;
3585     }
3586 
3587     /* insert into matrix */
3588     jj = rstart*bs;
3589     for (i=0; i<m; i++) {
3590       ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3591       mycols += locrowlens[i];
3592       vals   += locrowlens[i];
3593       jj++;
3594     }
3595     /* read in other processors (except the last one) and ship out */
3596     for (i=1; i<size-1; i++) {
3597       nz   = procsnz[i];
3598       vals = buf;
3599       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3600       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3601     }
3602     /* the last proc */
3603     if (size != 1) {
3604       nz   = procsnz[i] - extra_rows;
3605       vals = buf;
3606       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3607       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
3608       ierr = MPIULong_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3609     }
3610     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3611   } else {
3612     /* receive numeric values */
3613     ierr = PetscMalloc1((nz+1),&buf);CHKERRQ(ierr);
3614 
3615     /* receive message of values */
3616     vals   = buf;
3617     mycols = ibuf;
3618     ierr   = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3619 
3620     /* insert into matrix */
3621     jj = rstart*bs;
3622     for (i=0; i<m; i++) {
3623       ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3624       mycols += locrowlens[i];
3625       vals   += locrowlens[i];
3626       jj++;
3627     }
3628   }
3629   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
3630   ierr = PetscFree(buf);CHKERRQ(ierr);
3631   ierr = PetscFree(ibuf);CHKERRQ(ierr);
3632   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
3633   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
3634   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
3635   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3636   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3637   PetscFunctionReturn(0);
3638 }
3639 
3640 #undef __FUNCT__
3641 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
3642 /*@
3643    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
3644 
3645    Not Collective, each process can use a different factor
3646 
3647    Input Parameters:
3648 +  mat  - the matrix
3649 -  fact - the hash table size factor
3650 
3651    Level: advanced
3652 
3653   Notes:
3654    This can also be set by the command line option: -mat_use_hash_table <fact>
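
   For example (a minimal sketch; note that a value of fact <= 1.0 given on the command line
   is replaced by the default 1.39):

.vb
   MatMPIBAIJSetHashTableFactor(mat,1.39);
.ve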
3655 
3656 .keywords: matrix, hashtable, factor, HT
3657 
3658 .seealso: MatSetOption()
3659 @*/
3660 PetscErrorCode  MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
3661 {
3662   PetscErrorCode ierr;
3663 
3664   PetscFunctionBegin;
3665   ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr);
3666   PetscFunctionReturn(0);
3667 }
3668 
3669 #undef __FUNCT__
3670 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
3671 PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
3672 {
3673   Mat_MPIBAIJ *baij;
3674 
3675   PetscFunctionBegin;
3676   baij          = (Mat_MPIBAIJ*)mat->data;
3677   baij->ht_fact = fact;
3678   PetscFunctionReturn(0);
3679 }
3680 
3681 #undef __FUNCT__
3682 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
3683 PetscErrorCode  MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
3684 {
3685   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
3686 
3687   PetscFunctionBegin;
3688   *Ad     = a->A;
3689   *Ao     = a->B;
3690   *colmap = a->garray;
3691   PetscFunctionReturn(0);
3692 }
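
/*
   A minimal usage sketch for the accessor above; the returned matrices and column map are
   internal to A (not copies), so the caller must not destroy or free them:

      Mat            Ad,Ao;
      const PetscInt *colmap;
      MatMPIBAIJGetSeqBAIJ(A,&Ad,&Ao,&colmap);
*/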
3693 
3694 /*
3695     Special version for direct calls from Fortran (to eliminate two function call overheads)
3696 */
3697 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3698 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3699 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3700 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3701 #endif
3702 
3703 #undef __FUNCT__
3704 #define __FUNCT__ "matmpibaijsetvaluesblocked"
3705 /*@C
3706   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked()
3707 
3708   Collective on Mat
3709 
3710   Input Parameters:
3711 + mat - the matrix
3712 . min - number of input rows
3713 . im - input rows
3714 . nin - number of input columns
3715 . in - input columns
3716 . v - numerical values input
3717 - addvin - INSERT_VALUES or ADD_VALUES
3718 
3719   Notes: This contains a complete copy of MatSetValuesBlocked_MPIBAIJ(), an unfortunate duplication of code that must be kept in sync with that routine.
3720 
3721   Level: advanced
3722 
3723 .seealso:   MatSetValuesBlocked()
3724 @*/
3725 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
3726 {
3727   /* convert input arguments to C version */
3728   Mat        mat  = *matin;
3729   PetscInt   m    = *min, n = *nin;
3730   InsertMode addv = *addvin;
3731 
3732   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
3733   const MatScalar *value;
3734   MatScalar       *barray     = baij->barray;
3735   PetscBool       roworiented = baij->roworiented;
3736   PetscErrorCode  ierr;
3737   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
3738   PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
3739   PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
3740 
3741   PetscFunctionBegin;
3742   /* tasks normally handled by MatSetValuesBlocked() */
3743   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
3744 #if defined(PETSC_USE_DEBUG)
3745   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
3746   if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
3747 #endif
3748   if (mat->assembled) {
3749     mat->was_assembled = PETSC_TRUE;
3750     mat->assembled     = PETSC_FALSE;
3751   }
3752   ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
3753 
3754 
3755   if (!barray) {
3756     ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
3757     baij->barray = barray;
3758   }
3759 
3760   if (roworiented) stepval = (n-1)*bs;
3761   else stepval = (m-1)*bs;
3762 
3763   for (i=0; i<m; i++) {
3764     if (im[i] < 0) continue;
3765 #if defined(PETSC_USE_DEBUG)
3766     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
3767 #endif
3768     if (im[i] >= rstart && im[i] < rend) {
3769       row = im[i] - rstart;
3770       for (j=0; j<n; j++) {
3771         /* If there is only one block column (row-oriented) or one block row (column-oriented), a copy is not required */
3772         if ((roworiented) && (n == 1)) {
3773           barray = (MatScalar*)v + i*bs2;
3774         } else if ((!roworiented) && (m == 1)) {
3775           barray = (MatScalar*)v + j*bs2;
3776         } else { /* Here a copy is required */
3777           if (roworiented) {
3778             value = v + i*(stepval+bs)*bs + j*bs;
3779           } else {
3780             value = v + j*(stepval+bs)*bs + i*bs;
3781           }
3782           for (ii=0; ii<bs; ii++,value+=stepval) {
3783             for (jj=0; jj<bs; jj++) {
3784               *barray++ = *value++;
3785             }
3786           }
3787           barray -=bs2;
3788         }
3789 
3790         if (in[j] >= cstart && in[j] < cend) {
3791           col  = in[j] - cstart;
3792           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
3793         } else if (in[j] < 0) continue;
3794 #if defined(PETSC_USE_DEBUG)
3795         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
3796 #endif
3797         else {
3798           if (mat->was_assembled) {
3799             if (!baij->colmap) {
3800               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
3801             }
3802 
3803 #if defined(PETSC_USE_DEBUG)
3804 #if defined(PETSC_USE_CTABLE)
3805             { PetscInt data;
3806               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
3807               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
3808             }
3809 #else
3810             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
3811 #endif
3812 #endif
3813 #if defined(PETSC_USE_CTABLE)
3814             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
3815             col  = (col - 1)/bs;
3816 #else
3817             col = (baij->colmap[in[j]] - 1)/bs;
3818 #endif
3819             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
3820               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
3821               col  =  in[j];
3822             }
3823           } else col = in[j];
3824           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
3825         }
3826       }
3827     } else {
3828       if (!baij->donotstash) {
3829         if (roworiented) {
3830           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
3831         } else {
3832           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
3833         }
3834       }
3835     }
3836   }
3837 
3838   /* task normally handled by MatSetValuesBlocked() */
3839   ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
3840   PetscFunctionReturn(0);
3841 }
3842 
3843 #undef __FUNCT__
3844 #define __FUNCT__ "MatCreateMPIBAIJWithArrays"
3845 /*@
3846      MatCreateMPIBAIJWithArrays - creates an MPI BAIJ matrix using arrays that contain the local rows
3847          in standard CSR format.
3848 
3849    Collective on MPI_Comm
3850 
3851    Input Parameters:
3852 +  comm - MPI communicator
3853 .  bs - the block size, only a block size of 1 is supported
3854 .  m - number of local rows (Cannot be PETSC_DECIDE)
3855 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given).
3856        This value should be the same as the local size used in creating the
3857        x vector for the matrix-vector product y = Ax; for square matrices n is almost always m.
3858 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3859 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3860 .   i - row indices
3861 .   j - column indices
3862 -   a - matrix values
3863 
3864    Output Parameter:
3865 .   mat - the matrix
3866 
3867    Level: intermediate
3868 
3869    Notes:
3870        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3871      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3872      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3873 
3874      The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3875      the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3876      block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3877      with column-major ordering within blocks.
3878 
3879        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
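
   Example (a minimal sketch with bs = 1, run on a single process for simplicity): two rows
   with entries a(0,0)=1 and a(1,1)=2 could be created with

.vb
   PetscInt    i[] = {0,1,2};
   PetscInt    j[] = {0,1};       /* global column indices */
   PetscScalar a[] = {1.0,2.0};
   MatCreateMPIBAIJWithArrays(PETSC_COMM_WORLD,1,2,2,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,&mat);
.ve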
3880 
3881 .keywords: matrix, aij, compressed row, sparse, parallel
3882 
3883 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3884           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
3885 @*/
3886 PetscErrorCode  MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3887 {
3888   PetscErrorCode ierr;
3889 
3890   PetscFunctionBegin;
3891   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3892   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3893   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3894   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3895   ierr = MatSetType(*mat,MATMPIBAIJ);CHKERRQ(ierr);
3896   ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
3897   ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
3898   ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
3899   PetscFunctionReturn(0);
3900 }
3901