xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 7da1fb6e516b95fd231a3ef1ae9cb4a28f357e5e)

#include <../src/mat/impls/baij/mpi/mpibaij.h>   /*I  "petscmat.h"  I*/
#include <petscblaslapack.h>
#include <petscsf.h>

extern PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
extern PetscErrorCode MatDisAssemble_MPIBAIJ(Mat);
extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []);
extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
extern PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
extern PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar,Vec,Vec);

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ"
PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);}
  ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++) {
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);
  ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
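
/*
   Note on the translation above: the column indices returned for the
   off-diagonal part B are local to B, and a->garray[] maps a local block
   column of B to its global block column.  A local point column idxb[i] is
   therefore translated to the global point column
   bs*garray[idxb[i]/bs] + idxb[i]%bs.
*/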

#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPIBAIJ"
PetscErrorCode  MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
PetscErrorCode  MatRetrieveValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
     Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  This is done in a non-scalable way since the
   length of colmap equals the global matrix length.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateColmap_MPIBAIJ_Private"
PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *B    = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;

  PetscFunctionBegin;
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
  for (i=0; i<nbs; i++) {
    ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc1((baij->Nbs+1),&baij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}
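
/*
   A minimal sketch of how colmap is used (non-CTABLE case): with bs = 2 and
   garray = {3,7}, the loop above sets colmap[3] = 0*bs+1 = 1 and
   colmap[7] = 1*bs+1 = 3, leaving colmap[] zero for block columns not present
   in B (the +1 shift lets 0 mean "absent").  A global point column gcol is
   then translated by

     col = baij->colmap[gcol/bs] - 1;          negative means absent from B
     if (col >= 0) col += gcol%bs;             local point column within B
*/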

#define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
  { \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap +  bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value;  \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
a_noinsert:; \
    ailen[brow] = nrow; \
  }

#define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
  { \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap +  bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value;  \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
b_noinsert:; \
    bilen[brow] = nrow; \
  }
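
/*
   Both macros above implement the same insertion logic, one for the diagonal
   block A and one for the off-diagonal block B: a truncated binary search
   narrows the block row to a window of at most three entries, a linear scan
   locates block column bcol, and entry (ridx,cidx) of the bs x bs block is
   either overwritten or accumulated.  If the block is absent (and nonew
   permits insertion), all later blocks in the row are shifted up by one and a
   zeroed block is inserted in place.
*/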

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ"
PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  MatScalar      value;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;

  /* Some variables required by the macros above */
  Mat         A     = baij->A;
  Mat_SeqBAIJ *a    = (Mat_SeqBAIJ*)(A)->data;
  PetscInt    *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar   *aa   =a->a;

  Mat         B     = baij->B;
  Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
  PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar   *ba   =b->a;

  PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar *ap,*bap;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      row = im[i] - rstart_orig;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig) {
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B    = baij->B;
              b    = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba   =b->a;
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
            else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
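
/*
   Summary of the dispatch above: entries in locally owned rows go either into
   the diagonal block A (columns inside [cstart,cend)) or into the off-diagonal
   block B (all other columns, translated through colmap once the matrix has
   been assembled before); entries for rows owned by other processes are
   stashed and communicated during MatAssemblyBegin()/MatAssemblyEnd().
*/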

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  const PetscScalar *value;
  MatScalar         *barray     = baij->barray;
  PetscBool         roworiented = baij->roworiented;
  PetscErrorCode    ierr;
  PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
  PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) stepval = (n-1)*bs;
  else stepval = (m-1)*bs;

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If n == 1 (row-oriented) or m == 1 (column-oriented) the block in v is already contiguous, so a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + (i*(stepval+bs) + j)*bs;
          } else {
            value = v + (j*(stepval+bs) + i)*bs;
          }
          for (ii=0; ii<bs; ii++,value+=bs+stepval) {
            for (jj=0; jj<bs; jj++) barray[jj] = value[jj];
            barray += bs;
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend) {
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined(PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", bs*im[i], bs*in[j]);
          } else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#define HASH_KEY 0.6180339887
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
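/*
   Multiplicative hashing with the golden-ratio constant: the key is scaled by
   HASH_KEY, the fractional part is kept, and the result is mapped onto the
   table.  For example, with size = 11 and key = 5: tmp = 3.0901..., the
   fractional part is 0.0901..., so HASH = (PetscInt)(11*0.0901...) = 0.
   Collisions are resolved by linear probing in the routines below.
*/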
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij       = (Mat_MPIBAIJ*)mat->data;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig  =mat->rmap->rend,Nbs=baij->Nbs;
  PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
  PetscReal      tmp;
  MatScalar      **HD = baij->hd,value;
#if defined(PETSC_USE_DEBUG)
  PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j=0; j<n; j++) {
        col = in[j];
        if (roworiented) value = v[i*n+j];
        else             value = v[i+j*m];
        /* Look up into the Hash Table */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
#endif
        /* A hash table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else if (!baij->donotstash) {
      if (roworiented) {
        ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
      } else {
        ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct  = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ       *baij       = (Mat_MPIBAIJ*)mat->data;
  PetscBool         roworiented = baij->roworiented;
  PetscErrorCode    ierr;
  PetscInt          i,j,ii,jj,row,col;
  PetscInt          rstart=baij->rstartbs;
  PetscInt          rend  =baij->rendbs,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
  PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  PetscReal         tmp;
  MatScalar         **HD = baij->hd,*baij_a;
  const PetscScalar *v_t,*value;
#if defined(PETSC_USE_DEBUG)
  PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;
  if (roworiented) stepval = (n-1)*bs;
  else stepval = (m-1)*bs;

  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
#endif
    row = im[i];
    v_t = v + i*nbs2;
    if (row >= rstart && row < rend) {
      for (j=0; j<n; j++) {
        col = in[j];

        /* Look up into the Hash Table */
        key = row*Nbs+col+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        total_ct++;
        insert_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
#endif
        baij_a = HD[idx];
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t  += bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] = *value++;
              }
            }
          }
        } else {
          value = v + j*(stepval+bs)*bs + i*bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] = *value++;
              }
            }
          }
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct  = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}
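
/*
   Note on the loops above: BAIJ blocks are stored column-major, i.e. entry
   (r,c) of a bs x bs block lives at offset c*bs + r (cf. the A/B macros and
   the point-wise hash-table routine).  With a row-oriented input array the
   inner loop jj = ii, ii+bs, ... therefore scatters one input row of the
   block across the stored columns.
*/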

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIBAIJ"
PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
  PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          col  = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIBAIJ"
PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v  = amat->a;
      nz = amat->nz*bs2;
      for (i=0; i<nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      v  = bmat->a;
      nz = bmat->nz*bs2;
      for (i=0; i<nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *nrm = PetscSqrtReal(*nrm);
    } else if (type == NORM_1) { /* max column sum */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
      ierr = PetscMalloc2(mat->cmap->N,&tmp,mat->cmap->N,&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      v    = amat->a; jj = amat->j;
      for (i=0; i<amat->nz; i++) {
        for (j=0; j<bs; j++) {
          col = bs*(cstart + *jj) + j; /* column index */
          for (row=0; row<bs; row++) {
            tmp[col] += PetscAbsScalar(*v);  v++;
          }
        }
        jj++;
      }
      v = bmat->a; jj = bmat->j;
      for (i=0; i<bmat->nz; i++) {
        for (j=0; j<bs; j++) {
          col = bs*garray[*jj] + j;
          for (row=0; row<bs; row++) {
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *nrm = 0.0;
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row sum */
      PetscReal *sums;
      ierr = PetscMalloc1(bs,&sums);CHKERRQ(ierr);
      sum  = 0.0;
      for (j=0; j<amat->mbs; j++) {
        for (row=0; row<bs; row++) sums[row] = 0.0;
        v  = amat->a + bs2*amat->i[j];
        nz = amat->i[j+1]-amat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        v  = bmat->a + bs2*bmat->i[j];
        nz = bmat->i[j+1]-bmat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        for (row=0; row<bs; row++) {
          if (sums[row] > sum) sum = sums[row];
        }
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscFree(sums);CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for this norm yet");
  }
  PetscFunctionReturn(0);
}
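
/*
   Note on MatNorm above: the Frobenius norm sums |a_ij|^2 over the local A and
   B parts and reduces with MPIU_SUM before the square root is taken; the
   1-norm accumulates column sums in an array of global length mat->cmap->N
   (non-scalable) and reduces with MPIU_SUM before taking the maximum; the
   infinity norm forms the row sums of each local block row and reduces the
   local maximum with MPIU_MAX.
*/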

/*
  Creates and fills the hash table.
  This table is created only once.
  If new entries need to be added to the matrix
  then the hash table has to be destroyed and
  recreated.
*/
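/*
   A minimal sketch of the insertion probe used below (the lookups in the
   MatSetValues_*_HT routines probe the same way):

     key = row*Nbs + col + 1;                   key 0 is reserved for "empty"
     h1  = HASH(ht_size,key,tmp);
     for (k=0; k<ht_size; k++) {
       if (!HT[(h1+k)%ht_size]) { HT[(h1+k)%ht_size] = key; break; }
     }

   HD[] records, for each occupied slot, the address of the bs2 values of the
   corresponding block, so the hash-table MatSetValues() variants can write
   directly into the matrix storage.
*/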
#undef __FUNCT__
#define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            A     = baij->A,B=baij->B;
  Mat_SeqBAIJ    *a    = (Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)B->data;
  PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
  PetscErrorCode ierr;
  PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
  PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
  PetscInt       *HT,key;
  MatScalar      **HD;
  PetscReal      tmp;
#if defined(PETSC_USE_INFO)
  PetscInt ct=0,max=0;
#endif

  PetscFunctionBegin;
  if (baij->ht) PetscFunctionReturn(0);

  baij->ht_size = (PetscInt)(factor*nz);
  ht_size       = baij->ht_size;

  /* Allocate Memory for the Hash Table */
  ierr = PetscCalloc2(ht_size,&baij->hd,ht_size,&baij->ht);CHKERRQ(ierr);
  HD   = baij->hd;
  HT   = baij->ht;

  /* Loop Over A */
  for (i=0; i<a->mbs; i++) {
    for (j=ai[i]; j<ai[i+1]; j++) {
      row = i+rstart;
      col = aj[j]+cstart;

      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++) {
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = a->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }
  /* Loop Over B */
  for (i=0; i<b->mbs; i++) {
    for (j=bi[i]; j<bi[i+1]; j++) {
      row = i+rstart;
      col = garray[bj[j]];
      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++) {
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = b->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }

  /* Print Summary */
#if defined(PETSC_USE_INFO)
  for (i=0,j=0; i<ht_size; i++) {
    if (HT[i]) j++;
  }
  ierr = PetscInfo2(mat,"Average Search = %5.2f, max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
  ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted, others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *a   =(Mat_SeqBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscBool      r1,r2,r3,other_disassembled;
  MatScalar      *val;
  InsertMode     addv = mat->insertmode;
  PetscMPIInt    n;

  PetscFunctionBegin;
  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  if (!baij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive values belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       clear the roworiented flags, and restore the original flags after
       MatSetValues() */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;

    (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive values belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);

    baij->roworiented = r1;
    a->roworiented    = r2;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, so that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this step
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);

    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);

    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);

  baij->rowvalues = 0;
  {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
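
/*
   Note: at the end of a final assembly, if the hash table was requested (see
   MatSetOption() with MAT_USE_HASH_TABLE) and does not yet exist, it is
   created above and the MatSetValues()/MatSetValuesBlocked() operations are
   redirected to the hash-table variants; subsequent insertions must then stay
   within the existing nonzero pattern, since the hash-table routines error on
   entries that are not in the table.
*/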

extern PetscErrorCode MatView_SeqBAIJ(Mat,PetscViewer);
#include <petscdraw.h>
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = baij->rank;
  PetscInt          bs   = mat->rmap->bs;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
                                                rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto the first processor */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar   *a;

    /* Here we are creating a temporary matrix, so we will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
    ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ*)baij->B->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call this to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatView_SeqBAIJ(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_Binary"
static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *A = (Mat_SeqBAIJ*)a->A->data;
  Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)a->B->data;
  PetscErrorCode ierr;
  PetscInt       i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen;
  PetscInt       *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll;
  int            fd;
  PetscScalar    *column_values;
  FILE           *file;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  PetscInt       message_count,flowcontrolcount;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
  nz   = bs2*(A->nz + B->nz);
  rlen = mat->rmap->n;
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;

    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    range = mat->rmap->range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }

  ierr = PetscMalloc1((rlen/bs),&crow_lens);CHKERRQ(ierr);
  /* compute lengths of each row */
  for (i=0; i<a->mbs; i++) {
    crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }
  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscMalloc1(rlen,&row_lens);CHKERRQ(ierr);
    rlen = (range[1] - range[0])/bs;
    for (i=0; i<rlen; i++) {
      for (j=0; j<bs; j++) {
        row_lens[i*bs+j] = bs*crow_lens[i];
      }
    }
    ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = (range[i+1] - range[i])/bs;
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      for (k=0; k<rlen; k++) {
        for (j=0; j<bs; j++) {
          row_lens[k*bs+j] = bs*crow_lens[k];
        }
      }
      ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
    ierr = PetscFree(row_lens);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(crow_lens);CHKERRQ(ierr);

  /* load up the local column indices. Include them for all rows, not just one per block row,
     since process 0 does not have the information needed to expand a block row into its
     individual rows. This does require more communication, but still no more than the
     communication needed for the nonzero values */
  nzmax = nz; /* space the largest processor needs */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  ierr  = PetscMalloc1(nzmax,&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<a->mbs; i++) {
    pcnt = cnt;
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      for (l=0; l<bs; l++) {
        column_indices[cnt++] = bs*col+l;
      }
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      for (l=0; l<bs; l++) {
        column_indices[cnt++] = bs*(A->j[k] + cstart)+l;
      }
    }
    for (; j<B->i[i+1]; j++) {
      for (l=0; l<bs; l++) {
        column_indices[cnt++] = bs*garray[B->j[j]]+l;
      }
    }
    len = cnt - pcnt;
    for (k=1; k<bs; k++) {
      ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr);
      cnt += len;
    }
  }
  if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);

  /* store the columns to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the numerical values */
  ierr = PetscMalloc1(nzmax,&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<a->mbs; i++) {
    rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]);
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      for (l=0; l<bs; l++) {
        for (ll=0; ll<bs; ll++) {
          column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
        }
      }
      cnt += bs;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      for (l=0; l<bs; l++) {
        for (ll=0; ll<bs; ll++) {
          column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll];
        }
      }
      cnt += bs;
    }
    for (; j<B->i[i+1]; j++) {
      for (l=0; l<bs; l++) {
        for (ll=0; ll<bs; ll++) {
          column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
        }
      }
      cnt += bs;
    }
    cnt += (bs-1)*rlen;
  }
  if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);

  /* store the column values to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);

  ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
  if (file) {
    fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
  }
  PetscFunctionReturn(0);
}
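
/*
   The binary file written above follows the standard PETSc matrix layout:
   a header [MAT_FILE_CLASSID, M, N, total nz], the per-row (point row, not
   block row) lengths, the column indices of every row, and finally the
   numerical values, each gathered from the other processes one at a time
   under flow control; the block size is recorded in the companion .info file
   as -matload_block_size.
*/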

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ"
PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii || isdraw || issocket) {
    ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIBAIJ"
PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1303 
1304 #undef __FUNCT__
1305 #define __FUNCT__ "MatMult_MPIBAIJ"
1306 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1307 {
1308   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1309   PetscErrorCode ierr;
1310   PetscInt       nt;
1311 
1312   PetscFunctionBegin;
1313   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1314   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1315   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1316   if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
1317   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1318   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1319   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1320   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1321   PetscFunctionReturn(0);
1322 }
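
/*
   Illustrative usage sketch (not part of this file; the vectors x,y are
   hypothetical): MatMult() on a MATMPIBAIJ matrix dispatches to
   MatMult_MPIBAIJ() above, which overlaps the scatter of ghost values into
   a->lvec with the product against the diagonal block.
*/
#if 0
  Vec x,y;
  ierr = MatGetVecs(A,&x,&y);CHKERRQ(ierr); /* x conforms to the columns, y to the rows */
  ierr = VecSet(x,1.0);CHKERRQ(ierr);
  ierr = MatMult(A,x,y);CHKERRQ(ierr);      /* y = A_diag*x_local + B_offdiag*x_ghost */
  ierr = VecDestroy(&x);CHKERRQ(ierr);
  ierr = VecDestroy(&y);CHKERRQ(ierr);
#endif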
1323 
1324 #undef __FUNCT__
1325 #define __FUNCT__ "MatMultAdd_MPIBAIJ"
1326 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1327 {
1328   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1329   PetscErrorCode ierr;
1330 
1331   PetscFunctionBegin;
1332   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1333   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1334   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1335   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1336   PetscFunctionReturn(0);
1337 }
1338 
1339 #undef __FUNCT__
1340 #define __FUNCT__ "MatMultTranspose_MPIBAIJ"
1341 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1342 {
1343   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1344   PetscErrorCode ierr;
1345   PetscBool      merged;
1346 
1347   PetscFunctionBegin;
1348   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1349   /* do nondiagonal part */
1350   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1351   if (!merged) {
1352     /* send it on its way */
1353     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1354     /* do local part */
1355     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1356     /* receive remote parts: note this assumes the values are not actually */
1357     /* inserted in yy until the next line */
1358     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1359   } else {
1360     /* do local part */
1361     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1362     /* send it on its way */
1363     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1364     /* values actually were received in the Begin() but we need to call this nop */
1365     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1366   }
1367   PetscFunctionReturn(0);
1368 }
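
/*
   Illustrative sketch (hypothetical vectors xt,yt): the transpose multiply
   reverses the communication pattern of MatMult(), scattering the
   off-diagonal contribution held in a->lvec back to its owning ranks with
   ADD_VALUES in a reverse scatter.
*/
#if 0
  Vec xt,yt;
  ierr = MatGetVecs(A,&yt,&xt);CHKERRQ(ierr);     /* xt conforms to the rows, yt to the columns */
  ierr = MatMultTranspose(A,xt,yt);CHKERRQ(ierr); /* yt = A^T * xt */
  ierr = VecDestroy(&xt);CHKERRQ(ierr);
  ierr = VecDestroy(&yt);CHKERRQ(ierr);
#endif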
1369 
1370 #undef __FUNCT__
1371 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
1372 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1373 {
1374   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1375   PetscErrorCode ierr;
1376 
1377   PetscFunctionBegin;
1378   /* do nondiagonal part */
1379   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1380   /* send it on its way */
1381   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1382   /* do local part */
1383   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1384   /* receive remote parts: note this assumes the values are not actually */
1385   /* inserted in zz until the next line, which is true for this implementation */
1386   /* but is perhaps not always true. */
1387   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1388   PetscFunctionReturn(0);
1389 }
1390 
1391 /*
1392   This only works correctly for square matrices where the subblock A->A is the
1393    diagonal block
1394 */
1395 #undef __FUNCT__
1396 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
1397 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1398 {
1399   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1400   PetscErrorCode ierr;
1401 
1402   PetscFunctionBegin;
1403   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrices where A->A is the diagonal block");
1404   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1405   PetscFunctionReturn(0);
1406 }
1407 
1408 #undef __FUNCT__
1409 #define __FUNCT__ "MatScale_MPIBAIJ"
1410 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
1411 {
1412   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1413   PetscErrorCode ierr;
1414 
1415   PetscFunctionBegin;
1416   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1417   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1418   PetscFunctionReturn(0);
1419 }
1420 
1421 #undef __FUNCT__
1422 #define __FUNCT__ "MatGetRow_MPIBAIJ"
1423 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1424 {
1425   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
1426   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1427   PetscErrorCode ierr;
1428   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1429   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
1430   PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;
1431 
1432   PetscFunctionBegin;
1433   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1434   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1435   mat->getrowactive = PETSC_TRUE;
1436 
1437   if (!mat->rowvalues && (idx || v)) {
1438     /*
1439         allocate enough space to hold information from the longest row.
1440     */
1441     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1442     PetscInt    max = 1,mbs = mat->mbs,tmp;
1443     for (i=0; i<mbs; i++) {
1444       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1445       if (max < tmp) max = tmp;
1446     }
1447     ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr);
1448   }
1449   lrow = row - brstart;
1450 
1451   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1452   if (!v)   {pvA = 0; pvB = 0;}
1453   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1454   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1455   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1456   nztot = nzA + nzB;
1457 
1458   cmap = mat->garray;
1459   if (v  || idx) {
1460     if (nztot) {
1461       /* Sort by increasing column numbers, assuming A and B already sorted */
1462       PetscInt imark = -1;
1463       if (v) {
1464         *v = v_p = mat->rowvalues;
1465         for (i=0; i<nzB; i++) {
1466           if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
1467           else break;
1468         }
1469         imark = i;
1470         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1471         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1472       }
1473       if (idx) {
1474         *idx = idx_p = mat->rowindices;
1475         if (imark > -1) {
1476           for (i=0; i<imark; i++) {
1477             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1478           }
1479         } else {
1480           for (i=0; i<nzB; i++) {
1481             if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1482             else break;
1483           }
1484           imark = i;
1485         }
1486         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1487     for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1488       }
1489     } else {
1490       if (idx) *idx = 0;
1491       if (v)   *v   = 0;
1492     }
1493   }
1494   *nz  = nztot;
1495   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1496   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1497   PetscFunctionReturn(0);
1498 }
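
/*
   Illustrative sketch (hypothetical matrix A): MatGetRow()/MatRestoreRow()
   on an MPIBAIJ matrix may only be called for locally owned rows, and only
   one row may be active at a time, as enforced above; the returned column
   indices are global, merged from the diagonal and off-diagonal parts.
*/
#if 0
  PetscInt          row,rstart,rend,ncols;
  const PetscInt    *cols;
  const PetscScalar *vals;
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  for (row=rstart; row<rend; row++) {
    ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
    /* ... inspect cols[] and vals[] ... */
    ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
  }
#endif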
1499 
1500 #undef __FUNCT__
1501 #define __FUNCT__ "MatRestoreRow_MPIBAIJ"
1502 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1503 {
1504   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1505 
1506   PetscFunctionBegin;
1507   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1508   baij->getrowactive = PETSC_FALSE;
1509   PetscFunctionReturn(0);
1510 }
1511 
1512 #undef __FUNCT__
1513 #define __FUNCT__ "MatZeroEntries_MPIBAIJ"
1514 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1515 {
1516   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1517   PetscErrorCode ierr;
1518 
1519   PetscFunctionBegin;
1520   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1521   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1522   PetscFunctionReturn(0);
1523 }
1524 
1525 #undef __FUNCT__
1526 #define __FUNCT__ "MatGetInfo_MPIBAIJ"
1527 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1528 {
1529   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
1530   Mat            A  = a->A,B = a->B;
1531   PetscErrorCode ierr;
1532   PetscReal      isend[5],irecv[5];
1533 
1534   PetscFunctionBegin;
1535   info->block_size = (PetscReal)matin->rmap->bs;
1536 
1537   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1538 
1539   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1540   isend[3] = info->memory;  isend[4] = info->mallocs;
1541 
1542   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1543 
1544   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1545   isend[3] += info->memory;  isend[4] += info->mallocs;
1546 
1547   if (flag == MAT_LOCAL) {
1548     info->nz_used      = isend[0];
1549     info->nz_allocated = isend[1];
1550     info->nz_unneeded  = isend[2];
1551     info->memory       = isend[3];
1552     info->mallocs      = isend[4];
1553   } else if (flag == MAT_GLOBAL_MAX) {
1554     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1555 
1556     info->nz_used      = irecv[0];
1557     info->nz_allocated = irecv[1];
1558     info->nz_unneeded  = irecv[2];
1559     info->memory       = irecv[3];
1560     info->mallocs      = irecv[4];
1561   } else if (flag == MAT_GLOBAL_SUM) {
1562     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1563 
1564     info->nz_used      = irecv[0];
1565     info->nz_allocated = irecv[1];
1566     info->nz_unneeded  = irecv[2];
1567     info->memory       = irecv[3];
1568     info->mallocs      = irecv[4];
1569   } else SETERRQ1(PetscObjectComm((PetscObject)matin),PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1570   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1571   info->fill_ratio_needed = 0;
1572   info->factor_mallocs    = 0;
1573   PetscFunctionReturn(0);
1574 }
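
/*
   Illustrative sketch: the three MatInfoType modes above differ only in the
   reduction applied to the five per-process counters (none, MPI max, or
   MPI sum).
*/
#if 0
  MatInfo info;
  ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr); /* sums counters over all ranks */
  ierr = PetscPrintf(PETSC_COMM_WORLD,"nz used %g, allocated %g\n",info.nz_used,info.nz_allocated);CHKERRQ(ierr);
#endif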
1575 
1576 #undef __FUNCT__
1577 #define __FUNCT__ "MatSetOption_MPIBAIJ"
1578 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg)
1579 {
1580   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1581   PetscErrorCode ierr;
1582 
1583   PetscFunctionBegin;
1584   switch (op) {
1585   case MAT_NEW_NONZERO_LOCATIONS:
1586   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1587   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1588   case MAT_KEEP_NONZERO_PATTERN:
1589   case MAT_NEW_NONZERO_LOCATION_ERR:
1590     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1591     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1592     break;
1593   case MAT_ROW_ORIENTED:
1594     a->roworiented = flg;
1595 
1596     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1597     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1598     break;
1599   case MAT_NEW_DIAGONALS:
1600     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1601     break;
1602   case MAT_IGNORE_OFF_PROC_ENTRIES:
1603     a->donotstash = flg;
1604     break;
1605   case MAT_USE_HASH_TABLE:
1606     a->ht_flag = flg;
1607     break;
1608   case MAT_SYMMETRIC:
1609   case MAT_STRUCTURALLY_SYMMETRIC:
1610   case MAT_HERMITIAN:
1611   case MAT_SYMMETRY_ETERNAL:
1612     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1613     break;
1614   default:
1615     SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"unknown option %d",op);
1616   }
1617   PetscFunctionReturn(0);
1618 }
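
/*
   Illustrative sketch: options are either forwarded to the sequential
   diagonal/off-diagonal blocks, recorded in the Mat_MPIBAIJ context
   (donotstash, ht_flag, roworiented), or ignored with an info message.
*/
#if 0
  ierr = MatSetOption(A,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);          /* sets a->ht_flag, used during assembly */
  ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); /* sets a->donotstash */
#endif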
1619 
1620 #undef __FUNCT__
1621 #define __FUNCT__ "MatTranspose_MPIBAIJ"
1622 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
1623 {
1624   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
1625   Mat_SeqBAIJ    *Aloc;
1626   Mat            B;
1627   PetscErrorCode ierr;
1628   PetscInt       M =A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
1629   PetscInt       bs=A->rmap->bs,mbs=baij->mbs;
1630   MatScalar      *a;
1631 
1632   PetscFunctionBegin;
1633   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1634   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1635     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1636     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1637     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1638     /* Do not know preallocation information, but must set block size */
1639     ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,NULL,PETSC_DECIDE,NULL);CHKERRQ(ierr);
1640   } else {
1641     B = *matout;
1642   }
1643 
1644   /* copy over the A part */
1645   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1646   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1647   ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);
1648 
1649   for (i=0; i<mbs; i++) {
1650     rvals[0] = bs*(baij->rstartbs + i);
1651     for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1652     for (j=ai[i]; j<ai[i+1]; j++) {
1653       col = (baij->cstartbs+aj[j])*bs;
1654       for (k=0; k<bs; k++) {
1655         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1656 
1657         col++; a += bs;
1658       }
1659     }
1660   }
1661   /* copy over the B part */
1662   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1663   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1664   for (i=0; i<mbs; i++) {
1665     rvals[0] = bs*(baij->rstartbs + i);
1666     for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1667     for (j=ai[i]; j<ai[i+1]; j++) {
1668       col = baij->garray[aj[j]]*bs;
1669       for (k=0; k<bs; k++) {
1670         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1671         col++;
1672         a += bs;
1673       }
1674     }
1675   }
1676   ierr = PetscFree(rvals);CHKERRQ(ierr);
1677   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1678   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1679 
1680   if (reuse == MAT_INITIAL_MATRIX || *matout != A) *matout = B;
1681   else {
1682     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
1683   }
1684   PetscFunctionReturn(0);
1685 }
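
/*
   Illustrative sketch (hypothetical At): the routine above builds the
   transpose by re-inserting blocks through MatSetValues_MPIBAIJ(); in-place
   transposition is only permitted for square matrices.
*/
#if 0
  Mat At;
  ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); /* allocates a new matrix */
  ierr = MatTranspose(A,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);    /* in place, square matrices only */
  ierr = MatDestroy(&At);CHKERRQ(ierr);
#endif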
1686 
1687 #undef __FUNCT__
1688 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
1689 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1690 {
1691   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1692   Mat            a     = baij->A,b = baij->B;
1693   PetscErrorCode ierr;
1694   PetscInt       s1,s2,s3;
1695 
1696   PetscFunctionBegin;
1697   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1698   if (rr) {
1699     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1700     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1701     /* Overlap communication with computation. */
1702     ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1703   }
1704   if (ll) {
1705     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1706     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1707     ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
1708   }
1709   /* scale  the diagonal block */
1710   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1711 
1712   if (rr) {
1713     /* Do a scatter end and then right scale the off-diagonal block */
1714     ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1715     ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
1716   }
1717   PetscFunctionReturn(0);
1718 }
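
/*
   Illustrative sketch (hypothetical vectors l and r): left scaling needs
   only local data, while right scaling of the off-diagonal block must wait
   for the ghost values of r, which is why the scatter above is overlapped
   with the left scaling and the diagonal-block scaling.
*/
#if 0
  Vec l,r;
  ierr = MatGetVecs(A,&r,&l);CHKERRQ(ierr);     /* r conforms to the columns, l to the rows */
  ierr = VecSet(l,2.0);CHKERRQ(ierr);
  ierr = VecSet(r,0.5);CHKERRQ(ierr);
  ierr = MatDiagonalScale(A,l,r);CHKERRQ(ierr); /* A <- diag(l) A diag(r) */
  ierr = VecDestroy(&l);CHKERRQ(ierr);
  ierr = VecDestroy(&r);CHKERRQ(ierr);
#endif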
1719 
1720 #undef __FUNCT__
1721 #define __FUNCT__ "MatZeroRows_MPIBAIJ"
1722 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
1723 {
1724   Mat_MPIBAIJ   *l      = (Mat_MPIBAIJ *) A->data;
1725   PetscInt      *owners = A->rmap->range;
1726   PetscInt       n      = A->rmap->n;
1727   PetscMPIInt    size   = l->size;
1728   PetscSF        sf;
1729   PetscInt      *lrows;
1730   PetscSFNode   *rrows;
1731   PetscInt       lastidx = -1, r, p = 0, len = 0;
1732   PetscErrorCode ierr;
1733 
1734   PetscFunctionBegin;
1735   /* Create SF where leaves are input rows and roots are owned rows */
1736   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
1737   for (r = 0; r < n; ++r) lrows[r] = -1;
1738   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
1739   for (r = 0; r < N; ++r) {
1740     const PetscInt idx   = rows[r];
1741     PetscBool      found = PETSC_FALSE;
1742     /* Trick for efficient searching for sorted rows */
1743     if (lastidx > idx) p = 0;
1744     lastidx = idx;
1745     for (; p < size; ++p) {
1746       if (idx >= owners[p] && idx < owners[p+1]) {
1747         rrows[r].rank  = p;
1748         rrows[r].index = rows[r] - owners[p];
1749         found = PETSC_TRUE;
1750         break;
1751       }
1752     }
1753     if (!found) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %D not found in matrix distribution", idx);
1754   }
1755   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
1756   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
1757   /* Collect flags for rows to be zeroed */
1758   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1759   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1760   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1761   /* Compress and put in row numbers */
1762   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
1763   /* fix right hand side if needed */
1764   if (x && b) {
1765     const PetscScalar *xx;
1766     PetscScalar       *bb;
1767 
1768     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
1769     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1770     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
1771     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
1772     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1773   }
1774 
1775   /*
1776         Zero the required local rows. If the "diagonal block" of the matrix
1777      is square and the user wishes to set the diagonal, we use separate
1778      code so that MatSetValues() is not called for each diagonal entry,
1779      which would allocate new memory, causing lots of mallocs and slowing
1780      things down.
1781
1782   */
1783   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1784   ierr = MatZeroRows_SeqBAIJ(l->B,len,lrows,0.0,0,0);CHKERRQ(ierr);
1785   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
1786     ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,diag,0,0);CHKERRQ(ierr);
1787   } else if (diag != 0.0) {
1788     ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,0,0);CHKERRQ(ierr);
1789     if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1790        MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1791     for (r = 0; r < len; ++r) {
1792       const PetscInt row = lrows[r] + A->rmap->rstart;
1793       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
1794     }
1795     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1796     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1797   } else {
1798     ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,0,0);CHKERRQ(ierr);
1799   }
1800   ierr = PetscFree(lrows);CHKERRQ(ierr);
1801   {
1802     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1803     ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1804   }
1805   PetscFunctionReturn(0);
1806 }
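
/*
   Illustrative sketch of the typical Dirichlet boundary-condition use
   (hypothetical row list and vectors x,b): each rank may pass any global
   rows; the PetscSF built above routes them to their owning ranks before
   the local zeroing.
*/
#if 0
  PetscInt    bcrows[] = {0,5,9}; /* global row numbers */
  PetscScalar diag     = 1.0;
  ierr = MatZeroRows(A,3,bcrows,diag,x,b);CHKERRQ(ierr); /* with x,b given, also sets b[row] = diag*x[row] */
#endif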
1807 
1808 #undef __FUNCT__
1809 #define __FUNCT__ "MatZeroRowsColumns_MPIBAIJ"
1810 PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
1811 {
1812   Mat_MPIBAIJ       *l = (Mat_MPIBAIJ*)A->data;
1813   PetscErrorCode    ierr;
1814   PetscMPIInt       size = l->size;
1815   PetscInt          i,j,k,r,p = 0,len = 0,row,col,count,n = A->rmap->n,lastidx = -1;
1816   PetscInt          *lrows,*owners = A->rmap->range;
1817   PetscSFNode       *rrows;
1818   PetscSF           sf;
1819   const PetscScalar *xx;
1820   PetscScalar       *bb,*mask;
1821   Vec               xmask,lmask;
1822   Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ*)l->B->data;
1823   PetscInt           bs = A->rmap->bs, bs2 = baij->bs2;
1824   PetscScalar       *aa;
1825 #if defined(PETSC_DEBUG)
1826   PetscBool found = PETSC_FALSE;
1827 #endif
1828 
1829   PetscFunctionBegin;
1830   /* Create SF where leaves are input rows and roots are owned rows */
1831   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
1832   for (r = 0; r < n; ++r) lrows[r] = -1;
1833   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
1834   for (r = 0; r < N; ++r) {
1835     const PetscInt idx   = rows[r];
1836     PetscBool      found = PETSC_FALSE;
1837     /* Trick for efficient searching for sorted rows */
1838     if (lastidx > idx) p = 0;
1839     lastidx = idx;
1840     for (; p < size; ++p) {
1841       if (idx >= owners[p] && idx < owners[p+1]) {
1842         rrows[r].rank  = p;
1843         rrows[r].index = rows[r] - owners[p];
1844         found = PETSC_TRUE;
1845         break;
1846       }
1847     }
1848     if (!found) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %D not found in matrix distribution", idx);
1849   }
1850   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
1851   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
1852   /* Collect flags for rows to be zeroed */
1853   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1854   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1855   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1856   /* Compress and put in row numbers */
1857   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
1858   /* zero diagonal part of matrix */
1859   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
1860   /* handle off diagonal part of matrix */
1861   ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr);
1862   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
1863   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
1864   for (i=0; i<len; i++) bb[lrows[i]] = 1;
1865   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1866   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1867   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1868   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1869   if (x) {
1870     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1871     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1872     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1873     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1874   }
1875   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1876   /* remove zeroed rows of off diagonal matrix */
1877   for (i = 0; i < len; ++i) {
1878     row   = lrows[i];
1879     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
1880     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
1881     for (k = 0; k < count; ++k) {
1882       aa[0] = 0.0;
1883       aa   += bs;
1884     }
1885   }
1886   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
1887   for (i = 0; i < l->B->rmap->N; ++i) {
1888     row = i/bs;
1889     for (j = baij->i[row]; j < baij->i[row+1]; ++j) {
1890       for (k = 0; k < bs; ++k) {
1891         col = bs*baij->j[j] + k;
1892         if (PetscAbsScalar(mask[col])) {
1893           aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
1894           if (b) bb[i] -= aa[0]*xx[col];
1895           aa[0] = 0.0;
1896         }
1897       }
1898     }
1899   }
1900   if (x) {
1901     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1902     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1903   }
1904   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1905   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1906   ierr = PetscFree(lrows);CHKERRQ(ierr);
1907   PetscFunctionReturn(0);
1908 }
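
/*
   Illustrative sketch (hypothetical row list and vectors x,b): unlike
   MatZeroRows(), this variant also eliminates the columns, folding the known
   values into the right-hand side (the bb[i] -= aa[0]*xx[col] update above),
   so a symmetric matrix stays symmetric.
*/
#if 0
  PetscInt bcrows[] = {0,5,9}; /* global rows/columns to eliminate */
  ierr = MatZeroRowsColumns(A,3,bcrows,1.0,x,b);CHKERRQ(ierr);
#endif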
1909 
1910 #undef __FUNCT__
1911 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
1912 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1913 {
1914   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1915   PetscErrorCode ierr;
1916 
1917   PetscFunctionBegin;
1918   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1919   PetscFunctionReturn(0);
1920 }
1921 
1922 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat*);
1923 
1924 #undef __FUNCT__
1925 #define __FUNCT__ "MatEqual_MPIBAIJ"
1926 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool  *flag)
1927 {
1928   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1929   Mat            a,b,c,d;
1930   PetscBool      flg;
1931   PetscErrorCode ierr;
1932 
1933   PetscFunctionBegin;
1934   a = matA->A; b = matA->B;
1935   c = matB->A; d = matB->B;
1936 
1937   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1938   if (flg) {
1939     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1940   }
1941   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1942   PetscFunctionReturn(0);
1943 }
1944 
1945 #undef __FUNCT__
1946 #define __FUNCT__ "MatCopy_MPIBAIJ"
1947 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
1948 {
1949   PetscErrorCode ierr;
1950   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1951   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
1952 
1953   PetscFunctionBegin;
1954   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1955   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1956     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1957   } else {
1958     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1959     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1960   }
1961   PetscFunctionReturn(0);
1962 }
1963 
1964 #undef __FUNCT__
1965 #define __FUNCT__ "MatSetUp_MPIBAIJ"
1966 PetscErrorCode MatSetUp_MPIBAIJ(Mat A)
1967 {
1968   PetscErrorCode ierr;
1969 
1970   PetscFunctionBegin;
1971   ierr = MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1972   PetscFunctionReturn(0);
1973 }
1974 
1975 #undef __FUNCT__
1976 #define __FUNCT__ "MatAXPY_MPIBAIJ"
1977 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1978 {
1979   PetscErrorCode ierr;
1980   Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ*)X->data,*yy=(Mat_MPIBAIJ*)Y->data;
1981   PetscBLASInt   bnz,one=1;
1982   Mat_SeqBAIJ    *x,*y;
1983 
1984   PetscFunctionBegin;
1985   if (str == SAME_NONZERO_PATTERN) {
1986     PetscScalar alpha = a;
1987     x    = (Mat_SeqBAIJ*)xx->A->data;
1988     y    = (Mat_SeqBAIJ*)yy->A->data;
1989     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
1990     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
1991     x    = (Mat_SeqBAIJ*)xx->B->data;
1992     y    = (Mat_SeqBAIJ*)yy->B->data;
1993     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
1994     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
1995   } else {
1996     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1997   }
1998   PetscFunctionReturn(0);
1999 }
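
/*
   Illustrative sketch: with SAME_NONZERO_PATTERN the update above reduces to
   two BLAS axpy calls on the raw value arrays of the diagonal and
   off-diagonal blocks; any other pattern falls back to MatAXPY_Basic().
*/
#if 0
  ierr = MatAXPY(Y,2.0,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* Y <- Y + 2*X */
#endif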
2000 
2001 #undef __FUNCT__
2002 #define __FUNCT__ "MatRealPart_MPIBAIJ"
2003 PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
2004 {
2005   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
2006   PetscErrorCode ierr;
2007 
2008   PetscFunctionBegin;
2009   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2010   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2011   PetscFunctionReturn(0);
2012 }
2013 
2014 #undef __FUNCT__
2015 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ"
2016 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
2017 {
2018   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
2019   PetscErrorCode ierr;
2020 
2021   PetscFunctionBegin;
2022   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2023   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2024   PetscFunctionReturn(0);
2025 }
2026 
2027 #undef __FUNCT__
2028 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ"
2029 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
2030 {
2031   PetscErrorCode ierr;
2032   IS             iscol_local;
2033   PetscInt       csize;
2034 
2035   PetscFunctionBegin;
2036   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
2037   if (call == MAT_REUSE_MATRIX) {
2038     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
2039     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2040   } else {
2041     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
2042   }
2043   ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
2044   if (call == MAT_INITIAL_MATRIX) {
2045     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
2046     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
2047   }
2048   PetscFunctionReturn(0);
2049 }
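
/*
   Illustrative sketch (hypothetical index sets; assumes the stride indices
   respect the block size): the column IS is gathered onto every rank and
   cached on the result under "ISAllGather" so that a MAT_REUSE_MATRIX call
   can skip the gather.
*/
#if 0
  IS       isrow,iscol;
  Mat      S;
  PetscInt rstart,rend;
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  ierr = ISCreateStride(PetscObjectComm((PetscObject)A),rend-rstart,rstart,1,&isrow);CHKERRQ(ierr);
  ierr = ISCreateStride(PetscObjectComm((PetscObject)A),rend-rstart,rstart,1,&iscol);CHKERRQ(ierr);
  ierr = MatGetSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&S);CHKERRQ(ierr);
  ierr = MatGetSubMatrix(A,isrow,iscol,MAT_REUSE_MATRIX,&S);CHKERRQ(ierr); /* reuses the cached gather */
  ierr = ISDestroy(&isrow);CHKERRQ(ierr);
  ierr = ISDestroy(&iscol);CHKERRQ(ierr);
  ierr = MatDestroy(&S);CHKERRQ(ierr);
#endif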
2050 extern PetscErrorCode MatGetSubMatrices_MPIBAIJ_local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,PetscBool*,Mat*);
2051 #undef __FUNCT__
2052 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ_Private"
2053 /*
2054   Not great since it makes two copies of the submatrix: first a local SeqBAIJ,
2055   and then the end result obtained by concatenating the local matrices.
2056   Writing it directly would be much like MatGetSubMatrices_MPIBAIJ()
2057 */
2058 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2059 {
2060   PetscErrorCode ierr;
2061   PetscMPIInt    rank,size;
2062   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs;
2063   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol,nrow;
2064   Mat            M,Mreuse;
2065   MatScalar      *vwork,*aa;
2066   MPI_Comm       comm;
2067   IS             isrow_new, iscol_new;
2068   PetscBool      idflag,allrows, allcols;
2069   Mat_SeqBAIJ    *aij;
2070 
2071   PetscFunctionBegin;
2072   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2073   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2074   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2075   /* The compression and expansion should be avoided: it does not flag
2076      errors and it may change the indices, hence it is buggy */
2077   ierr = ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new);CHKERRQ(ierr);
2078   ierr = ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new);CHKERRQ(ierr);
2079 
2080   /* Check for special case: each processor gets entire matrix columns */
2081   ierr = ISIdentity(iscol,&idflag);CHKERRQ(ierr);
2082   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
2083   if (idflag && ncol == mat->cmap->N) allcols = PETSC_TRUE;
2084   else allcols = PETSC_FALSE;
2085 
2086   ierr = ISIdentity(isrow,&idflag);CHKERRQ(ierr);
2087   ierr = ISGetLocalSize(isrow,&nrow);CHKERRQ(ierr);
2088   if (idflag && nrow == mat->rmap->N) allrows = PETSC_TRUE;
2089   else allrows = PETSC_FALSE;
2090 
2091   if (call ==  MAT_REUSE_MATRIX) {
2092     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
2093     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2094     ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_REUSE_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr);
2095   } else {
2096     ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_INITIAL_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr);
2097   }
2098   ierr = ISDestroy(&isrow_new);CHKERRQ(ierr);
2099   ierr = ISDestroy(&iscol_new);CHKERRQ(ierr);
2100   /*
2101       m - number of local rows
2102       n - number of columns (same on all processors)
2103       rstart - first row in new global matrix generated
2104   */
2105   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2106   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2107   m    = m/bs;
2108   n    = n/bs;
2109 
2110   if (call == MAT_INITIAL_MATRIX) {
2111     aij = (Mat_SeqBAIJ*)(Mreuse)->data;
2112     ii  = aij->i;
2113     jj  = aij->j;
2114 
2115     /*
2116         Determine the number of non-zeros in the diagonal and off-diagonal
2117         portions of the matrix in order to do correct preallocation
2118     */
2119 
2120     /* first get start and end of "diagonal" columns */
2121     if (csize == PETSC_DECIDE) {
2122       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2123       if (mglobal == n*bs) { /* square matrix */
2124         nlocal = m;
2125       } else {
2126         nlocal = n/size + ((n % size) > rank);
2127       }
2128     } else {
2129       nlocal = csize/bs;
2130     }
2131     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2132     rstart = rend - nlocal;
2133     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2134 
2135     /* next, compute all the lengths */
2136     ierr  = PetscMalloc2(m+1,&dlens,m+1,&olens);CHKERRQ(ierr);
2137     for (i=0; i<m; i++) {
2138       jend = ii[i+1] - ii[i];
2139       olen = 0;
2140       dlen = 0;
2141       for (j=0; j<jend; j++) {
2142         if (*jj < rstart || *jj >= rend) olen++;
2143         else dlen++;
2144         jj++;
2145       }
2146       olens[i] = olen;
2147       dlens[i] = dlen;
2148     }
2149     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
2150     ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr);
2151     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
2152     ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
2153     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
2154   } else {
2155     PetscInt ml,nl;
2156 
2157     M    = *newmat;
2158     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2159     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2160     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2161     /*
2162          The next two lines are needed so we may call MatSetValuesBlocked_MPIBAIJ() below directly,
2163        rather than the slower MatSetValues().
2164     */
2165     M->was_assembled = PETSC_TRUE;
2166     M->assembled     = PETSC_FALSE;
2167   }
2168   ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2169   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2170   aij  = (Mat_SeqBAIJ*)(Mreuse)->data;
2171   ii   = aij->i;
2172   jj   = aij->j;
2173   aa   = aij->a;
2174   for (i=0; i<m; i++) {
2175     row   = rstart/bs + i;
2176     nz    = ii[i+1] - ii[i];
2177     cwork = jj;     jj += nz;
2178     vwork = aa;     aa += nz*bs*bs;
2179     ierr  = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2180   }
2181 
2182   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2183   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2184   *newmat = M;
2185 
2186   /* save submatrix used in processor for next request */
2187   if (call ==  MAT_INITIAL_MATRIX) {
2188     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2189     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2190   }
2191   PetscFunctionReturn(0);
2192 }
2193 
2194 #undef __FUNCT__
2195 #define __FUNCT__ "MatPermute_MPIBAIJ"
2196 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B)
2197 {
2198   MPI_Comm       comm,pcomm;
2199   PetscInt       clocal_size,nrows;
2200   const PetscInt *rows;
2201   PetscMPIInt    size;
2202   IS             crowp,lcolp;
2203   PetscErrorCode ierr;
2204 
2205   PetscFunctionBegin;
2206   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2207   /* make a collective version of 'rowp' */
2208   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
2209   if (pcomm==comm) {
2210     crowp = rowp;
2211   } else {
2212     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
2213     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
2214     ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr);
2215     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
2216   }
2217   ierr = ISSetPermutation(crowp);CHKERRQ(ierr);
2218   /* make a local version of 'colp' */
2219   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
2220   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
2221   if (size==1) {
2222     lcolp = colp;
2223   } else {
2224     ierr = ISAllGather(colp,&lcolp);CHKERRQ(ierr);
2225   }
2226   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
2227   /* now we just get the submatrix */
2228   ierr = MatGetLocalSize(A,NULL,&clocal_size);CHKERRQ(ierr);
2229   ierr = MatGetSubMatrix_MPIBAIJ_Private(A,crowp,lcolp,clocal_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
2230   /* clean up */
2231   if (pcomm!=comm) {
2232     ierr = ISDestroy(&crowp);CHKERRQ(ierr);
2233   }
2234   if (size>1) {
2235     ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
2236   }
2237   PetscFunctionReturn(0);
2238 }
2239 
2240 #undef __FUNCT__
2241 #define __FUNCT__ "MatGetGhosts_MPIBAIJ"
2242 PetscErrorCode  MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
2243 {
2244   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data;
2245   Mat_SeqBAIJ *B    = (Mat_SeqBAIJ*)baij->B->data;
2246 
2247   PetscFunctionBegin;
2248   if (nghosts) *nghosts = B->nbs;
2249   if (ghosts) *ghosts = baij->garray;
2250   PetscFunctionReturn(0);
2251 }
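
/*
   Illustrative sketch: the ghosts are the global block-column indices of the
   off-diagonal part (baij->garray), one entry per ghost block; the array is
   owned by the matrix and must not be freed.
*/
#if 0
  PetscInt       nghosts;
  const PetscInt *ghosts;
  ierr = MatGetGhosts(A,&nghosts,&ghosts);CHKERRQ(ierr); /* nghosts = number of ghost blocks (B->nbs) */
#endif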
2252 
2253 #undef __FUNCT__
2254 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIBAIJ"
2255 PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat)
2256 {
2257   Mat            B;
2258   Mat_MPIBAIJ    *a  = (Mat_MPIBAIJ*)A->data;
2259   Mat_SeqBAIJ    *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data;
2260   Mat_SeqAIJ     *b;
2261   PetscErrorCode ierr;
2262   PetscMPIInt    size,rank,*recvcounts = 0,*displs = 0;
2263   PetscInt       sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs;
2264   PetscInt       m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf;
2265 
2266   PetscFunctionBegin;
2267   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
2268   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
2269 
2270   /* ----------------------------------------------------------------
2271      Tell every processor the number of nonzeros per row
2272   */
2273   ierr = PetscMalloc1((A->rmap->N/bs),&lens);CHKERRQ(ierr);
2274   for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) {
2275     lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs];
2276   }
2277   sendcount = A->rmap->rend/bs - A->rmap->rstart/bs;
2278   ierr      = PetscMalloc1(2*size,&recvcounts);CHKERRQ(ierr);
2279   displs    = recvcounts + size;
2280   for (i=0; i<size; i++) {
2281     recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs;
2282     displs[i]     = A->rmap->range[i]/bs;
2283   }
2284 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2285   ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2286 #else
2287   ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2288 #endif
2289   /* ---------------------------------------------------------------
2290      Create the sequential matrix of the same type as the local block diagonal
2291   */
2292   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
2293   ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2294   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2295   ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr);
2296   b    = (Mat_SeqAIJ*)B->data;
2297 
2298   /*--------------------------------------------------------------------
2299     Copy my part of matrix column indices over
2300   */
2301   sendcount  = ad->nz + bd->nz;
2302   jsendbuf   = b->j + b->i[rstarts[rank]/bs];
2303   a_jsendbuf = ad->j;
2304   b_jsendbuf = bd->j;
2305   n          = A->rmap->rend/bs - A->rmap->rstart/bs;
2306   cnt        = 0;
2307   for (i=0; i<n; i++) {
2308 
2309     /* put in lower diagonal portion */
2310     m = bd->i[i+1] - bd->i[i];
2311     while (m > 0) {
2312       /* is it above diagonal (in bd (compressed) numbering) */
2313       if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break;
2314       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2315       m--;
2316     }
2317 
2318     /* put in diagonal portion */
2319     for (j=ad->i[i]; j<ad->i[i+1]; j++) {
2320       jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++;
2321     }
2322 
2323     /* put in upper diagonal portion */
2324     while (m-- > 0) {
2325       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2326     }
2327   }
2328   if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt);
2329 
2330   /*--------------------------------------------------------------------
2331     Gather all column indices to all processors
2332   */
2333   for (i=0; i<size; i++) {
2334     recvcounts[i] = 0;
2335     for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) {
2336       recvcounts[i] += lens[j];
2337     }
2338   }
2339   displs[0] = 0;
2340   for (i=1; i<size; i++) {
2341     displs[i] = displs[i-1] + recvcounts[i-1];
2342   }
2343 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2344   ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2345 #else
2346   ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2347 #endif
2348   /*--------------------------------------------------------------------
2349     Assemble the matrix into usable form (note the numerical values are not yet set)
2350   */
2351   /* set the b->ilen (length of each row) values */
2352   ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr);
2353   /* set the b->i indices */
2354   b->i[0] = 0;
2355   for (i=1; i<=A->rmap->N/bs; i++) {
2356     b->i[i] = b->i[i-1] + lens[i-1];
2357   }
2358   ierr = PetscFree(lens);CHKERRQ(ierr);
2359   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2360   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2361   ierr = PetscFree(recvcounts);CHKERRQ(ierr);
2362 
2363   if (A->symmetric) {
2364     ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2365   } else if (A->hermitian) {
2366     ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr);
2367   } else if (A->structurally_symmetric) {
2368     ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2369   }
2370   *newmat = B;
2371   PetscFunctionReturn(0);
2372 }
2373 
2374 #undef __FUNCT__
2375 #define __FUNCT__ "MatSOR_MPIBAIJ"
2376 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2377 {
2378   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
2379   PetscErrorCode ierr;
2380   Vec            bb1 = 0;
2381 
2382   PetscFunctionBegin;
2383   if (flag == SOR_APPLY_UPPER) {
2384     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2385     PetscFunctionReturn(0);
2386   }
2387 
2388   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) {
2389     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2390   }
2391 
2392   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2393     if (flag & SOR_ZERO_INITIAL_GUESS) {
2394       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2395       its--;
2396     }
2397 
2398     while (its--) {
2399       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2400       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2401 
2402       /* update rhs: bb1 = bb - B*x */
2403       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2404       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2405 
2406       /* local sweep */
2407       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2408     }
2409   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
2410     if (flag & SOR_ZERO_INITIAL_GUESS) {
2411       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2412       its--;
2413     }
2414     while (its--) {
2415       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2416       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2417 
2418       /* update rhs: bb1 = bb - B*x */
2419       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2420       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2421 
2422       /* local sweep */
2423       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2424     }
2425   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
2426     if (flag & SOR_ZERO_INITIAL_GUESS) {
2427       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2428       its--;
2429     }
2430     while (its--) {
2431       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2432       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2433 
2434       /* update rhs: bb1 = bb - B*x */
2435       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2436       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2437 
2438       /* local sweep */
2439       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2440     }
2441   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Requested parallel SOR variant not supported");
2442 
2443   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2444   PetscFunctionReturn(0);
2445 }
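
/*
   Illustrative sketch: this kernel is normally reached through PCSOR; only
   SOR_APPLY_UPPER and the "local" sweep variants are supported. Each outer
   iteration refreshes the ghost values and folds the off-diagonal
   contribution into the right-hand side as bb1 = bb - B*x before the
   sequential sweep on the diagonal block. The PC object pc is hypothetical,
   e.g. obtained from KSPGetPC().
*/
#if 0
  /* run-time equivalent: -pc_type sor -pc_sor_local_symmetric */
  ierr = PCSetType(pc,PCSOR);CHKERRQ(ierr);
  ierr = PCSORSetSymmetric(pc,SOR_LOCAL_SYMMETRIC_SWEEP);CHKERRQ(ierr);
#endif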
2446 
2447 #undef __FUNCT__
2448 #define __FUNCT__ "MatGetColumnNorms_MPIBAIJ"
2449 PetscErrorCode MatGetColumnNorms_MPIBAIJ(Mat A,NormType type,PetscReal *norms)
2450 {
2451   PetscErrorCode ierr;
2452   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)A->data;
2453   PetscInt       N,i,*garray = aij->garray;
2454   PetscInt       ib,jb,bs = A->rmap->bs;
2455   Mat_SeqBAIJ    *a_aij = (Mat_SeqBAIJ*) aij->A->data;
2456   MatScalar      *a_val = a_aij->a;
2457   Mat_SeqBAIJ    *b_aij = (Mat_SeqBAIJ*) aij->B->data;
2458   MatScalar      *b_val = b_aij->a;
2459   PetscReal      *work;
2460 
2461   PetscFunctionBegin;
2462   ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
2463   ierr = PetscCalloc1(N,&work);CHKERRQ(ierr);
2464   if (type == NORM_2) {
2465     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2466       for (jb=0; jb<bs; jb++) {
2467         for (ib=0; ib<bs; ib++) {
2468           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
2469           a_val++;
2470         }
2471       }
2472     }
2473     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2474       for (jb=0; jb<bs; jb++) {
2475         for (ib=0; ib<bs; ib++) {
2476           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val);
2477           b_val++;
2478         }
2479       }
2480     }
2481   } else if (type == NORM_1) {
2482     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2483       for (jb=0; jb<bs; jb++) {
2484         for (ib=0; ib<bs; ib++) {
2485           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
2486           a_val++;
2487         }
2488       }
2489     }
2490     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2491       for (jb=0; jb<bs; jb++) {
2492         for (ib=0; ib<bs; ib++) {
2493           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val);
2494           b_val++;
2495         }
2496       }
2497     }
2498   } else if (type == NORM_INFINITY) {
2499     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2500       for (jb=0; jb<bs; jb++) {
2501         for (ib=0; ib<bs; ib++) {
2502           PetscInt col = A->cmap->rstart + a_aij->j[i] * bs + jb;
2503           work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]);
2504           a_val++;
2505         }
2506       }
2507     }
2508     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2509       for (jb=0; jb<bs; jb++) {
2510         for (ib=0; ib<bs; ib++) {
2511           PetscInt col = garray[b_aij->j[i]] * bs + jb;
2512           work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]);
2513           b_val++;
2514         }
2515       }
2516     }
2517   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
2518   if (type == NORM_INFINITY) {
2519     ierr = MPI_Allreduce(work,norms,N,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2520   } else {
2521     ierr = MPI_Allreduce(work,norms,N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2522   }
2523   ierr = PetscFree(work);CHKERRQ(ierr);
2524   if (type == NORM_2) {
2525     for (i=0; i<N; i++) norms[i] = PetscSqrtReal(norms[i]);
2526   }
2527   PetscFunctionReturn(0);
2528 }
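
/*
   Illustrative sketch (hypothetical array): each rank accumulates the
   contributions of its diagonal and off-diagonal blocks into a dense
   length-N work array, which is then reduced with a max (infinity norm) or
   a sum (1-norm, and squared entries for the 2-norm).
*/
#if 0
  PetscInt  N;
  PetscReal *norms;
  ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
  ierr = PetscMalloc1(N,&norms);CHKERRQ(ierr);
  ierr = MatGetColumnNorms(A,NORM_2,norms);CHKERRQ(ierr); /* also NORM_1 or NORM_INFINITY */
  ierr = PetscFree(norms);CHKERRQ(ierr);
#endif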
2529 
2530 #undef __FUNCT__
2531 #define __FUNCT__ "MatInvertBlockDiagonal_MPIBAIJ"
2532 PetscErrorCode  MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values)
2533 {
2534   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*) A->data;
2535   PetscErrorCode ierr;
2536 
2537   PetscFunctionBegin;
2538   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2539   PetscFunctionReturn(0);
2540 }
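
/*
   Illustrative sketch: the work is delegated to the sequential diagonal
   block, which returns the bs*bs inverses of the diagonal blocks in an
   array owned by the matrix (do not free it).
*/
#if 0
  const PetscScalar *idiag;
  ierr = MatInvertBlockDiagonal(A,&idiag);CHKERRQ(ierr); /* one bs*bs inverse per local block row */
#endif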
2541 
2542 
2543 /* -------------------------------------------------------------------*/
2544 static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ,
2545                                        MatGetRow_MPIBAIJ,
2546                                        MatRestoreRow_MPIBAIJ,
2547                                        MatMult_MPIBAIJ,
2548                                 /* 4*/ MatMultAdd_MPIBAIJ,
2549                                        MatMultTranspose_MPIBAIJ,
2550                                        MatMultTransposeAdd_MPIBAIJ,
2551                                        0,
2552                                        0,
2553                                        0,
2554                                 /*10*/ 0,
2555                                        0,
2556                                        0,
2557                                        MatSOR_MPIBAIJ,
2558                                        MatTranspose_MPIBAIJ,
2559                                 /*15*/ MatGetInfo_MPIBAIJ,
2560                                        MatEqual_MPIBAIJ,
2561                                        MatGetDiagonal_MPIBAIJ,
2562                                        MatDiagonalScale_MPIBAIJ,
2563                                        MatNorm_MPIBAIJ,
2564                                 /*20*/ MatAssemblyBegin_MPIBAIJ,
2565                                        MatAssemblyEnd_MPIBAIJ,
2566                                        MatSetOption_MPIBAIJ,
2567                                        MatZeroEntries_MPIBAIJ,
2568                                 /*24*/ MatZeroRows_MPIBAIJ,
2569                                        0,
2570                                        0,
2571                                        0,
2572                                        0,
2573                                 /*29*/ MatSetUp_MPIBAIJ,
2574                                        0,
2575                                        0,
2576                                        0,
2577                                        0,
2578                                 /*34*/ MatDuplicate_MPIBAIJ,
2579                                        0,
2580                                        0,
2581                                        0,
2582                                        0,
2583                                 /*39*/ MatAXPY_MPIBAIJ,
2584                                        MatGetSubMatrices_MPIBAIJ,
2585                                        MatIncreaseOverlap_MPIBAIJ,
2586                                        MatGetValues_MPIBAIJ,
2587                                        MatCopy_MPIBAIJ,
2588                                 /*44*/ 0,
2589                                        MatScale_MPIBAIJ,
2590                                        0,
2591                                        0,
2592                                        MatZeroRowsColumns_MPIBAIJ,
2593                                 /*49*/ 0,
2594                                        0,
2595                                        0,
2596                                        0,
2597                                        0,
2598                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2599                                        0,
2600                                        MatSetUnfactored_MPIBAIJ,
2601                                        MatPermute_MPIBAIJ,
2602                                        MatSetValuesBlocked_MPIBAIJ,
2603                                 /*59*/ MatGetSubMatrix_MPIBAIJ,
2604                                        MatDestroy_MPIBAIJ,
2605                                        MatView_MPIBAIJ,
2606                                        0,
2607                                        0,
2608                                 /*64*/ 0,
2609                                        0,
2610                                        0,
2611                                        0,
2612                                        0,
2613                                 /*69*/ MatGetRowMaxAbs_MPIBAIJ,
2614                                        0,
2615                                        0,
2616                                        0,
2617                                        0,
2618                                 /*74*/ 0,
2619                                        MatFDColoringApply_BAIJ,
2620                                        0,
2621                                        0,
2622                                        0,
2623                                 /*79*/ 0,
2624                                        0,
2625                                        0,
2626                                        0,
2627                                        MatLoad_MPIBAIJ,
2628                                 /*84*/ 0,
2629                                        0,
2630                                        0,
2631                                        0,
2632                                        0,
2633                                 /*89*/ 0,
2634                                        0,
2635                                        0,
2636                                        0,
2637                                        0,
2638                                 /*94*/ 0,
2639                                        0,
2640                                        0,
2641                                        0,
2642                                        0,
2643                                 /*99*/ 0,
2644                                        0,
2645                                        0,
2646                                        0,
2647                                        0,
2648                                 /*104*/0,
2649                                        MatRealPart_MPIBAIJ,
2650                                        MatImaginaryPart_MPIBAIJ,
2651                                        0,
2652                                        0,
2653                                 /*109*/0,
2654                                        0,
2655                                        0,
2656                                        0,
2657                                        0,
2658                                 /*114*/MatGetSeqNonzeroStructure_MPIBAIJ,
2659                                        0,
2660                                        MatGetGhosts_MPIBAIJ,
2661                                        0,
2662                                        0,
2663                                 /*119*/0,
2664                                        0,
2665                                        0,
2666                                        0,
2667                                        MatGetMultiProcBlock_MPIBAIJ,
2668                                 /*124*/0,
2669                                        MatGetColumnNorms_MPIBAIJ,
2670                                        MatInvertBlockDiagonal_MPIBAIJ,
2671                                        0,
2672                                        0,
2673                                /*129*/ 0,
2674                                        0,
2675                                        0,
2676                                        0,
2677                                        0,
2678                                /*134*/ 0,
2679                                        0,
2680                                        0,
2681                                        0,
2682                                        0,
2683                                /*139*/ 0,
2684                                        0,
2685                                        0,
2686                                        MatFDColoringSetUp_MPIXAIJ
2687 };
2688 
2689 #undef __FUNCT__
2690 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
2691 PetscErrorCode  MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a)
2692 {
2693   PetscFunctionBegin;
2694   *a = ((Mat_MPIBAIJ*)A->data)->A;
2695   PetscFunctionReturn(0);
2696 }
2697 
2698 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*);
2699 
2700 #undef __FUNCT__
2701 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ"
2702 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2703 {
2704   PetscInt       m,rstart,cstart,cend;
2705   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
2706   const PetscInt *JJ    =0;
2707   PetscScalar    *values=0;
2708   PetscBool      roworiented = ((Mat_MPIBAIJ*)B->data)->roworiented;
2709   PetscErrorCode ierr;
2710 
2711   PetscFunctionBegin;
2712   ierr   = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2713   ierr   = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2714   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2715   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2716   ierr   = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2717   m      = B->rmap->n/bs;
2718   rstart = B->rmap->rstart/bs;
2719   cstart = B->cmap->rstart/bs;
2720   cend   = B->cmap->rend/bs;
2721 
2722   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
2723   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
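  /* for each block row, count blocks whose global block column lies inside [cstart,cend) (diagonal part, d) versus outside it (off-diagonal part, o) */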
2724   for (i=0; i<m; i++) {
2725     nz = ii[i+1] - ii[i];
2726     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
2727     nz_max = PetscMax(nz_max,nz);
2728     JJ     = jj + ii[i];
2729     for (j=0; j<nz; j++) {
2730       if (*JJ >= cstart) break;
2731       JJ++;
2732     }
2733     d = 0;
2734     for (; j<nz; j++) {
2735       if (*JJ++ >= cend) break;
2736       d++;
2737     }
2738     d_nnz[i] = d;
2739     o_nnz[i] = nz - d;
2740   }
2741   ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2742   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
2743 
2744   values = (PetscScalar*)V;
2745   if (!values) {
2746     ierr = PetscMalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr);
2747     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2748   }
2749   for (i=0; i<m; i++) {
2750     PetscInt          row    = i + rstart;
2751     PetscInt          ncols  = ii[i+1] - ii[i];
2752     const PetscInt    *icols = jj + ii[i];
2753     if (!roworiented) {         /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
2754       const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2755       ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2756     } else {                    /* block ordering does not match so we can only insert one block at a time. */
2757       PetscInt j;
2758       for (j=0; j<ncols; j++) {
2759         const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
2760         ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
2761       }
2762     }
2763   }
2764 
2765   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2766   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2767   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2768   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2769   PetscFunctionReturn(0);
2770 }
2771 
2772 #undef __FUNCT__
2773 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR"
2774 /*@C
2775    MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format
2776    (block compressed row).
2777 
2778    Collective on MPI_Comm
2779 
2780    Input Parameters:
2781 +  A - the matrix
2782 .  bs - the block size
2783 .  i - the indices into j for the start of each local row (starts with zero)
2784 .  j - the column indices for each local row (starts with zero); these must be sorted for each row
2785 -  v - optional values in the matrix
2786 
2787    Level: developer
2788 
2789    Notes: The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED.  For example, C programs
2790    may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
2791    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
2792    MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
2793    block column and the second index is over columns within a block.
2794 
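   Example:
   A minimal sketch (the handle A, the communicator, and all sizes here are hypothetical
   placeholders) for a process owning two block rows with two blocks per row and bs = 2:
.vb
   PetscInt    bs   = 2;
   PetscInt    i[3] = {0,2,4};         /* start of each local block row in j[] */
   PetscInt    j[4] = {0,1,1,2};       /* global block columns, sorted within each row */
   PetscScalar v[16];                  /* 4 blocks, bs*bs = 4 values per block */
   /* ... fill v in the ordering selected by MAT_ROW_ORIENTED ... */
   ierr = MatMPIBAIJSetPreallocationCSR(A,bs,i,j,v);CHKERRQ(ierr);
.ve
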
2795 .keywords: matrix, aij, compressed row, sparse, parallel
2796 
2797 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, MatCreateMPIBAIJWithArrays(), MPIBAIJ
2798 @*/
2799 PetscErrorCode  MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2800 {
2801   PetscErrorCode ierr;
2802 
2803   PetscFunctionBegin;
2804   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2805   PetscValidType(B,1);
2806   PetscValidLogicalCollectiveInt(B,bs,2);
2807   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
2808   PetscFunctionReturn(0);
2809 }
2810 
2811 #undef __FUNCT__
2812 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
2813 PetscErrorCode  MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
2814 {
2815   Mat_MPIBAIJ    *b;
2816   PetscErrorCode ierr;
2817   PetscInt       i;
2818 
2819   PetscFunctionBegin;
2820   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
2821   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2822   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2823   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2824 
2825   if (d_nnz) {
2826     for (i=0; i<B->rmap->n/bs; i++) {
2827       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]);
2828     }
2829   }
2830   if (o_nnz) {
2831     for (i=0; i<B->rmap->n/bs; i++) {
2832       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]);
2833     }
2834   }
2835 
2836   b      = (Mat_MPIBAIJ*)B->data;
2837   b->bs2 = bs*bs;
2838   b->mbs = B->rmap->n/bs;
2839   b->nbs = B->cmap->n/bs;
2840   b->Mbs = B->rmap->N/bs;
2841   b->Nbs = B->cmap->N/bs;
2842 
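  /* convert the point-row ownership ranges in B->rmap->range[] into block-row ranges */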
2843   for (i=0; i<=b->size; i++) {
2844     b->rangebs[i] = B->rmap->range[i]/bs;
2845   }
2846   b->rstartbs = B->rmap->rstart/bs;
2847   b->rendbs   = B->rmap->rend/bs;
2848   b->cstartbs = B->cmap->rstart/bs;
2849   b->cendbs   = B->cmap->rend/bs;
2850 
2851   if (!B->preallocated) {
2852     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2853     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2854     ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
2855     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2856     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2857     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2858     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
2859     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2860     ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr);
2861   }
2862 
2863   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2864   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
2865   B->preallocated = PETSC_TRUE;
2866   PetscFunctionReturn(0);
2867 }
2868 
2869 extern PetscErrorCode  MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
2870 extern PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
2871 
2872 #undef __FUNCT__
2873 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj"
2874 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype,MatReuse reuse,Mat *adj)
2875 {
2876   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
2877   PetscErrorCode ierr;
2878   Mat_SeqBAIJ    *d  = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data;
2879   PetscInt       M   = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs;
2880   const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;
2881 
2882   PetscFunctionBegin;
2883   ierr  = PetscMalloc1((M+1),&ii);CHKERRQ(ierr);
2884   ii[0] = 0;
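  /* ii[] accumulates, per block row, the number of blocks from both the diagonal (A) and off-diagonal (B) parts */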
2885   for (i=0; i<M; i++) {
2886     if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]);
2887     if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]);
2888     ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i];
2889     /* remove one from the count if the row contains a diagonal entry, since MatMPIAdj does not store the diagonal */
2890     for (j=id[i]; j<id[i+1]; j++) {
2891       if (jd[j] == i) {ii[i+1]--;break;}
2892     }
2893   }
2894   ierr = PetscMalloc1(ii[M],&jj);CHKERRQ(ierr);
2895   cnt  = 0;
2896   for (i=0; i<M; i++) {
2897     for (j=io[i]; j<io[i+1]; j++) {
2898       if (garray[jo[j]] > rstart) break;
2899       jj[cnt++] = garray[jo[j]];
2900     }
2901     for (k=id[i]; k<id[i+1]; k++) {
2902       if (jd[k] != i) {
2903         jj[cnt++] = rstart + jd[k];
2904       }
2905     }
2906     for (; j<io[i+1]; j++) {
2907       jj[cnt++] = garray[jo[j]];
2908     }
2909   }
2910   ierr = MatCreateMPIAdj(PetscObjectComm((PetscObject)B),M,B->cmap->N/B->rmap->bs,ii,jj,NULL,adj);CHKERRQ(ierr);
2911   PetscFunctionReturn(0);
2912 }
2913 
2914 #include <../src/mat/impls/aij/mpi/mpiaij.h>
2915 
2916 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,MatReuse,Mat*);
2917 
2918 #undef __FUNCT__
2919 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAIJ"
2920 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
2921 {
2922   PetscErrorCode ierr;
2923   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
2924   Mat            B;
2925   Mat_MPIAIJ     *b;
2926 
2927   PetscFunctionBegin;
2928   if (!A->assembled) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Matrix must be assembled");
2929 
2930   ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2931   ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
2932   ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2933   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
2934   ierr = MatMPIAIJSetPreallocation(B,0,NULL,0,NULL);CHKERRQ(ierr);
2935   b    = (Mat_MPIAIJ*) B->data;
2936 
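  /* discard the empty sequential pieces created by the preallocation above and replace them with AIJ conversions of this matrix's BAIJ pieces */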
2937   ierr = MatDestroy(&b->A);CHKERRQ(ierr);
2938   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2939   ierr = MatDisAssemble_MPIBAIJ(A);CHKERRQ(ierr);
2940   ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr);
2941   ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr);
2942   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2943   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2944   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2945   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2946   if (reuse == MAT_REUSE_MATRIX) {
2947     ierr = MatHeaderReplace(A,B);CHKERRQ(ierr);
2948   } else {
2949    *newmat = B;
2950   }
2951   PetscFunctionReturn(0);
2952 }
2953 
2954 #if defined(PETSC_HAVE_MUMPS)
2955 PETSC_EXTERN PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*);
2956 #endif
2957 
2958 /*MC
2959    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
2960 
2961    Options Database Keys:
2962 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
2963 . -mat_block_size <bs> - set the blocksize used to store the matrix
2964 - -mat_use_hash_table <fact> - use a hash table during matrix assembly to save memory; <fact> sets the hash table size factor
2965 
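  Notes: a minimal sketch of creating a matrix of this type directly (the sizes M, N and the
  preallocation parameters here are hypothetical placeholders):
.vb
   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
   ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
   ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
   ierr = MatMPIBAIJSetPreallocation(A,bs,d_nz,NULL,o_nz,NULL);CHKERRQ(ierr);
.ve
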
2966   Level: beginner
2967 
2968 .seealso: MatCreateMPIBAIJ
2969 M*/
2970 
2971 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,MatType,MatReuse,Mat*);
2972 
2973 #undef __FUNCT__
2974 #define __FUNCT__ "MatCreate_MPIBAIJ"
2975 PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B)
2976 {
2977   Mat_MPIBAIJ    *b;
2978   PetscErrorCode ierr;
2979   PetscBool      flg;
2980 
2981   PetscFunctionBegin;
2982   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
2983   B->data = (void*)b;
2984 
2985   ierr         = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2986   B->assembled = PETSC_FALSE;
2987 
2988   B->insertmode = NOT_SET_VALUES;
2989   ierr          = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
2990   ierr          = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr);
2991 
2992   /* build local table of row and column ownerships */
2993   ierr = PetscMalloc1((b->size+1),&b->rangebs);CHKERRQ(ierr);
2994 
2995   /* build cache for off array entries formed */
2996   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
2997 
2998   b->donotstash  = PETSC_FALSE;
2999   b->colmap      = NULL;
3000   b->garray      = NULL;
3001   b->roworiented = PETSC_TRUE;
3002 
3003   /* stuff used in block assembly */
3004   b->barray = 0;
3005 
3006   /* stuff used for matrix vector multiply */
3007   b->lvec  = 0;
3008   b->Mvctx = 0;
3009 
3010   /* stuff for MatGetRow() */
3011   b->rowindices   = 0;
3012   b->rowvalues    = 0;
3013   b->getrowactive = PETSC_FALSE;
3014 
3015   /* hash table stuff */
3016   b->ht           = 0;
3017   b->hd           = 0;
3018   b->ht_size      = 0;
3019   b->ht_flag      = PETSC_FALSE;
3020   b->ht_fact      = 0;
3021   b->ht_total_ct  = 0;
3022   b->ht_insert_ct = 0;
3023 
3024   /* stuff for MatGetSubMatrices_MPIBAIJ_local() */
3025   b->ijonly = PETSC_FALSE;
3026 
3027   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr);
3028   ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,NULL);CHKERRQ(ierr);
3029   if (flg) {
3030     PetscReal fact = 1.39;
3031     ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
3032     ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
3033     if (fact <= 1.0) fact = 1.39;
3034     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
3035     ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
3036   }
3037   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3038 
3039 #if defined(PETSC_HAVE_MUMPS)
3040   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_baij_mumps);CHKERRQ(ierr);
3041 #endif
3042   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr);
3043   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr);
3044   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C",MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr);
3045   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
3046   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
3047   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
3048   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
3049   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
3050   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
3051   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSetHashTableFactor_C",MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
3052   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpibstrm_C",MatConvert_MPIBAIJ_MPIBSTRM);CHKERRQ(ierr);
3053   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);
3054   PetscFunctionReturn(0);
3055 }
3056 
3057 /*MC
3058    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
3059 
3060    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
3061    and MATMPIBAIJ otherwise.
3062 
3063    Options Database Keys:
3064 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
3065 
3066   Level: beginner
3067 
3068 .seealso: MatCreateBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3069 M*/
3070 
3071 #undef __FUNCT__
3072 #define __FUNCT__ "MatMPIBAIJSetPreallocation"
3073 /*@C
3074    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
3075    (block compressed row).  For good matrix assembly performance
3076    the user should preallocate the matrix storage by setting the parameters
3077    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3078    performance can be increased by more than a factor of 50.
3079 
3080    Collective on Mat
3081 
3082    Input Parameters:
3083 +  A - the matrix
3084 .  bs   - size of block
3085 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
3086            submatrix  (same for all local rows)
3087 .  d_nnz - array containing the number of block nonzeros in the various block rows
3088            in the diagonal portion of the local submatrix (possibly different for each block
3089            row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry and
3090            set it even if it is zero.
3091 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
3092            submatrix (same for all local rows).
3093 -  o_nnz - array containing the number of block nonzeros in the various block rows of the
3094            off-diagonal portion of the local submatrix (possibly different for
3095            each block row) or NULL.
3096 
3097    If the *_nnz parameter is given then the *_nz parameter is ignored
3098 
3099    Options Database Keys:
3100 +   -mat_block_size - size of the blocks to use
3101 -   -mat_use_hash_table <fact> - use a hash table during matrix assembly to save memory; <fact> sets the hash table size factor
3102 
3103    Notes:
3104    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
3105    then it must be used on all processors that share the object for that argument.
3106 
3107    Storage Information:
3108    For a square global matrix we define each processor's diagonal portion
3109    to be its local rows and the corresponding columns (a square submatrix);
3110    each processor's off-diagonal portion encompasses the remainder of the
3111    local matrix (a rectangular submatrix).
3112 
3113    The user can specify preallocated storage for the diagonal part of
3114    the local submatrix with either d_nz or d_nnz (not both).  Set
3115    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3116    memory allocation.  Likewise, specify preallocated storage for the
3117    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3118 
3119    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3120    the figure below we depict these three local rows and all columns (0-11).
3121 
3122 .vb
3123            0 1 2 3 4 5 6 7 8 9 10 11
3124           --------------------------
3125    row 3  |o o o d d d o o o o  o  o
3126    row 4  |o o o d d d o o o o  o  o
3127    row 5  |o o o d d d o o o o  o  o
3128           --------------------------
3129 .ve
3130 
3131    Thus, any entries in the d locations are stored in the d (diagonal)
3132    submatrix, and any entries in the o locations are stored in the
3133    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3134    stored simply in the MATSEQBAIJ format for compressed row storage.
3135 
3136    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3137    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3138    In general, for PDE problems in which most nonzeros are near the diagonal,
3139    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3140    or you will get TERRIBLE performance; see the users' manual chapter on
3141    matrices.
3142 
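   For the block rows depicted above with bs=3 and every indicated entry nonzero, each block
   row holds one diagonal block and three off-diagonal blocks, so a sketch of the matching
   call (the handle A is a placeholder) would be:
.vb
   ierr = MatMPIBAIJSetPreallocation(A,3,1,NULL,3,NULL);CHKERRQ(ierr);
.ve
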
3143    You can call MatGetInfo() to get information on how effective the preallocation was;
3144    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3145    You can also run with the option -info and look for messages with the string
3146    malloc in them to see if additional memory allocation was needed.
3147 
3148    Level: intermediate
3149 
3150 .keywords: matrix, block, aij, compressed row, sparse, parallel
3151 
3152 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocationCSR(), PetscSplitOwnership()
3153 @*/
3154 PetscErrorCode  MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3155 {
3156   PetscErrorCode ierr;
3157 
3158   PetscFunctionBegin;
3159   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3160   PetscValidType(B,1);
3161   PetscValidLogicalCollectiveInt(B,bs,2);
3162   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3163   PetscFunctionReturn(0);
3164 }
3165 
3166 #undef __FUNCT__
3167 #define __FUNCT__ "MatCreateBAIJ"
3168 /*@C
3169    MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format
3170    (block compressed row).  For good matrix assembly performance
3171    the user should preallocate the matrix storage by setting the parameters
3172    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3173    performance can be increased by more than a factor of 50.
3174 
3175    Collective on MPI_Comm
3176 
3177    Input Parameters:
3178 +  comm - MPI communicator
3179 .  bs   - size of block
3180 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3181            This value should be the same as the local size used in creating the
3182            y vector for the matrix-vector product y = Ax.
3183 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
3184            This value should be the same as the local size used in creating the
3185            x vector for the matrix-vector product y = Ax.
3186 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3187 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3188 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
3189            submatrix  (same for all local rows)
3190 .  d_nnz - array containing the number of nonzero blocks in the various block rows
3191            in the diagonal portion of the local submatrix (possibly different for each block
3192            row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry
3193            and set it even if it is zero.
3194 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3195            submatrix (same for all local rows).
3196 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
3197            off-diagonal portion of the local submatrix (possibly different for
3198            each block row) or NULL.
3199 
3200    Output Parameter:
3201 .  A - the matrix
3202 
3203    Options Database Keys:
3204 +   -mat_block_size - size of the blocks to use
3205 -   -mat_use_hash_table <fact> - use a hash table during matrix assembly to save memory; <fact> sets the hash table size factor
3206 
3207    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3208    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3209    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]
3210 
3211    Notes:
3212    If the *_nnz parameter is given then the *_nz parameter is ignored
3213 
3214    A nonzero block is any block that has 1 or more nonzeros in it
3215 
3216    The user MUST specify either the local or global matrix dimensions
3217    (possibly both).
3218 
3219    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
3220    then it must be used on all processors that share the object for that argument.
3221 
3222    Storage Information:
3223    For a square global matrix we define each processor's diagonal portion
3224    to be its local rows and the corresponding columns (a square submatrix);
3225    each processor's off-diagonal portion encompasses the remainder of the
3226    local matrix (a rectangular submatrix).
3227 
3228    The user can specify preallocated storage for the diagonal part of
3229    the local submatrix with either d_nz or d_nnz (not both).  Set
3230    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3231    memory allocation.  Likewise, specify preallocated storage for the
3232    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3233 
3234    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3235    the figure below we depict these three local rows and all columns (0-11).
3236 
3237 .vb
3238            0 1 2 3 4 5 6 7 8 9 10 11
3239           --------------------------
3240    row 3  |o o o d d d o o o o  o  o
3241    row 4  |o o o d d d o o o o  o  o
3242    row 5  |o o o d d d o o o o  o  o
3243           --------------------------
3244 .ve
3245 
3246    Thus, any entries in the d locations are stored in the d (diagonal)
3247    submatrix, and any entries in the o locations are stored in the
3248    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3249    stored simply in the MATSEQBAIJ format for compressed row storage.
3250 
3251    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3252    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3253    In general, for PDE problems in which most nonzeros are near the diagonal,
3254    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3255    or you will get TERRIBLE performance; see the users' manual chapter on
3256    matrices.
3257 
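   For the block rows depicted above with bs=3 and every indicated entry nonzero, a sketch of
   the corresponding call (the global sizes M and N are placeholders) would be:
.vb
   ierr = MatCreateBAIJ(comm,3,PETSC_DECIDE,PETSC_DECIDE,M,N,1,NULL,3,NULL,&A);CHKERRQ(ierr);
.ve
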
3258    Level: intermediate
3259 
3260 .keywords: matrix, block, aij, compressed row, sparse, parallel
3261 
3262 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3263 @*/
3264 PetscErrorCode  MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3265 {
3266   PetscErrorCode ierr;
3267   PetscMPIInt    size;
3268 
3269   PetscFunctionBegin;
3270   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3271   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3272   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3273   if (size > 1) {
3274     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
3275     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3276   } else {
3277     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3278     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3279   }
3280   PetscFunctionReturn(0);
3281 }
3282 
3283 #undef __FUNCT__
3284 #define __FUNCT__ "MatDuplicate_MPIBAIJ"
3285 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3286 {
3287   Mat            mat;
3288   Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
3289   PetscErrorCode ierr;
3290   PetscInt       len=0;
3291 
3292   PetscFunctionBegin;
3293   *newmat = 0;
3294   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
3295   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3296   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3297   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3298 
3299   mat->factortype   = matin->factortype;
3300   mat->preallocated = PETSC_TRUE;
3301   mat->assembled    = PETSC_TRUE;
3302   mat->insertmode   = NOT_SET_VALUES;
3303 
3304   a             = (Mat_MPIBAIJ*)mat->data;
3305   mat->rmap->bs = matin->rmap->bs;
3306   a->bs2        = oldmat->bs2;
3307   a->mbs        = oldmat->mbs;
3308   a->nbs        = oldmat->nbs;
3309   a->Mbs        = oldmat->Mbs;
3310   a->Nbs        = oldmat->Nbs;
3311 
3312   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3313   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3314 
3315   a->size         = oldmat->size;
3316   a->rank         = oldmat->rank;
3317   a->donotstash   = oldmat->donotstash;
3318   a->roworiented  = oldmat->roworiented;
3319   a->rowindices   = 0;
3320   a->rowvalues    = 0;
3321   a->getrowactive = PETSC_FALSE;
3322   a->barray       = 0;
3323   a->rstartbs     = oldmat->rstartbs;
3324   a->rendbs       = oldmat->rendbs;
3325   a->cstartbs     = oldmat->cstartbs;
3326   a->cendbs       = oldmat->cendbs;
3327 
3328   /* hash table stuff */
3329   a->ht           = 0;
3330   a->hd           = 0;
3331   a->ht_size      = 0;
3332   a->ht_flag      = oldmat->ht_flag;
3333   a->ht_fact      = oldmat->ht_fact;
3334   a->ht_total_ct  = 0;
3335   a->ht_insert_ct = 0;
3336 
3337   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
3338   if (oldmat->colmap) {
3339 #if defined(PETSC_USE_CTABLE)
3340     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3341 #else
3342     ierr = PetscMalloc1((a->Nbs),&a->colmap);CHKERRQ(ierr);
3343     ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3344     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3345 #endif
3346   } else a->colmap = 0;
3347 
3348   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
3349     ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr);
3350     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3351     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
3352   } else a->garray = 0;
3353 
3354   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
3355   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3356   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
3357   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3358   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
3359 
3360   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3361   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
3362   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3363   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
3364   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3365   *newmat = mat;
3366   PetscFunctionReturn(0);
3367 }
3368 
3369 #undef __FUNCT__
3370 #define __FUNCT__ "MatLoad_MPIBAIJ"
3371 PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer)
3372 {
3373   PetscErrorCode ierr;
3374   int            fd;
3375   PetscInt       i,nz,j,rstart,rend;
3376   PetscScalar    *vals,*buf;
3377   MPI_Comm       comm;
3378   MPI_Status     status;
3379   PetscMPIInt    rank,size,maxnz;
3380   PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
3381   PetscInt       *locrowlens = NULL,*procsnz = NULL,*browners = NULL;
3382   PetscInt       jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax;
3383   PetscMPIInt    tag    = ((PetscObject)viewer)->tag;
3384   PetscInt       *dlens = NULL,*odlens = NULL,*mask = NULL,*masked1 = NULL,*masked2 = NULL,rowcount,odcount;
3385   PetscInt       dcount,kmax,k,nzcount,tmp,mend,sizesset=1,grows,gcols;
3386 
3387   PetscFunctionBegin;
3388   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3389   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr);
3390   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3391   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3392 
3393   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3394   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3395   if (!rank) {
3396     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3397     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
3398     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3399   }
3400 
3401   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
3402 
3403   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3404   M    = header[1]; N = header[2];
3405 
3406   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3407   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
3408   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
3409 
3410   /* If global sizes are set, check if they are consistent with that given in the file */
3411   if (sizesset) {
3412     ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
3413   }
3414   if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",M,grows);
3415   if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",N,gcols);
3416 
3417   if (M != N) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Can only do square matrices");
3418 
3419   /*
3420      This code adds extra rows to make sure the number of rows is
3421      divisible by the blocksize
3422   */
3423   Mbs        = M/bs;
3424   extra_rows = bs - M + bs*Mbs;
3425   if (extra_rows == bs) extra_rows = 0;
3426   else                  Mbs++;
3427   if (extra_rows && !rank) {
3428     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3429   }
3430 
3431   /* determine ownership of all rows */
3432   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
3433     mbs = Mbs/size + ((Mbs % size) > rank);
3434     m   = mbs*bs;
3435   } else { /* User set */
3436     m   = newmat->rmap->n;
3437     mbs = m/bs;
3438   }
3439   ierr = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr);
3440   ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3441 
3442   /* process 0 needs enough room for process with most rows */
3443   if (!rank) {
3444     mmax = rowners[1];
3445     for (i=2; i<=size; i++) {
3446       mmax = PetscMax(mmax,rowners[i]);
3447     }
3448     mmax*=bs;
3449   } else mmax = -1;             /* unused, but compiler warns anyway */
3450 
3451   rowners[0] = 0;
3452   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
3453   for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
3454   rstart = rowners[rank];
3455   rend   = rowners[rank+1];
3456 
3457   /* distribute row lengths to all processors */
3458   ierr = PetscMalloc1(m,&locrowlens);CHKERRQ(ierr);
3459   if (!rank) {
3460     mend = m;
3461     if (size == 1) mend = mend - extra_rows;
3462     ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr);
3463     for (j=mend; j<m; j++) locrowlens[j] = 1;
3464     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
3465     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
3466     for (j=0; j<m; j++) {
3467       procsnz[0] += locrowlens[j];
3468     }
3469     for (i=1; i<size; i++) {
3470       mend = browners[i+1] - browners[i];
3471       if (i == size-1) mend = mend - extra_rows;
3472       ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr);
3473       for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
3474       /* calculate the number of nonzeros on each processor */
3475       for (j=0; j<browners[i+1]-browners[i]; j++) {
3476         procsnz[i] += rowlengths[j];
3477       }
3478       ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3479     }
3480     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3481   } else {
3482     ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3483   }
3484 
3485   if (!rank) {
3486     /* determine max buffer needed and allocate it */
3487     maxnz = procsnz[0];
3488     for (i=1; i<size; i++) {
3489       maxnz = PetscMax(maxnz,procsnz[i]);
3490     }
3491     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
3492 
3493     /* read in my part of the matrix column indices  */
3494     nz     = procsnz[0];
3495     ierr   = PetscMalloc1((nz+1),&ibuf);CHKERRQ(ierr);
3496     mycols = ibuf;
3497     if (size == 1) nz -= extra_rows;
3498     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3499     if (size == 1) {
3500       for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i;
3501     }
3502 
3503     /* read in everyone else's column indices (except the last proc's) and ship them off */
3504     for (i=1; i<size-1; i++) {
3505       nz   = procsnz[i];
3506       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3507       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3508     }
3509     /* read in the stuff for the last proc */
3510     if (size != 1) {
3511       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
3512       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3513       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
3514       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
3515     }
3516     ierr = PetscFree(cols);CHKERRQ(ierr);
3517   } else {
3518     /* determine buffer space needed for message */
3519     nz = 0;
3520     for (i=0; i<m; i++) {
3521       nz += locrowlens[i];
3522     }
3523     ierr   = PetscMalloc1((nz+1),&ibuf);CHKERRQ(ierr);
3524     mycols = ibuf;
3525     /* receive message of column indices */
3526     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3527     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
3528     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
3529   }
3530 
3531   /* loop over local rows, determining number of off diagonal entries */
3532   ierr     = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr);
3533   ierr     = PetscCalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr);
3534   rowcount = 0; nzcount = 0;
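  /* mask[] flags block columns already counted in the current block row; masked1[]/masked2[] record which flags to clear afterwards (diagonal vs off-diagonal) */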
3535   for (i=0; i<mbs; i++) {
3536     dcount  = 0;
3537     odcount = 0;
3538     for (j=0; j<bs; j++) {
3539       kmax = locrowlens[rowcount];
3540       for (k=0; k<kmax; k++) {
3541         tmp = mycols[nzcount++]/bs;
3542         if (!mask[tmp]) {
3543           mask[tmp] = 1;
3544           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
3545           else masked1[dcount++] = tmp;
3546         }
3547       }
3548       rowcount++;
3549     }
3550 
3551     dlens[i]  = dcount;
3552     odlens[i] = odcount;
3553 
3554     /* zero out the mask elements we set */
3555     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
3556     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
3557   }
3558 
3559 
3560   if (!sizesset) {
3561     ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3562   }
3563   ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
3564 
3565   if (!rank) {
3566     ierr = PetscMalloc1((maxnz+1),&buf);CHKERRQ(ierr);
3567     /* read in my part of the matrix numerical values  */
3568     nz     = procsnz[0];
3569     vals   = buf;
3570     mycols = ibuf;
3571     if (size == 1) nz -= extra_rows;
3572     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3573     if (size == 1) {
3574       for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0;
3575     }
3576 
3577     /* insert into matrix */
3578     jj = rstart*bs;
3579     for (i=0; i<m; i++) {
3580       ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3581       mycols += locrowlens[i];
3582       vals   += locrowlens[i];
3583       jj++;
3584     }
3585     /* read in the other processors' values (except the last one's) and ship them out */
3586     for (i=1; i<size-1; i++) {
3587       nz   = procsnz[i];
3588       vals = buf;
3589       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3590       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3591     }
3592     /* the last proc */
3593     if (size != 1) {
3594       nz   = procsnz[i] - extra_rows;
3595       vals = buf;
3596       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3597       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
3598       ierr = MPIULong_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3599     }
3600     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3601   } else {
3602     /* receive numeric values */
3603     ierr = PetscMalloc1((nz+1),&buf);CHKERRQ(ierr);
3604 
3605     /* receive message of values */
3606     vals   = buf;
3607     mycols = ibuf;
3608     ierr   = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3609 
3610     /* insert into matrix */
3611     jj = rstart*bs;
3612     for (i=0; i<m; i++) {
3613       ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3614       mycols += locrowlens[i];
3615       vals   += locrowlens[i];
3616       jj++;
3617     }
3618   }
3619   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
3620   ierr = PetscFree(buf);CHKERRQ(ierr);
3621   ierr = PetscFree(ibuf);CHKERRQ(ierr);
3622   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
3623   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
3624   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
3625   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3626   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3627   PetscFunctionReturn(0);
3628 }
3629 
3630 #undef __FUNCT__
3631 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
3632 /*@
3633    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
3634 
3635    Input Parameters:
3636 +  mat  - the matrix
3637 -  fact - factor
3638 
3639    Not Collective, each process can use a different factor
3640 
3641    Level: advanced
3642 
3643   Notes:
3644    This can also be set by the command line option: -mat_use_hash_table <fact>
3645 
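   A sketch of a typical call (the value 1.6 is an arbitrary example):
.vb
   ierr = MatMPIBAIJSetHashTableFactor(A,1.6);CHKERRQ(ierr);
.ve
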
3646 .keywords: matrix, hashtable, factor, HT
3647 
3648 .seealso: MatSetOption()
3649 @*/
3650 PetscErrorCode  MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
3651 {
3652   PetscErrorCode ierr;
3653 
3654   PetscFunctionBegin;
3655   ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr);
3656   PetscFunctionReturn(0);
3657 }
3658 
3659 #undef __FUNCT__
3660 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
3661 PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
3662 {
3663   Mat_MPIBAIJ *baij;
3664 
3665   PetscFunctionBegin;
3666   baij          = (Mat_MPIBAIJ*)mat->data;
3667   baij->ht_fact = fact;
3668   PetscFunctionReturn(0);
3669 }
3670 
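/*
    MatMPIBAIJGetSeqBAIJ - returns the sequential diagonal (Ad) and off-diagonal (Ao) pieces
  of an MPIBAIJ matrix, plus the array mapping block columns of Ao to global block columns.
*/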
3671 #undef __FUNCT__
3672 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
3673 PetscErrorCode  MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
3674 {
3675   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
3676 
3677   PetscFunctionBegin;
3678   *Ad     = a->A;
3679   *Ao     = a->B;
3680   *colmap = a->garray;
3681   PetscFunctionReturn(0);
3682 }
3683 
3684 /*
3685     Special version for direct calls from Fortran (to eliminate two function call overheads)
3686 */
3687 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3688 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3689 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3690 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3691 #endif
3692 
3693 #undef __FUNCT__
3694 #define __FUNCT__ "matmpibaijsetvaluesblocked_"
3695 /*@C
3696   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace a call to MatSetValuesBlocked()
3697 
3698   Collective on Mat
3699 
3700   Input Parameters:
3701 + mat - the matrix
3702 . min - number of input rows
3703 . im - input rows
3704 . nin - number of input columns
3705 . in - input columns
3706 . v - numerical values input
3707 - addvin - INSERT_VALUES or ADD_VALUES
3708 
3709   Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse.
3710 
3711   Level: advanced
3712 
3713 .seealso:   MatSetValuesBlocked()
3714 @*/
3715 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
3716 {
3717   /* convert input arguments to C version */
3718   Mat        mat  = *matin;
3719   PetscInt   m    = *min, n = *nin;
3720   InsertMode addv = *addvin;
3721 
3722   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
3723   const MatScalar *value;
3724   MatScalar       *barray     = baij->barray;
3725   PetscBool       roworiented = baij->roworiented;
3726   PetscErrorCode  ierr;
3727   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
3728   PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
3729   PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
3730 
3731   PetscFunctionBegin;
3732   /* tasks normally handled by MatSetValuesBlocked() */
3733   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
3734 #if defined(PETSC_USE_DEBUG)
3735   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
3736   if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
3737 #endif
3738   if (mat->assembled) {
3739     mat->was_assembled = PETSC_TRUE;
3740     mat->assembled     = PETSC_FALSE;
3741   }
3742   ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
3743 
3744 
3745   if (!barray) {
3746     ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
3747     baij->barray = barray;
3748   }
3749 
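  /* stepval is the number of scalars to skip, within v, between consecutive rows (row-oriented) or columns (column-oriented) of one bs by bs block */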
3750   if (roworiented) stepval = (n-1)*bs;
3751   else stepval = (m-1)*bs;
3752 
3753   for (i=0; i<m; i++) {
3754     if (im[i] < 0) continue;
3755 #if defined(PETSC_USE_DEBUG)
3756     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
3757 #endif
3758     if (im[i] >= rstart && im[i] < rend) {
3759       row = im[i] - rstart;
3760       for (j=0; j<n; j++) {
3761         /* If NumCol = 1 then a copy is not required */
3762         if ((roworiented) && (n == 1)) {
3763           barray = (MatScalar*)v + i*bs2;
3764         } else if ((!roworiented) && (m == 1)) {
3765           barray = (MatScalar*)v + j*bs2;
3766         } else { /* Here a copy is required */
3767           if (roworiented) {
3768             value = v + i*(stepval+bs)*bs + j*bs;
3769           } else {
3770             value = v + j*(stepval+bs)*bs + i*bs;
3771           }
3772           for (ii=0; ii<bs; ii++,value+=stepval) {
3773             for (jj=0; jj<bs; jj++) {
3774               *barray++ = *value++;
3775             }
3776           }
3777           barray -=bs2;
3778         }
3779 
3780         if (in[j] >= cstart && in[j] < cend) {
3781           col  = in[j] - cstart;
3782           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
3783         } else if (in[j] < 0) continue;
3784 #if defined(PETSC_USE_DEBUG)
3785         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
3786 #endif
3787         else {
3788           if (mat->was_assembled) {
3789             if (!baij->colmap) {
3790               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
3791             }
3792 
3793 #if defined(PETSC_USE_DEBUG)
3794 #if defined(PETSC_USE_CTABLE)
3795             { PetscInt data;
3796               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
3797               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
3798             }
3799 #else
3800             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
3801 #endif
3802 #endif
3803 #if defined(PETSC_USE_CTABLE)
3804             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
3805             col  = (col - 1)/bs;
3806 #else
3807             col = (baij->colmap[in[j]] - 1)/bs;
3808 #endif
3809             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
3810               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
3811               col  =  in[j];
3812             }
3813           } else col = in[j];
3814           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
3815         }
3816       }
3817     } else {
3818       if (!baij->donotstash) {
3819         if (roworiented) {
3820           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
3821         } else {
3822           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
3823         }
3824       }
3825     }
3826   }
3827 
3828   /* task normally handled by MatSetValuesBlocked() */
3829   ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
3830   PetscFunctionReturn(0);
3831 }
3832 
3833 #undef __FUNCT__
3834 #define __FUNCT__ "MatCreateMPIBAIJWithArrays"
3835 /*@
3836      MatCreateMPIBAIJWithArrays - creates a MPI BAIJ matrix using arrays that contain in standard
3837          CSR format the local rows.
3838 
3839    Collective on MPI_Comm
3840 
3841    Input Parameters:
3842 +  comm - MPI communicator
3843 .  bs - the block size; only a block size of 1 is supported
3844 .  m - number of local rows (Cannot be PETSC_DECIDE)
3845 .  n - This value should be the same as the local size used in creating the
3846        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3847        calculated if N is given) For square matrices n is almost always m.
3848 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3849 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3850 .   i - row indices
3851 .   j - column indices
3852 -   a - matrix values
3853 
3854    Output Parameter:
3855 .   mat - the matrix
3856 
3857    Level: intermediate
3858 
3859    Notes:
3860        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3861      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3862      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3863 
3864      The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3865      the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3866      block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3867      with column-major ordering within blocks.
3868 
3869        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3870 
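   Example:
   A minimal sketch (the row counts and values are hypothetical) for a process owning two rows
   of a matrix with bs = 1:
.vb
   PetscInt    i[3] = {0,2,3};
   PetscInt    j[3] = {0,1,1};
   PetscScalar a[3] = {1.0,2.0,3.0};
   ierr = MatCreateMPIBAIJWithArrays(comm,1,2,PETSC_DECIDE,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,&A);CHKERRQ(ierr);
.ve
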
3871 .keywords: matrix, aij, compressed row, sparse, parallel
3872 
3873 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3874           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
3875 @*/
3876 PetscErrorCode  MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3877 {
3878   PetscErrorCode ierr;
3879 
3880   PetscFunctionBegin;
3881   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3882   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3883   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3884   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3885   ierr = MatSetType(*mat,MATMPIBAIJ);CHKERRQ(ierr);
3886   ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
3887   ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
3888   ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
3889   PetscFunctionReturn(0);
3890 }
3891