xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision f66eea085cceefc5191b4b3d61f096e7c3d8e689)
1 
2 #include <../src/mat/impls/baij/mpi/mpibaij.h>   /*I  "petscmat.h"  I*/
3 #include <petscblaslapack.h>
4 #include <petscsf.h>
5 
6 extern PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
7 extern PetscErrorCode MatDisAssemble_MPIBAIJ(Mat);
8 extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []);
9 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
10 extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
11 extern PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
12 extern PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
13 extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar,Vec,Vec);
14 
15 #undef __FUNCT__
16 #define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ"
17 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
18 {
19   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
20   PetscErrorCode ierr;
21   PetscInt       i,*idxb = 0;
22   PetscScalar    *va,*vb;
23   Vec            vtmp;
24 
25   PetscFunctionBegin;
26   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
27   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
28   if (idx) {
29     for (i=0; i<A->rmap->n; i++) {
30       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
31     }
32   }
33 
34   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
35   if (idx) {ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);}
36   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
37   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
38 
39   for (i=0; i<A->rmap->n; i++) {
40     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
41       va[i] = vb[i];
42       if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);
43     }
44   }
45 
46   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
47   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
48   ierr = PetscFree(idxb);CHKERRQ(ierr);
49   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
50   PetscFunctionReturn(0);
51 }
52 
53 #undef __FUNCT__
54 #define __FUNCT__ "MatStoreValues_MPIBAIJ"
55 PetscErrorCode  MatStoreValues_MPIBAIJ(Mat mat)
56 {
57   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
58   PetscErrorCode ierr;
59 
60   PetscFunctionBegin;
61   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
62   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
63   PetscFunctionReturn(0);
64 }
65 
66 #undef __FUNCT__
67 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
68 PetscErrorCode  MatRetrieveValues_MPIBAIJ(Mat mat)
69 {
70   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
71   PetscErrorCode ierr;
72 
73   PetscFunctionBegin;
74   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
75   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
76   PetscFunctionReturn(0);
77 }
78 
79 /*
80      Local utility routine that creates a mapping from the global column
81    number to the local number in the off-diagonal part of the local
82    storage of the matrix.  This is done in a non scalable way since the
83    length of colmap equals the global matrix length.
84 */
85 #undef __FUNCT__
86 #define __FUNCT__ "MatCreateColmap_MPIBAIJ_Private"
87 PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
88 {
89   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
90   Mat_SeqBAIJ    *B    = (Mat_SeqBAIJ*)baij->B->data;
91   PetscErrorCode ierr;
92   PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;
93 
94   PetscFunctionBegin;
95 #if defined(PETSC_USE_CTABLE)
96   ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
97   for (i=0; i<nbs; i++) {
98     ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr);
99   }
100 #else
101   ierr = PetscMalloc1((baij->Nbs+1),&baij->colmap);CHKERRQ(ierr);
102   ierr = PetscLogObjectMemory((PetscObject)mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
103   ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
104   for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
105 #endif
106   PetscFunctionReturn(0);
107 }
108 
109 #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
110   { \
111  \
112     brow = row/bs;  \
113     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
114     rmax = aimax[brow]; nrow = ailen[brow]; \
115     bcol = col/bs; \
116     ridx = row % bs; cidx = col % bs; \
117     low  = 0; high = nrow; \
118     while (high-low > 3) { \
119       t = (low+high)/2; \
120       if (rp[t] > bcol) high = t; \
121       else              low  = t; \
122     } \
123     for (_i=low; _i<high; _i++) { \
124       if (rp[_i] > bcol) break; \
125       if (rp[_i] == bcol) { \
126         bap = ap +  bs2*_i + bs*cidx + ridx; \
127         if (addv == ADD_VALUES) *bap += value;  \
128         else                    *bap  = value;  \
129         goto a_noinsert; \
130       } \
131     } \
132     if (a->nonew == 1) goto a_noinsert; \
133     if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
134     MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
135     N = nrow++ - 1;  \
136     /* shift up all the later entries in this row */ \
137     for (ii=N; ii>=_i; ii--) { \
138       rp[ii+1] = rp[ii]; \
139       ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
140     } \
141     if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
142     rp[_i]                      = bcol;  \
143     ap[bs2*_i + bs*cidx + ridx] = value;  \
144 a_noinsert:; \
145     ailen[brow] = nrow; \
146   }
147 
148 #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
149   { \
150     brow = row/bs;  \
151     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
152     rmax = bimax[brow]; nrow = bilen[brow]; \
153     bcol = col/bs; \
154     ridx = row % bs; cidx = col % bs; \
155     low  = 0; high = nrow; \
156     while (high-low > 3) { \
157       t = (low+high)/2; \
158       if (rp[t] > bcol) high = t; \
159       else              low  = t; \
160     } \
161     for (_i=low; _i<high; _i++) { \
162       if (rp[_i] > bcol) break; \
163       if (rp[_i] == bcol) { \
164         bap = ap +  bs2*_i + bs*cidx + ridx; \
165         if (addv == ADD_VALUES) *bap += value;  \
166         else                    *bap  = value;  \
167         goto b_noinsert; \
168       } \
169     } \
170     if (b->nonew == 1) goto b_noinsert; \
171     if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
172     MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
173     N = nrow++ - 1;  \
174     /* shift up all the later entries in this row */ \
175     for (ii=N; ii>=_i; ii--) { \
176       rp[ii+1] = rp[ii]; \
177       ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
178     } \
179     if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
180     rp[_i]                      = bcol;  \
181     ap[bs2*_i + bs*cidx + ridx] = value;  \
182 b_noinsert:; \
183     bilen[brow] = nrow; \
184   }
185 
186 #undef __FUNCT__
187 #define __FUNCT__ "MatSetValues_MPIBAIJ"
188 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
189 {
190   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
191   MatScalar      value;
192   PetscBool      roworiented = baij->roworiented;
193   PetscErrorCode ierr;
194   PetscInt       i,j,row,col;
195   PetscInt       rstart_orig=mat->rmap->rstart;
196   PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
197   PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;
198 
199   /* Some Variables required in the macro */
200   Mat         A     = baij->A;
201   Mat_SeqBAIJ *a    = (Mat_SeqBAIJ*)(A)->data;
202   PetscInt    *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
203   MatScalar   *aa   =a->a;
204 
205   Mat         B     = baij->B;
206   Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
207   PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
208   MatScalar   *ba   =b->a;
209 
210   PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
211   PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
212   MatScalar *ap,*bap;
213 
214   PetscFunctionBegin;
215   for (i=0; i<m; i++) {
216     if (im[i] < 0) continue;
217 #if defined(PETSC_USE_DEBUG)
218     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
219 #endif
220     if (im[i] >= rstart_orig && im[i] < rend_orig) {
221       row = im[i] - rstart_orig;
222       for (j=0; j<n; j++) {
223         if (in[j] >= cstart_orig && in[j] < cend_orig) {
224           col = in[j] - cstart_orig;
225           if (roworiented) value = v[i*n+j];
226           else             value = v[i+j*m];
227           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
228           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
229         } else if (in[j] < 0) continue;
230 #if defined(PETSC_USE_DEBUG)
231         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
232 #endif
233         else {
234           if (mat->was_assembled) {
235             if (!baij->colmap) {
236               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
237             }
238 #if defined(PETSC_USE_CTABLE)
239             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
240             col  = col - 1;
241 #else
242             col = baij->colmap[in[j]/bs] - 1;
243 #endif
244             if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
245               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
246               col  =  in[j];
247               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
248               B    = baij->B;
249               b    = (Mat_SeqBAIJ*)(B)->data;
250               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
251               ba   =b->a;
252             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
253             else col += in[j]%bs;
254           } else col = in[j];
255           if (roworiented) value = v[i*n+j];
256           else             value = v[i+j*m];
257           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
258           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
259         }
260       }
261     } else {
262       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
263       if (!baij->donotstash) {
264         mat->assembled = PETSC_FALSE;
265         if (roworiented) {
266           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
267         } else {
268           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
269         }
270       }
271     }
272   }
273   PetscFunctionReturn(0);
274 }
275 
276 #undef __FUNCT__
277 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
278 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
279 {
280   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
281   const PetscScalar *value;
282   MatScalar         *barray     = baij->barray;
283   PetscBool         roworiented = baij->roworiented;
284   PetscErrorCode    ierr;
285   PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
286   PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
287   PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
288 
289   PetscFunctionBegin;
290   if (!barray) {
291     ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
292     baij->barray = barray;
293   }
294 
295   if (roworiented) stepval = (n-1)*bs;
296   else stepval = (m-1)*bs;
297 
298   for (i=0; i<m; i++) {
299     if (im[i] < 0) continue;
300 #if defined(PETSC_USE_DEBUG)
301     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
302 #endif
303     if (im[i] >= rstart && im[i] < rend) {
304       row = im[i] - rstart;
305       for (j=0; j<n; j++) {
306         /* If NumCol = 1 then a copy is not required */
307         if ((roworiented) && (n == 1)) {
308           barray = (MatScalar*)v + i*bs2;
309         } else if ((!roworiented) && (m == 1)) {
310           barray = (MatScalar*)v + j*bs2;
311         } else { /* Here a copy is required */
312           if (roworiented) {
313             value = v + (i*(stepval+bs) + j)*bs;
314           } else {
315             value = v + (j*(stepval+bs) + i)*bs;
316           }
317           for (ii=0; ii<bs; ii++,value+=bs+stepval) {
318             for (jj=0; jj<bs; jj++) barray[jj] = value[jj];
319             barray += bs;
320           }
321           barray -= bs2;
322         }
323 
324         if (in[j] >= cstart && in[j] < cend) {
325           col  = in[j] - cstart;
326           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
327         } else if (in[j] < 0) continue;
328 #if defined(PETSC_USE_DEBUG)
329         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
330 #endif
331         else {
332           if (mat->was_assembled) {
333             if (!baij->colmap) {
334               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
335             }
336 
337 #if defined(PETSC_USE_DEBUG)
338 #if defined(PETSC_USE_CTABLE)
339             { PetscInt data;
340               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
341               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
342             }
343 #else
344             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
345 #endif
346 #endif
347 #if defined(PETSC_USE_CTABLE)
348             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
349             col  = (col - 1)/bs;
350 #else
351             col = (baij->colmap[in[j]] - 1)/bs;
352 #endif
353             if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
354               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
355               col  =  in[j];
356             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", bs*im[i], bs*in[j]);
357           } else col = in[j];
358           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
359         }
360       }
361     } else {
362       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
363       if (!baij->donotstash) {
364         if (roworiented) {
365           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
366         } else {
367           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
368         }
369       }
370     }
371   }
372   PetscFunctionReturn(0);
373 }
374 
375 #define HASH_KEY 0.6180339887
376 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
377 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
378 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
379 #undef __FUNCT__
380 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
381 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
382 {
383   Mat_MPIBAIJ    *baij       = (Mat_MPIBAIJ*)mat->data;
384   PetscBool      roworiented = baij->roworiented;
385   PetscErrorCode ierr;
386   PetscInt       i,j,row,col;
387   PetscInt       rstart_orig=mat->rmap->rstart;
388   PetscInt       rend_orig  =mat->rmap->rend,Nbs=baij->Nbs;
389   PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
390   PetscReal      tmp;
391   MatScalar      **HD = baij->hd,value;
392 #if defined(PETSC_USE_DEBUG)
393   PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
394 #endif
395 
396   PetscFunctionBegin;
397   for (i=0; i<m; i++) {
398 #if defined(PETSC_USE_DEBUG)
399     if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
400     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
401 #endif
402     row = im[i];
403     if (row >= rstart_orig && row < rend_orig) {
404       for (j=0; j<n; j++) {
405         col = in[j];
406         if (roworiented) value = v[i*n+j];
407         else             value = v[i+j*m];
408         /* Look up PetscInto the Hash Table */
409         key = (row/bs)*Nbs+(col/bs)+1;
410         h1  = HASH(size,key,tmp);
411 
412 
413         idx = h1;
414 #if defined(PETSC_USE_DEBUG)
415         insert_ct++;
416         total_ct++;
417         if (HT[idx] != key) {
418           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
419           if (idx == size) {
420             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
421             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
422           }
423         }
424 #else
425         if (HT[idx] != key) {
426           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
427           if (idx == size) {
428             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
429             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
430           }
431         }
432 #endif
433         /* A HASH table entry is found, so insert the values at the correct address */
434         if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
435         else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
436       }
437     } else if (!baij->donotstash) {
438       if (roworiented) {
439         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
440       } else {
441         ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
442       }
443     }
444   }
445 #if defined(PETSC_USE_DEBUG)
446   baij->ht_total_ct  = total_ct;
447   baij->ht_insert_ct = insert_ct;
448 #endif
449   PetscFunctionReturn(0);
450 }
451 
452 #undef __FUNCT__
453 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
454 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
455 {
456   Mat_MPIBAIJ       *baij       = (Mat_MPIBAIJ*)mat->data;
457   PetscBool         roworiented = baij->roworiented;
458   PetscErrorCode    ierr;
459   PetscInt          i,j,ii,jj,row,col;
460   PetscInt          rstart=baij->rstartbs;
461   PetscInt          rend  =mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
462   PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
463   PetscReal         tmp;
464   MatScalar         **HD = baij->hd,*baij_a;
465   const PetscScalar *v_t,*value;
466 #if defined(PETSC_USE_DEBUG)
467   PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
468 #endif
469 
470   PetscFunctionBegin;
471   if (roworiented) stepval = (n-1)*bs;
472   else stepval = (m-1)*bs;
473 
474   for (i=0; i<m; i++) {
475 #if defined(PETSC_USE_DEBUG)
476     if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
477     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
478 #endif
479     row = im[i];
480     v_t = v + i*nbs2;
481     if (row >= rstart && row < rend) {
482       for (j=0; j<n; j++) {
483         col = in[j];
484 
485         /* Look up into the Hash Table */
486         key = row*Nbs+col+1;
487         h1  = HASH(size,key,tmp);
488 
489         idx = h1;
490 #if defined(PETSC_USE_DEBUG)
491         total_ct++;
492         insert_ct++;
493         if (HT[idx] != key) {
494           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
495           if (idx == size) {
496             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
497             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
498           }
499         }
500 #else
501         if (HT[idx] != key) {
502           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
503           if (idx == size) {
504             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
505             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
506           }
507         }
508 #endif
509         baij_a = HD[idx];
510         if (roworiented) {
511           /*value = v + i*(stepval+bs)*bs + j*bs;*/
512           /* value = v + (i*(stepval+bs)+j)*bs; */
513           value = v_t;
514           v_t  += bs;
515           if (addv == ADD_VALUES) {
516             for (ii=0; ii<bs; ii++,value+=stepval) {
517               for (jj=ii; jj<bs2; jj+=bs) {
518                 baij_a[jj] += *value++;
519               }
520             }
521           } else {
522             for (ii=0; ii<bs; ii++,value+=stepval) {
523               for (jj=ii; jj<bs2; jj+=bs) {
524                 baij_a[jj] = *value++;
525               }
526             }
527           }
528         } else {
529           value = v + j*(stepval+bs)*bs + i*bs;
530           if (addv == ADD_VALUES) {
531             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
532               for (jj=0; jj<bs; jj++) {
533                 baij_a[jj] += *value++;
534               }
535             }
536           } else {
537             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
538               for (jj=0; jj<bs; jj++) {
539                 baij_a[jj] = *value++;
540               }
541             }
542           }
543         }
544       }
545     } else {
546       if (!baij->donotstash) {
547         if (roworiented) {
548           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
549         } else {
550           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
551         }
552       }
553     }
554   }
555 #if defined(PETSC_USE_DEBUG)
556   baij->ht_total_ct  = total_ct;
557   baij->ht_insert_ct = insert_ct;
558 #endif
559   PetscFunctionReturn(0);
560 }
561 
562 #undef __FUNCT__
563 #define __FUNCT__ "MatGetValues_MPIBAIJ"
564 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
565 {
566   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
567   PetscErrorCode ierr;
568   PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
569   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
570 
571   PetscFunctionBegin;
572   for (i=0; i<m; i++) {
573     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
574     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
575     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
576       row = idxm[i] - bsrstart;
577       for (j=0; j<n; j++) {
578         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
579         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
580         if (idxn[j] >= bscstart && idxn[j] < bscend) {
581           col  = idxn[j] - bscstart;
582           ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
583         } else {
584           if (!baij->colmap) {
585             ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
586           }
587 #if defined(PETSC_USE_CTABLE)
588           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
589           data--;
590 #else
591           data = baij->colmap[idxn[j]/bs]-1;
592 #endif
593           if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
594           else {
595             col  = data + idxn[j]%bs;
596             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
597           }
598         }
599       }
600     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
601   }
602   PetscFunctionReturn(0);
603 }
604 
605 #undef __FUNCT__
606 #define __FUNCT__ "MatNorm_MPIBAIJ"
607 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
608 {
609   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
610   Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
611   PetscErrorCode ierr;
612   PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
613   PetscReal      sum = 0.0;
614   MatScalar      *v;
615 
616   PetscFunctionBegin;
617   if (baij->size == 1) {
618     ierr =  MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
619   } else {
620     if (type == NORM_FROBENIUS) {
621       v  = amat->a;
622       nz = amat->nz*bs2;
623       for (i=0; i<nz; i++) {
624         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
625       }
626       v  = bmat->a;
627       nz = bmat->nz*bs2;
628       for (i=0; i<nz; i++) {
629         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
630       }
631       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
632       *nrm = PetscSqrtReal(*nrm);
633     } else if (type == NORM_1) { /* max column sum */
634       PetscReal *tmp,*tmp2;
635       PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
636       ierr = PetscMalloc2(mat->cmap->N,&tmp,mat->cmap->N,&tmp2);CHKERRQ(ierr);
637       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
638       v    = amat->a; jj = amat->j;
639       for (i=0; i<amat->nz; i++) {
640         for (j=0; j<bs; j++) {
641           col = bs*(cstart + *jj) + j; /* column index */
642           for (row=0; row<bs; row++) {
643             tmp[col] += PetscAbsScalar(*v);  v++;
644           }
645         }
646         jj++;
647       }
648       v = bmat->a; jj = bmat->j;
649       for (i=0; i<bmat->nz; i++) {
650         for (j=0; j<bs; j++) {
651           col = bs*garray[*jj] + j;
652           for (row=0; row<bs; row++) {
653             tmp[col] += PetscAbsScalar(*v); v++;
654           }
655         }
656         jj++;
657       }
658       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
659       *nrm = 0.0;
660       for (j=0; j<mat->cmap->N; j++) {
661         if (tmp2[j] > *nrm) *nrm = tmp2[j];
662       }
663       ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
664     } else if (type == NORM_INFINITY) { /* max row sum */
665       PetscReal *sums;
666       ierr = PetscMalloc1(bs,&sums);CHKERRQ(ierr);
667       sum  = 0.0;
668       for (j=0; j<amat->mbs; j++) {
669         for (row=0; row<bs; row++) sums[row] = 0.0;
670         v  = amat->a + bs2*amat->i[j];
671         nz = amat->i[j+1]-amat->i[j];
672         for (i=0; i<nz; i++) {
673           for (col=0; col<bs; col++) {
674             for (row=0; row<bs; row++) {
675               sums[row] += PetscAbsScalar(*v); v++;
676             }
677           }
678         }
679         v  = bmat->a + bs2*bmat->i[j];
680         nz = bmat->i[j+1]-bmat->i[j];
681         for (i=0; i<nz; i++) {
682           for (col=0; col<bs; col++) {
683             for (row=0; row<bs; row++) {
684               sums[row] += PetscAbsScalar(*v); v++;
685             }
686           }
687         }
688         for (row=0; row<bs; row++) {
689           if (sums[row] > sum) sum = sums[row];
690         }
691       }
692       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
693       ierr = PetscFree(sums);CHKERRQ(ierr);
694     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for this norm yet");
695   }
696   PetscFunctionReturn(0);
697 }
698 
699 /*
700   Creates the hash table, and sets the table
701   This table is created only once.
702   If new entried need to be added to the matrix
703   then the hash table has to be destroyed and
704   recreated.
705 */
706 #undef __FUNCT__
707 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
708 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
709 {
710   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
711   Mat            A     = baij->A,B=baij->B;
712   Mat_SeqBAIJ    *a    = (Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)B->data;
713   PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
714   PetscErrorCode ierr;
715   PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
716   PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
717   PetscInt       *HT,key;
718   MatScalar      **HD;
719   PetscReal      tmp;
720 #if defined(PETSC_USE_INFO)
721   PetscInt ct=0,max=0;
722 #endif
723 
724   PetscFunctionBegin;
725   if (baij->ht) PetscFunctionReturn(0);
726 
727   baij->ht_size = (PetscInt)(factor*nz);
728   ht_size       = baij->ht_size;
729 
730   /* Allocate Memory for Hash Table */
731   ierr = PetscCalloc2(ht_size,&baij->hd,ht_size,&baij->ht);CHKERRQ(ierr);
732   HD   = baij->hd;
733   HT   = baij->ht;
734 
735   /* Loop Over A */
736   for (i=0; i<a->mbs; i++) {
737     for (j=ai[i]; j<ai[i+1]; j++) {
738       row = i+rstart;
739       col = aj[j]+cstart;
740 
741       key = row*Nbs + col + 1;
742       h1  = HASH(ht_size,key,tmp);
743       for (k=0; k<ht_size; k++) {
744         if (!HT[(h1+k)%ht_size]) {
745           HT[(h1+k)%ht_size] = key;
746           HD[(h1+k)%ht_size] = a->a + j*bs2;
747           break;
748 #if defined(PETSC_USE_INFO)
749         } else {
750           ct++;
751 #endif
752         }
753       }
754 #if defined(PETSC_USE_INFO)
755       if (k> max) max = k;
756 #endif
757     }
758   }
759   /* Loop Over B */
760   for (i=0; i<b->mbs; i++) {
761     for (j=bi[i]; j<bi[i+1]; j++) {
762       row = i+rstart;
763       col = garray[bj[j]];
764       key = row*Nbs + col + 1;
765       h1  = HASH(ht_size,key,tmp);
766       for (k=0; k<ht_size; k++) {
767         if (!HT[(h1+k)%ht_size]) {
768           HT[(h1+k)%ht_size] = key;
769           HD[(h1+k)%ht_size] = b->a + j*bs2;
770           break;
771 #if defined(PETSC_USE_INFO)
772         } else {
773           ct++;
774 #endif
775         }
776       }
777 #if defined(PETSC_USE_INFO)
778       if (k> max) max = k;
779 #endif
780     }
781   }
782 
783   /* Print Summary */
784 #if defined(PETSC_USE_INFO)
785   for (i=0,j=0; i<ht_size; i++) {
786     if (HT[i]) j++;
787   }
788   ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
789 #endif
790   PetscFunctionReturn(0);
791 }
792 
793 #undef __FUNCT__
794 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
795 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
796 {
797   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
798   PetscErrorCode ierr;
799   PetscInt       nstash,reallocs;
800   InsertMode     addv;
801 
802   PetscFunctionBegin;
803   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
804 
805   /* make sure all processors are either in INSERTMODE or ADDMODE */
806   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
807   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
808   mat->insertmode = addv; /* in case this processor had no cache */
809 
810   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
811   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
812   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
813   ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
814   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
815   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
816   PetscFunctionReturn(0);
817 }
818 
819 #undef __FUNCT__
820 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
821 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
822 {
823   Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
824   Mat_SeqBAIJ    *a   =(Mat_SeqBAIJ*)baij->A->data;
825   PetscErrorCode ierr;
826   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
827   PetscInt       *row,*col;
828   PetscBool      r1,r2,r3,other_disassembled;
829   MatScalar      *val;
830   InsertMode     addv = mat->insertmode;
831   PetscMPIInt    n;
832 
833   PetscFunctionBegin;
834   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
835   if (!baij->donotstash && !mat->nooffprocentries) {
836     while (1) {
837       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
838       if (!flg) break;
839 
840       for (i=0; i<n;) {
841         /* Now identify the consecutive vals belonging to the same row */
842         for (j=i,rstart=row[j]; j<n; j++) {
843           if (row[j] != rstart) break;
844         }
845         if (j < n) ncols = j-i;
846         else       ncols = n-i;
847         /* Now assemble all these values with a single function call */
848         ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
849         i    = j;
850       }
851     }
852     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
853     /* Now process the block-stash. Since the values are stashed column-oriented,
854        set the roworiented flag to column oriented, and after MatSetValues()
855        restore the original flags */
856     r1 = baij->roworiented;
857     r2 = a->roworiented;
858     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
859 
860     baij->roworiented = PETSC_FALSE;
861     a->roworiented    = PETSC_FALSE;
862 
863     (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
864     while (1) {
865       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
866       if (!flg) break;
867 
868       for (i=0; i<n;) {
869         /* Now identify the consecutive vals belonging to the same row */
870         for (j=i,rstart=row[j]; j<n; j++) {
871           if (row[j] != rstart) break;
872         }
873         if (j < n) ncols = j-i;
874         else       ncols = n-i;
875         ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
876         i    = j;
877       }
878     }
879     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
880 
881     baij->roworiented = r1;
882     a->roworiented    = r2;
883 
884     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
885   }
886 
887   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
888   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
889 
890   /* determine if any processor has disassembled, if so we must
891      also disassemble ourselfs, in order that we may reassemble. */
892   /*
893      if nonzero structure of submatrix B cannot change then we know that
894      no processor disassembled thus we can skip this stuff
895   */
896   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
897     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
898     if (mat->was_assembled && !other_disassembled) {
899       ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
900     }
901   }
902 
903   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
904     ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
905   }
906   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
907   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
908 
909 #if defined(PETSC_USE_INFO)
910   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
911     ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);
912 
913     baij->ht_total_ct  = 0;
914     baij->ht_insert_ct = 0;
915   }
916 #endif
917   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
918     ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
919 
920     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
921     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
922   }
923 
924   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
925 
926   baij->rowvalues = 0;
927 
928   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
929   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
930     PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
931     ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
932   }
933   PetscFunctionReturn(0);
934 }
935 
936 extern PetscErrorCode MatView_SeqBAIJ(Mat,PetscViewer);
937 #include <petscdraw.h>
938 #undef __FUNCT__
939 #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
940 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
941 {
942   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
943   PetscErrorCode    ierr;
944   PetscMPIInt       rank = baij->rank;
945   PetscInt          bs   = mat->rmap->bs;
946   PetscBool         iascii,isdraw;
947   PetscViewer       sviewer;
948   PetscViewerFormat format;
949 
950   PetscFunctionBegin;
951   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
952   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
953   if (iascii) {
954     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
955     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
956       MatInfo info;
957       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
958       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
959       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
960       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
961                                                 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
962       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
963       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
964       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
965       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
966       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
967       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
968       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
969       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
970       PetscFunctionReturn(0);
971     } else if (format == PETSC_VIEWER_ASCII_INFO) {
972       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
973       PetscFunctionReturn(0);
974     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
975       PetscFunctionReturn(0);
976     }
977   }
978 
979   if (isdraw) {
980     PetscDraw draw;
981     PetscBool isnull;
982     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
983     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
984   }
985 
986   {
987     /* assemble the entire matrix onto first processor. */
988     Mat         A;
989     Mat_SeqBAIJ *Aloc;
990     PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
991     MatScalar   *a;
992 
993     /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
994     /* Perhaps this should be the type of mat? */
995     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
996     if (!rank) {
997       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
998     } else {
999       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1000     }
1001     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
1002     ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
1003     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1004     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1005 
1006     /* copy over the A part */
1007     Aloc = (Mat_SeqBAIJ*)baij->A->data;
1008     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1009     ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);
1010 
1011     for (i=0; i<mbs; i++) {
1012       rvals[0] = bs*(baij->rstartbs + i);
1013       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1014       for (j=ai[i]; j<ai[i+1]; j++) {
1015         col = (baij->cstartbs+aj[j])*bs;
1016         for (k=0; k<bs; k++) {
1017           ierr      = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1018           col++; a += bs;
1019         }
1020       }
1021     }
1022     /* copy over the B part */
1023     Aloc = (Mat_SeqBAIJ*)baij->B->data;
1024     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1025     for (i=0; i<mbs; i++) {
1026       rvals[0] = bs*(baij->rstartbs + i);
1027       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1028       for (j=ai[i]; j<ai[i+1]; j++) {
1029         col = baij->garray[aj[j]]*bs;
1030         for (k=0; k<bs; k++) {
1031           ierr      = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1032           col++; a += bs;
1033         }
1034       }
1035     }
1036     ierr = PetscFree(rvals);CHKERRQ(ierr);
1037     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1038     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1039     /*
1040        Everyone has to call to draw the matrix since the graphics waits are
1041        synchronized across all processors that share the PetscDraw object
1042     */
1043     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1044     if (!rank) {
1045       ierr = MatView_SeqBAIJ(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1046     }
1047     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1048     ierr = MatDestroy(&A);CHKERRQ(ierr);
1049   }
1050   PetscFunctionReturn(0);
1051 }
1052 
1053 #undef __FUNCT__
1054 #define __FUNCT__ "MatView_MPIBAIJ_Binary"
1055 static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
1056 {
1057   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)mat->data;
1058   Mat_SeqBAIJ    *A = (Mat_SeqBAIJ*)a->A->data;
1059   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)a->B->data;
1060   PetscErrorCode ierr;
1061   PetscInt       i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen;
1062   PetscInt       *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll;
1063   int            fd;
1064   PetscScalar    *column_values;
1065   FILE           *file;
1066   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1067   PetscInt       message_count,flowcontrolcount;
1068 
1069   PetscFunctionBegin;
1070   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1071   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1072   nz   = bs2*(A->nz + B->nz);
1073   rlen = mat->rmap->n;
1074   if (!rank) {
1075     header[0] = MAT_FILE_CLASSID;
1076     header[1] = mat->rmap->N;
1077     header[2] = mat->cmap->N;
1078 
1079     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1080     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1081     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1082     /* get largest number of rows any processor has */
1083     range = mat->rmap->range;
1084     for (i=1; i<size; i++) {
1085       rlen = PetscMax(rlen,range[i+1] - range[i]);
1086     }
1087   } else {
1088     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1089   }
1090 
1091   ierr = PetscMalloc1((rlen/bs),&crow_lens);CHKERRQ(ierr);
1092   /* compute lengths of each row  */
1093   for (i=0; i<a->mbs; i++) {
1094     crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1095   }
1096   /* store the row lengths to the file */
1097   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1098   if (!rank) {
1099     MPI_Status status;
1100     ierr = PetscMalloc1(rlen,&row_lens);CHKERRQ(ierr);
1101     rlen = (range[1] - range[0])/bs;
1102     for (i=0; i<rlen; i++) {
1103       for (j=0; j<bs; j++) {
1104         row_lens[i*bs+j] = bs*crow_lens[i];
1105       }
1106     }
1107     ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1108     for (i=1; i<size; i++) {
1109       rlen = (range[i+1] - range[i])/bs;
1110       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1111       ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1112       for (k=0; k<rlen; k++) {
1113         for (j=0; j<bs; j++) {
1114           row_lens[k*bs+j] = bs*crow_lens[k];
1115         }
1116       }
1117       ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1118     }
1119     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1120     ierr = PetscFree(row_lens);CHKERRQ(ierr);
1121   } else {
1122     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1123     ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1124     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1125   }
1126   ierr = PetscFree(crow_lens);CHKERRQ(ierr);
1127 
1128   /* load up the local column indices. Include for all rows not just one for each block row since process 0 does not have the
1129      information needed to make it for each row from a block row. This does require more communication but still not more than
1130      the communication needed for the nonzero values  */
1131   nzmax = nz; /*  space a largest processor needs */
1132   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1133   ierr  = PetscMalloc1(nzmax,&column_indices);CHKERRQ(ierr);
1134   cnt   = 0;
1135   for (i=0; i<a->mbs; i++) {
1136     pcnt = cnt;
1137     for (j=B->i[i]; j<B->i[i+1]; j++) {
1138       if ((col = garray[B->j[j]]) > cstart) break;
1139       for (l=0; l<bs; l++) {
1140         column_indices[cnt++] = bs*col+l;
1141       }
1142     }
1143     for (k=A->i[i]; k<A->i[i+1]; k++) {
1144       for (l=0; l<bs; l++) {
1145         column_indices[cnt++] = bs*(A->j[k] + cstart)+l;
1146       }
1147     }
1148     for (; j<B->i[i+1]; j++) {
1149       for (l=0; l<bs; l++) {
1150         column_indices[cnt++] = bs*garray[B->j[j]]+l;
1151       }
1152     }
1153     len = cnt - pcnt;
1154     for (k=1; k<bs; k++) {
1155       ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr);
1156       cnt += len;
1157     }
1158   }
1159   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1160 
1161   /* store the columns to the file */
1162   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1163   if (!rank) {
1164     MPI_Status status;
1165     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1166     for (i=1; i<size; i++) {
1167       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1168       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1169       ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1170       ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1171     }
1172     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1173   } else {
1174     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1175     ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1176     ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1177     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1178   }
1179   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1180 
1181   /* load up the numerical values */
1182   ierr = PetscMalloc1(nzmax,&column_values);CHKERRQ(ierr);
1183   cnt  = 0;
1184   for (i=0; i<a->mbs; i++) {
1185     rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]);
1186     for (j=B->i[i]; j<B->i[i+1]; j++) {
1187       if (garray[B->j[j]] > cstart) break;
1188       for (l=0; l<bs; l++) {
1189         for (ll=0; ll<bs; ll++) {
1190           column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1191         }
1192       }
1193       cnt += bs;
1194     }
1195     for (k=A->i[i]; k<A->i[i+1]; k++) {
1196       for (l=0; l<bs; l++) {
1197         for (ll=0; ll<bs; ll++) {
1198           column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll];
1199         }
1200       }
1201       cnt += bs;
1202     }
1203     for (; j<B->i[i+1]; j++) {
1204       for (l=0; l<bs; l++) {
1205         for (ll=0; ll<bs; ll++) {
1206           column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1207         }
1208       }
1209       cnt += bs;
1210     }
1211     cnt += (bs-1)*rlen;
1212   }
1213   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1214 
1215   /* store the column values to the file */
1216   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1217   if (!rank) {
1218     MPI_Status status;
1219     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1220     for (i=1; i<size; i++) {
1221       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1222       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1223       ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1224       ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1225     }
1226     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1227   } else {
1228     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1229     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1230     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1231     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1232   }
1233   ierr = PetscFree(column_values);CHKERRQ(ierr);
1234 
1235   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1236   if (file) {
1237     fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
1238   }
1239   PetscFunctionReturn(0);
1240 }
1241 
1242 #undef __FUNCT__
1243 #define __FUNCT__ "MatView_MPIBAIJ"
1244 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
1245 {
1246   PetscErrorCode ierr;
1247   PetscBool      iascii,isdraw,issocket,isbinary;
1248 
1249   PetscFunctionBegin;
1250   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1251   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1252   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1253   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1254   if (iascii || isdraw || issocket) {
1255     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1256   } else if (isbinary) {
1257     ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1258   }
1259   PetscFunctionReturn(0);
1260 }
1261 
1262 #undef __FUNCT__
1263 #define __FUNCT__ "MatDestroy_MPIBAIJ"
1264 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
1265 {
1266   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1267   PetscErrorCode ierr;
1268 
1269   PetscFunctionBegin;
1270 #if defined(PETSC_USE_LOG)
1271   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
1272 #endif
1273   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1274   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
1275   ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
1276   ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
1277 #if defined(PETSC_USE_CTABLE)
1278   ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
1279 #else
1280   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
1281 #endif
1282   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
1283   ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
1284   ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
1285   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
1286   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
1287   ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);
1288   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
1289   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1290 
1291   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1292   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1293   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1294   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
1295   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1296   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1297   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1298   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C",NULL);CHKERRQ(ierr);
1299   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1300   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C",NULL);CHKERRQ(ierr);
1301   PetscFunctionReturn(0);
1302 }
1303 
1304 #undef __FUNCT__
1305 #define __FUNCT__ "MatMult_MPIBAIJ"
1306 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1307 {
1308   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1309   PetscErrorCode ierr;
1310   PetscInt       nt;
1311 
1312   PetscFunctionBegin;
1313   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1314   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1315   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1316   if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
1317   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1318   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1319   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1320   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1321   PetscFunctionReturn(0);
1322 }
1323 
1324 #undef __FUNCT__
1325 #define __FUNCT__ "MatMultAdd_MPIBAIJ"
1326 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1327 {
1328   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1329   PetscErrorCode ierr;
1330 
1331   PetscFunctionBegin;
1332   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1333   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1334   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1335   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1336   PetscFunctionReturn(0);
1337 }
1338 
1339 #undef __FUNCT__
1340 #define __FUNCT__ "MatMultTranspose_MPIBAIJ"
1341 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1342 {
1343   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1344   PetscErrorCode ierr;
1345   PetscBool      merged;
1346 
1347   PetscFunctionBegin;
1348   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1349   /* do nondiagonal part */
1350   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1351   if (!merged) {
1352     /* send it on its way */
1353     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1354     /* do local part */
1355     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1356     /* receive remote parts: note this assumes the values are not actually */
1357     /* inserted in yy until the next line */
1358     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1359   } else {
1360     /* do local part */
1361     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1362     /* send it on its way */
1363     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1364     /* values actually were received in the Begin() but we need to call this nop */
1365     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1366   }
1367   PetscFunctionReturn(0);
1368 }
1369 
1370 #undef __FUNCT__
1371 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
1372 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1373 {
1374   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1375   PetscErrorCode ierr;
1376 
1377   PetscFunctionBegin;
1378   /* do nondiagonal part */
1379   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1380   /* send it on its way */
1381   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1382   /* do local part */
1383   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1384   /* receive remote parts: note this assumes the values are not actually */
1385   /* inserted in yy until the next line, which is true for my implementation*/
1386   /* but is not perhaps always true. */
1387   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1388   PetscFunctionReturn(0);
1389 }
1390 
1391 /*
1392   This only works correctly for square matrices where the subblock A->A is the
1393    diagonal block
1394 */
1395 #undef __FUNCT__
1396 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
1397 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1398 {
1399   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1400   PetscErrorCode ierr;
1401 
1402   PetscFunctionBegin;
1403   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1404   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1405   PetscFunctionReturn(0);
1406 }
1407 
1408 #undef __FUNCT__
1409 #define __FUNCT__ "MatScale_MPIBAIJ"
1410 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
1411 {
1412   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1413   PetscErrorCode ierr;
1414 
1415   PetscFunctionBegin;
1416   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1417   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1418   PetscFunctionReturn(0);
1419 }
1420 
1421 #undef __FUNCT__
1422 #define __FUNCT__ "MatGetRow_MPIBAIJ"
1423 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1424 {
1425   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
1426   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1427   PetscErrorCode ierr;
1428   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1429   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
1430   PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;
1431 
1432   PetscFunctionBegin;
1433   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1434   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1435   mat->getrowactive = PETSC_TRUE;
1436 
1437   if (!mat->rowvalues && (idx || v)) {
1438     /*
1439         allocate enough space to hold information from the longest row.
1440     */
1441     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1442     PetscInt    max = 1,mbs = mat->mbs,tmp;
1443     for (i=0; i<mbs; i++) {
1444       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1445       if (max < tmp) max = tmp;
1446     }
1447     ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr);
1448   }
1449   lrow = row - brstart;
1450 
1451   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1452   if (!v)   {pvA = 0; pvB = 0;}
1453   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1454   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1455   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1456   nztot = nzA + nzB;
1457 
1458   cmap = mat->garray;
1459   if (v  || idx) {
1460     if (nztot) {
1461       /* Sort by increasing column numbers, assuming A and B already sorted */
1462       PetscInt imark = -1;
1463       if (v) {
1464         *v = v_p = mat->rowvalues;
1465         for (i=0; i<nzB; i++) {
1466           if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
1467           else break;
1468         }
1469         imark = i;
1470         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1471         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1472       }
1473       if (idx) {
1474         *idx = idx_p = mat->rowindices;
1475         if (imark > -1) {
1476           for (i=0; i<imark; i++) {
1477             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1478           }
1479         } else {
1480           for (i=0; i<nzB; i++) {
1481             if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1482             else break;
1483           }
1484           imark = i;
1485         }
1486         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1487         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1488       }
1489     } else {
1490       if (idx) *idx = 0;
1491       if (v)   *v   = 0;
1492     }
1493   }
1494   *nz  = nztot;
1495   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1496   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1497   PetscFunctionReturn(0);
1498 }
1499 
1500 #undef __FUNCT__
1501 #define __FUNCT__ "MatRestoreRow_MPIBAIJ"
1502 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1503 {
1504   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1505 
1506   PetscFunctionBegin;
1507   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1508   baij->getrowactive = PETSC_FALSE;
1509   PetscFunctionReturn(0);
1510 }
1511 
1512 #undef __FUNCT__
1513 #define __FUNCT__ "MatZeroEntries_MPIBAIJ"
1514 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1515 {
1516   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1517   PetscErrorCode ierr;
1518 
1519   PetscFunctionBegin;
1520   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1521   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1522   PetscFunctionReturn(0);
1523 }
1524 
1525 #undef __FUNCT__
1526 #define __FUNCT__ "MatGetInfo_MPIBAIJ"
1527 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1528 {
1529   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
1530   Mat            A  = a->A,B = a->B;
1531   PetscErrorCode ierr;
1532   PetscReal      isend[5],irecv[5];
1533 
1534   PetscFunctionBegin;
1535   info->block_size = (PetscReal)matin->rmap->bs;
1536 
1537   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1538 
1539   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1540   isend[3] = info->memory;  isend[4] = info->mallocs;
1541 
1542   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1543 
1544   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1545   isend[3] += info->memory;  isend[4] += info->mallocs;
1546 
1547   if (flag == MAT_LOCAL) {
1548     info->nz_used      = isend[0];
1549     info->nz_allocated = isend[1];
1550     info->nz_unneeded  = isend[2];
1551     info->memory       = isend[3];
1552     info->mallocs      = isend[4];
1553   } else if (flag == MAT_GLOBAL_MAX) {
1554     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1555 
1556     info->nz_used      = irecv[0];
1557     info->nz_allocated = irecv[1];
1558     info->nz_unneeded  = irecv[2];
1559     info->memory       = irecv[3];
1560     info->mallocs      = irecv[4];
1561   } else if (flag == MAT_GLOBAL_SUM) {
1562     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1563 
1564     info->nz_used      = irecv[0];
1565     info->nz_allocated = irecv[1];
1566     info->nz_unneeded  = irecv[2];
1567     info->memory       = irecv[3];
1568     info->mallocs      = irecv[4];
1569   } else SETERRQ1(PetscObjectComm((PetscObject)matin),PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1570   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1571   info->fill_ratio_needed = 0;
1572   info->factor_mallocs    = 0;
1573   PetscFunctionReturn(0);
1574 }
1575 
1576 #undef __FUNCT__
1577 #define __FUNCT__ "MatSetOption_MPIBAIJ"
1578 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg)
1579 {
1580   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1581   PetscErrorCode ierr;
1582 
1583   PetscFunctionBegin;
1584   switch (op) {
1585   case MAT_NEW_NONZERO_LOCATIONS:
1586   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1587   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1588   case MAT_KEEP_NONZERO_PATTERN:
1589   case MAT_NEW_NONZERO_LOCATION_ERR:
1590     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1591     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1592     break;
1593   case MAT_ROW_ORIENTED:
1594     a->roworiented = flg;
1595 
1596     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1597     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1598     break;
1599   case MAT_NEW_DIAGONALS:
1600     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1601     break;
1602   case MAT_IGNORE_OFF_PROC_ENTRIES:
1603     a->donotstash = flg;
1604     break;
1605   case MAT_USE_HASH_TABLE:
1606     a->ht_flag = flg;
1607     break;
1608   case MAT_SYMMETRIC:
1609   case MAT_STRUCTURALLY_SYMMETRIC:
1610   case MAT_HERMITIAN:
1611   case MAT_SYMMETRY_ETERNAL:
1612     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1613     break;
1614   default:
1615     SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"unknown option %d",op);
1616   }
1617   PetscFunctionReturn(0);
1618 }
1619 
1620 #undef __FUNCT__
1621 #define __FUNCT__ "MatTranspose_MPIBAIJ"
1622 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
1623 {
1624   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
1625   Mat_SeqBAIJ    *Aloc;
1626   Mat            B;
1627   PetscErrorCode ierr;
1628   PetscInt       M =A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
1629   PetscInt       bs=A->rmap->bs,mbs=baij->mbs;
1630   MatScalar      *a;
1631 
1632   PetscFunctionBegin;
1633   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1634   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1635     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1636     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1637     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1638     /* Do not know preallocation information, but must set block size */
1639     ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,NULL,PETSC_DECIDE,NULL);CHKERRQ(ierr);
1640   } else {
1641     B = *matout;
1642   }
1643 
1644   /* copy over the A part */
1645   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1646   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1647   ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);
1648 
1649   for (i=0; i<mbs; i++) {
1650     rvals[0] = bs*(baij->rstartbs + i);
1651     for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1652     for (j=ai[i]; j<ai[i+1]; j++) {
1653       col = (baij->cstartbs+aj[j])*bs;
1654       for (k=0; k<bs; k++) {
1655         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1656 
1657         col++; a += bs;
1658       }
1659     }
1660   }
1661   /* copy over the B part */
1662   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1663   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1664   for (i=0; i<mbs; i++) {
1665     rvals[0] = bs*(baij->rstartbs + i);
1666     for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1667     for (j=ai[i]; j<ai[i+1]; j++) {
1668       col = baij->garray[aj[j]]*bs;
1669       for (k=0; k<bs; k++) {
1670         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1671         col++;
1672         a += bs;
1673       }
1674     }
1675   }
1676   ierr = PetscFree(rvals);CHKERRQ(ierr);
1677   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1678   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1679 
1680   if (reuse == MAT_INITIAL_MATRIX || *matout != A) *matout = B;
1681   else {
1682     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
1683   }
1684   PetscFunctionReturn(0);
1685 }
1686 
1687 #undef __FUNCT__
1688 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
1689 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1690 {
1691   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1692   Mat            a     = baij->A,b = baij->B;
1693   PetscErrorCode ierr;
1694   PetscInt       s1,s2,s3;
1695 
1696   PetscFunctionBegin;
1697   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1698   if (rr) {
1699     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1700     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1701     /* Overlap communication with computation. */
1702     ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1703   }
1704   if (ll) {
1705     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1706     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1707     ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
1708   }
1709   /* scale  the diagonal block */
1710   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1711 
1712   if (rr) {
1713     /* Do a scatter end and then right scale the off-diagonal block */
1714     ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1715     ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
1716   }
1717   PetscFunctionReturn(0);
1718 }
1719 
1720 #undef __FUNCT__
1721 #define __FUNCT__ "MatZeroRows_MPIBAIJ"
1722 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
1723 {
1724   Mat_MPIBAIJ   *l      = (Mat_MPIBAIJ *) A->data;
1725   PetscInt      *owners = A->rmap->range;
1726   PetscInt       n      = A->rmap->n;
1727   PetscSF        sf;
1728   PetscInt      *lrows;
1729   PetscSFNode   *rrows;
1730   PetscInt       r, p = 0, len = 0;
1731   PetscErrorCode ierr;
1732 
1733   PetscFunctionBegin;
1734   /* Create SF where leaves are input rows and roots are owned rows */
1735   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
1736   for (r = 0; r < n; ++r) lrows[r] = -1;
1737   if (!A->nooffproczerorows) {ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);}
1738   for (r = 0; r < N; ++r) {
1739     const PetscInt idx   = rows[r];
1740     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
1741     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
1742       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
1743     }
1744     if (A->nooffproczerorows) {
1745       if (p != l->rank) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"MAT_NO_OFF_PROC_ZERO_ROWS set, but row %D is not owned by rank %d",idx,l->rank);
1746       lrows[len++] = idx - owners[p];
1747     } else {
1748       rrows[r].rank = p;
1749       rrows[r].index = rows[r] - owners[p];
1750     }
1751   }
1752   if (!A->nooffproczerorows) {
1753     ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
1754     ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
1755     /* Collect flags for rows to be zeroed */
1756     ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1757     ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1758     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1759     /* Compress and put in row numbers */
1760     for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
1761   }
1762   /* fix right hand side if needed */
1763   if (x && b) {
1764     const PetscScalar *xx;
1765     PetscScalar       *bb;
1766 
1767     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
1768     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1769     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
1770     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
1771     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1772   }
1773 
1774   /* actually zap the local rows */
1775   /*
1776         Zero the required rows. If the "diagonal block" of the matrix
1777      is square and the user wishes to set the diagonal we use separate
1778      code so that MatSetValues() is not called for each diagonal allocating
1779      new memory, thus calling lots of mallocs and slowing things down.
1780 
1781   */
1782   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1783   ierr = MatZeroRows_SeqBAIJ(l->B,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
1784   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
1785     ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,diag,NULL,NULL);CHKERRQ(ierr);
1786   } else if (diag != 0.0) {
1787     ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,0,0);CHKERRQ(ierr);
1788     if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1789        MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1790     for (r = 0; r < len; ++r) {
1791       const PetscInt row = lrows[r] + A->rmap->rstart;
1792       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
1793     }
1794     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1795     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1796   } else {
1797     ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
1798   }
1799   ierr = PetscFree(lrows);CHKERRQ(ierr);
1800 
1801   /* only change matrix nonzero state if pattern was allowed to be changed */
1802   if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) {
1803     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1804     ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1805   }
1806   PetscFunctionReturn(0);
1807 }
1808 
1809 #undef __FUNCT__
1810 #define __FUNCT__ "MatZeroRowsColumns_MPIBAIJ"
1811 PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
1812 {
1813   Mat_MPIBAIJ       *l = (Mat_MPIBAIJ*)A->data;
1814   PetscErrorCode    ierr;
1815   PetscMPIInt       n = A->rmap->n;
1816   PetscInt          i,j,k,r,p = 0,len = 0,row,col,count;
1817   PetscInt          *lrows,*owners = A->rmap->range;
1818   PetscSFNode       *rrows;
1819   PetscSF           sf;
1820   const PetscScalar *xx;
1821   PetscScalar       *bb,*mask;
1822   Vec               xmask,lmask;
1823   Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ*)l->B->data;
1824   PetscInt           bs = A->rmap->bs, bs2 = baij->bs2;
1825   PetscScalar       *aa;
1826 
1827   PetscFunctionBegin;
1828   /* Create SF where leaves are input rows and roots are owned rows */
1829   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
1830   for (r = 0; r < n; ++r) lrows[r] = -1;
1831   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
1832   for (r = 0; r < N; ++r) {
1833     const PetscInt idx   = rows[r];
1834     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
1835     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
1836       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
1837     }
1838     rrows[r].rank  = p;
1839     rrows[r].index = rows[r] - owners[p];
1840   }
1841   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
1842   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
1843   /* Collect flags for rows to be zeroed */
1844   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1845   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1846   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1847   /* Compress and put in row numbers */
1848   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
1849   /* zero diagonal part of matrix */
1850   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
1851   /* handle off diagonal part of matrix */
1852   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
1853   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
1854   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
1855   for (i=0; i<len; i++) bb[lrows[i]] = 1;
1856   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1857   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1858   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1859   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1860   if (x) {
1861     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1862     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1863     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1864     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1865   }
1866   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1867   /* remove zeroed rows of off diagonal matrix */
1868   for (i = 0; i < len; ++i) {
1869     row   = lrows[i];
1870     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
1871     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
1872     for (k = 0; k < count; ++k) {
1873       aa[0] = 0.0;
1874       aa   += bs;
1875     }
1876   }
1877   /* loop over all elements of off process part of matrix zeroing removed columns*/
1878   for (i = 0; i < l->B->rmap->N; ++i) {
1879     row = i/bs;
1880     for (j = baij->i[row]; j < baij->i[row+1]; ++j) {
1881       for (k = 0; k < bs; ++k) {
1882         col = bs*baij->j[j] + k;
1883         if (PetscAbsScalar(mask[col])) {
1884           aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
1885           if (b) bb[i] -= aa[0]*xx[col];
1886           aa[0] = 0.0;
1887         }
1888       }
1889     }
1890   }
1891   if (x) {
1892     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1893     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1894   }
1895   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1896   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1897   ierr = PetscFree(lrows);CHKERRQ(ierr);
1898 
1899   /* only change matrix nonzero state if pattern was allowed to be changed */
1900   if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) {
1901     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1902     ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1903   }
1904   PetscFunctionReturn(0);
1905 }
1906 
1907 #undef __FUNCT__
1908 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
1909 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1910 {
1911   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1912   PetscErrorCode ierr;
1913 
1914   PetscFunctionBegin;
1915   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1916   PetscFunctionReturn(0);
1917 }
1918 
1919 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat*);
1920 
1921 #undef __FUNCT__
1922 #define __FUNCT__ "MatEqual_MPIBAIJ"
1923 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool  *flag)
1924 {
1925   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1926   Mat            a,b,c,d;
1927   PetscBool      flg;
1928   PetscErrorCode ierr;
1929 
1930   PetscFunctionBegin;
1931   a = matA->A; b = matA->B;
1932   c = matB->A; d = matB->B;
1933 
1934   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1935   if (flg) {
1936     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1937   }
1938   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1939   PetscFunctionReturn(0);
1940 }
1941 
1942 #undef __FUNCT__
1943 #define __FUNCT__ "MatCopy_MPIBAIJ"
1944 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
1945 {
1946   PetscErrorCode ierr;
1947   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1948   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
1949 
1950   PetscFunctionBegin;
1951   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1952   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1953     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1954   } else {
1955     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1956     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1957   }
1958   PetscFunctionReturn(0);
1959 }
1960 
1961 #undef __FUNCT__
1962 #define __FUNCT__ "MatSetUp_MPIBAIJ"
1963 PetscErrorCode MatSetUp_MPIBAIJ(Mat A)
1964 {
1965   PetscErrorCode ierr;
1966 
1967   PetscFunctionBegin;
1968   ierr = MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1969   PetscFunctionReturn(0);
1970 }
1971 
1972 #undef __FUNCT__
1973 #define __FUNCT__ "MatAXPYGetPreallocation_MPIBAIJ"
1974 PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
1975 {
1976   PetscErrorCode ierr;
1977   PetscInt       bs = Y->rmap->bs,m = Y->rmap->N/bs;
1978   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data;
1979   Mat_SeqBAIJ    *y = (Mat_SeqBAIJ*)Y->data;
1980 
1981   PetscFunctionBegin;
1982   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
1983   PetscFunctionReturn(0);
1984 }
1985 
1986 #undef __FUNCT__
1987 #define __FUNCT__ "MatAXPY_MPIBAIJ"
1988 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1989 {
1990   PetscErrorCode ierr;
1991   Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ*)X->data,*yy=(Mat_MPIBAIJ*)Y->data;
1992   PetscBLASInt   bnz,one=1;
1993   Mat_SeqBAIJ    *x,*y;
1994 
1995   PetscFunctionBegin;
1996   if (str == SAME_NONZERO_PATTERN) {
1997     PetscScalar alpha = a;
1998     x    = (Mat_SeqBAIJ*)xx->A->data;
1999     y    = (Mat_SeqBAIJ*)yy->A->data;
2000     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2001     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2002     x    = (Mat_SeqBAIJ*)xx->B->data;
2003     y    = (Mat_SeqBAIJ*)yy->B->data;
2004     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2005     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2006     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2007   } else {
2008     Mat      B;
2009     PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs;
2010     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2011     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2012     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2013     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2014     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2015     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2016     ierr = MatSetType(B,MATMPIBAIJ);CHKERRQ(ierr);
2017     ierr = MatAXPYGetPreallocation_SeqBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2018     ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2019     ierr = MatMPIBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2020     /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! */
2021     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2022     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2023     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2024     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2025   }
2026   PetscFunctionReturn(0);
2027 }
2028 
2029 #undef __FUNCT__
2030 #define __FUNCT__ "MatRealPart_MPIBAIJ"
2031 PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
2032 {
2033   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
2034   PetscErrorCode ierr;
2035 
2036   PetscFunctionBegin;
2037   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2038   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2039   PetscFunctionReturn(0);
2040 }
2041 
2042 #undef __FUNCT__
2043 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ"
2044 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
2045 {
2046   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
2047   PetscErrorCode ierr;
2048 
2049   PetscFunctionBegin;
2050   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2051   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2052   PetscFunctionReturn(0);
2053 }
2054 
2055 #undef __FUNCT__
2056 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ"
2057 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
2058 {
2059   PetscErrorCode ierr;
2060   IS             iscol_local;
2061   PetscInt       csize;
2062 
2063   PetscFunctionBegin;
2064   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
2065   if (call == MAT_REUSE_MATRIX) {
2066     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
2067     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2068   } else {
2069     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
2070   }
2071   ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
2072   if (call == MAT_INITIAL_MATRIX) {
2073     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
2074     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
2075   }
2076   PetscFunctionReturn(0);
2077 }
2078 extern PetscErrorCode MatGetSubMatrices_MPIBAIJ_local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,PetscBool*,Mat*);
2079 #undef __FUNCT__
2080 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ_Private"
2081 /*
2082   Not great since it makes two copies of the submatrix, first an SeqBAIJ
2083   in local and then by concatenating the local matrices the end result.
2084   Writing it directly would be much like MatGetSubMatrices_MPIBAIJ()
2085 */
2086 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2087 {
2088   PetscErrorCode ierr;
2089   PetscMPIInt    rank,size;
2090   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs;
2091   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol,nrow;
2092   Mat            M,Mreuse;
2093   MatScalar      *vwork,*aa;
2094   MPI_Comm       comm;
2095   IS             isrow_new, iscol_new;
2096   PetscBool      idflag,allrows, allcols;
2097   Mat_SeqBAIJ    *aij;
2098 
2099   PetscFunctionBegin;
2100   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2101   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2102   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2103   /* The compression and expansion should be avoided. Doesn't point
2104      out errors, might change the indices, hence buggey */
2105   ierr = ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new);CHKERRQ(ierr);
2106   ierr = ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new);CHKERRQ(ierr);
2107 
2108   /* Check for special case: each processor gets entire matrix columns */
2109   ierr = ISIdentity(iscol,&idflag);CHKERRQ(ierr);
2110   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
2111   if (idflag && ncol == mat->cmap->N) allcols = PETSC_TRUE;
2112   else allcols = PETSC_FALSE;
2113 
2114   ierr = ISIdentity(isrow,&idflag);CHKERRQ(ierr);
2115   ierr = ISGetLocalSize(isrow,&nrow);CHKERRQ(ierr);
2116   if (idflag && nrow == mat->rmap->N) allrows = PETSC_TRUE;
2117   else allrows = PETSC_FALSE;
2118 
2119   if (call ==  MAT_REUSE_MATRIX) {
2120     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
2121     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2122     ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_REUSE_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr);
2123   } else {
2124     ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_INITIAL_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr);
2125   }
2126   ierr = ISDestroy(&isrow_new);CHKERRQ(ierr);
2127   ierr = ISDestroy(&iscol_new);CHKERRQ(ierr);
2128   /*
2129       m - number of local rows
2130       n - number of columns (same on all processors)
2131       rstart - first row in new global matrix generated
2132   */
2133   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2134   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2135   m    = m/bs;
2136   n    = n/bs;
2137 
2138   if (call == MAT_INITIAL_MATRIX) {
2139     aij = (Mat_SeqBAIJ*)(Mreuse)->data;
2140     ii  = aij->i;
2141     jj  = aij->j;
2142 
2143     /*
2144         Determine the number of non-zeros in the diagonal and off-diagonal
2145         portions of the matrix in order to do correct preallocation
2146     */
2147 
2148     /* first get start and end of "diagonal" columns */
2149     if (csize == PETSC_DECIDE) {
2150       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2151       if (mglobal == n*bs) { /* square matrix */
2152         nlocal = m;
2153       } else {
2154         nlocal = n/size + ((n % size) > rank);
2155       }
2156     } else {
2157       nlocal = csize/bs;
2158     }
2159     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2160     rstart = rend - nlocal;
2161     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2162 
2163     /* next, compute all the lengths */
2164     ierr  = PetscMalloc2(m+1,&dlens,m+1,&olens);CHKERRQ(ierr);
2165     for (i=0; i<m; i++) {
2166       jend = ii[i+1] - ii[i];
2167       olen = 0;
2168       dlen = 0;
2169       for (j=0; j<jend; j++) {
2170         if (*jj < rstart || *jj >= rend) olen++;
2171         else dlen++;
2172         jj++;
2173       }
2174       olens[i] = olen;
2175       dlens[i] = dlen;
2176     }
2177     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
2178     ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr);
2179     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
2180     ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
2181     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
2182   } else {
2183     PetscInt ml,nl;
2184 
2185     M    = *newmat;
2186     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2187     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2188     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2189     /*
2190          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2191        rather than the slower MatSetValues().
2192     */
2193     M->was_assembled = PETSC_TRUE;
2194     M->assembled     = PETSC_FALSE;
2195   }
2196   ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2197   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2198   aij  = (Mat_SeqBAIJ*)(Mreuse)->data;
2199   ii   = aij->i;
2200   jj   = aij->j;
2201   aa   = aij->a;
2202   for (i=0; i<m; i++) {
2203     row   = rstart/bs + i;
2204     nz    = ii[i+1] - ii[i];
2205     cwork = jj;     jj += nz;
2206     vwork = aa;     aa += nz*bs*bs;
2207     ierr  = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2208   }
2209 
2210   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2211   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2212   *newmat = M;
2213 
2214   /* save submatrix used in processor for next request */
2215   if (call ==  MAT_INITIAL_MATRIX) {
2216     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2217     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2218   }
2219   PetscFunctionReturn(0);
2220 }
2221 
2222 #undef __FUNCT__
2223 #define __FUNCT__ "MatPermute_MPIBAIJ"
2224 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B)
2225 {
2226   MPI_Comm       comm,pcomm;
2227   PetscInt       clocal_size,nrows;
2228   const PetscInt *rows;
2229   PetscMPIInt    size;
2230   IS             crowp,lcolp;
2231   PetscErrorCode ierr;
2232 
2233   PetscFunctionBegin;
2234   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2235   /* make a collective version of 'rowp' */
2236   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
2237   if (pcomm==comm) {
2238     crowp = rowp;
2239   } else {
2240     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
2241     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
2242     ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr);
2243     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
2244   }
2245   ierr = ISSetPermutation(crowp);CHKERRQ(ierr);
2246   /* make a local version of 'colp' */
2247   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
2248   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
2249   if (size==1) {
2250     lcolp = colp;
2251   } else {
2252     ierr = ISAllGather(colp,&lcolp);CHKERRQ(ierr);
2253   }
2254   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
2255   /* now we just get the submatrix */
2256   ierr = MatGetLocalSize(A,NULL,&clocal_size);CHKERRQ(ierr);
2257   ierr = MatGetSubMatrix_MPIBAIJ_Private(A,crowp,lcolp,clocal_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
2258   /* clean up */
2259   if (pcomm!=comm) {
2260     ierr = ISDestroy(&crowp);CHKERRQ(ierr);
2261   }
2262   if (size>1) {
2263     ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
2264   }
2265   PetscFunctionReturn(0);
2266 }
2267 
2268 #undef __FUNCT__
2269 #define __FUNCT__ "MatGetGhosts_MPIBAIJ"
2270 PetscErrorCode  MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
2271 {
2272   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data;
2273   Mat_SeqBAIJ *B    = (Mat_SeqBAIJ*)baij->B->data;
2274 
2275   PetscFunctionBegin;
2276   if (nghosts) *nghosts = B->nbs;
2277   if (ghosts) *ghosts = baij->garray;
2278   PetscFunctionReturn(0);
2279 }
2280 
2281 #undef __FUNCT__
2282 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIBAIJ"
2283 PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat)
2284 {
2285   Mat            B;
2286   Mat_MPIBAIJ    *a  = (Mat_MPIBAIJ*)A->data;
2287   Mat_SeqBAIJ    *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data;
2288   Mat_SeqAIJ     *b;
2289   PetscErrorCode ierr;
2290   PetscMPIInt    size,rank,*recvcounts = 0,*displs = 0;
2291   PetscInt       sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs;
2292   PetscInt       m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf;
2293 
2294   PetscFunctionBegin;
2295   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
2296   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
2297 
2298   /* ----------------------------------------------------------------
2299      Tell every processor the number of nonzeros per row
2300   */
2301   ierr = PetscMalloc1((A->rmap->N/bs),&lens);CHKERRQ(ierr);
2302   for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) {
2303     lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs];
2304   }
2305   sendcount = A->rmap->rend/bs - A->rmap->rstart/bs;
2306   ierr      = PetscMalloc1(2*size,&recvcounts);CHKERRQ(ierr);
2307   displs    = recvcounts + size;
2308   for (i=0; i<size; i++) {
2309     recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs;
2310     displs[i]     = A->rmap->range[i]/bs;
2311   }
2312 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2313   ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2314 #else
2315   ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2316 #endif
2317   /* ---------------------------------------------------------------
2318      Create the sequential matrix of the same type as the local block diagonal
2319   */
2320   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
2321   ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2322   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2323   ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr);
2324   b    = (Mat_SeqAIJ*)B->data;
2325 
2326   /*--------------------------------------------------------------------
2327     Copy my part of matrix column indices over
2328   */
2329   sendcount  = ad->nz + bd->nz;
2330   jsendbuf   = b->j + b->i[rstarts[rank]/bs];
2331   a_jsendbuf = ad->j;
2332   b_jsendbuf = bd->j;
2333   n          = A->rmap->rend/bs - A->rmap->rstart/bs;
2334   cnt        = 0;
2335   for (i=0; i<n; i++) {
2336 
2337     /* put in lower diagonal portion */
2338     m = bd->i[i+1] - bd->i[i];
2339     while (m > 0) {
2340       /* is it above diagonal (in bd (compressed) numbering) */
2341       if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break;
2342       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2343       m--;
2344     }
2345 
2346     /* put in diagonal portion */
2347     for (j=ad->i[i]; j<ad->i[i+1]; j++) {
2348       jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++;
2349     }
2350 
2351     /* put in upper diagonal portion */
2352     while (m-- > 0) {
2353       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2354     }
2355   }
2356   if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt);
2357 
2358   /*--------------------------------------------------------------------
2359     Gather all column indices to all processors
2360   */
2361   for (i=0; i<size; i++) {
2362     recvcounts[i] = 0;
2363     for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) {
2364       recvcounts[i] += lens[j];
2365     }
2366   }
2367   displs[0] = 0;
2368   for (i=1; i<size; i++) {
2369     displs[i] = displs[i-1] + recvcounts[i-1];
2370   }
2371 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2372   ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2373 #else
2374   ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2375 #endif
2376   /*--------------------------------------------------------------------
2377     Assemble the matrix into useable form (note numerical values not yet set)
2378   */
2379   /* set the b->ilen (length of each row) values */
2380   ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr);
2381   /* set the b->i indices */
2382   b->i[0] = 0;
2383   for (i=1; i<=A->rmap->N/bs; i++) {
2384     b->i[i] = b->i[i-1] + lens[i-1];
2385   }
2386   ierr = PetscFree(lens);CHKERRQ(ierr);
2387   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2388   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2389   ierr = PetscFree(recvcounts);CHKERRQ(ierr);
2390 
2391   if (A->symmetric) {
2392     ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2393   } else if (A->hermitian) {
2394     ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr);
2395   } else if (A->structurally_symmetric) {
2396     ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2397   }
2398   *newmat = B;
2399   PetscFunctionReturn(0);
2400 }
2401 
2402 #undef __FUNCT__
2403 #define __FUNCT__ "MatSOR_MPIBAIJ"
2404 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2405 {
2406   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
2407   PetscErrorCode ierr;
2408   Vec            bb1 = 0;
2409 
2410   PetscFunctionBegin;
2411   if (flag == SOR_APPLY_UPPER) {
2412     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2413     PetscFunctionReturn(0);
2414   }
2415 
2416   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) {
2417     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2418   }
2419 
2420   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2421     if (flag & SOR_ZERO_INITIAL_GUESS) {
2422       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2423       its--;
2424     }
2425 
2426     while (its--) {
2427       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2428       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2429 
2430       /* update rhs: bb1 = bb - B*x */
2431       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2432       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2433 
2434       /* local sweep */
2435       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2436     }
2437   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
2438     if (flag & SOR_ZERO_INITIAL_GUESS) {
2439       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2440       its--;
2441     }
2442     while (its--) {
2443       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2444       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2445 
2446       /* update rhs: bb1 = bb - B*x */
2447       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2448       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2449 
2450       /* local sweep */
2451       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2452     }
2453   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
2454     if (flag & SOR_ZERO_INITIAL_GUESS) {
2455       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2456       its--;
2457     }
2458     while (its--) {
2459       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2460       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2461 
2462       /* update rhs: bb1 = bb - B*x */
2463       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2464       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2465 
2466       /* local sweep */
2467       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2468     }
2469   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel version of SOR requested not supported");
2470 
2471   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2472   PetscFunctionReturn(0);
2473 }
2474 
2475 #undef __FUNCT__
2476 #define __FUNCT__ "MatGetColumnNorms_MPIBAIJ"
2477 PetscErrorCode MatGetColumnNorms_MPIBAIJ(Mat A,NormType type,PetscReal *norms)
2478 {
2479   PetscErrorCode ierr;
2480   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)A->data;
2481   PetscInt       N,i,*garray = aij->garray;
2482   PetscInt       ib,jb,bs = A->rmap->bs;
2483   Mat_SeqBAIJ    *a_aij = (Mat_SeqBAIJ*) aij->A->data;
2484   MatScalar      *a_val = a_aij->a;
2485   Mat_SeqBAIJ    *b_aij = (Mat_SeqBAIJ*) aij->B->data;
2486   MatScalar      *b_val = b_aij->a;
2487   PetscReal      *work;
2488 
2489   PetscFunctionBegin;
2490   ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
2491   ierr = PetscCalloc1(N,&work);CHKERRQ(ierr);
2492   if (type == NORM_2) {
2493     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2494       for (jb=0; jb<bs; jb++) {
2495         for (ib=0; ib<bs; ib++) {
2496           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
2497           a_val++;
2498         }
2499       }
2500     }
2501     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2502       for (jb=0; jb<bs; jb++) {
2503         for (ib=0; ib<bs; ib++) {
2504           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val);
2505           b_val++;
2506         }
2507       }
2508     }
2509   } else if (type == NORM_1) {
2510     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2511       for (jb=0; jb<bs; jb++) {
2512         for (ib=0; ib<bs; ib++) {
2513           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
2514           a_val++;
2515         }
2516       }
2517     }
2518     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2519       for (jb=0; jb<bs; jb++) {
2520        for (ib=0; ib<bs; ib++) {
2521           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val);
2522           b_val++;
2523         }
2524       }
2525     }
2526   } else if (type == NORM_INFINITY) {
2527     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2528       for (jb=0; jb<bs; jb++) {
2529         for (ib=0; ib<bs; ib++) {
2530           int col = A->cmap->rstart + a_aij->j[i] * bs + jb;
2531           work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]);
2532           a_val++;
2533         }
2534       }
2535     }
2536     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2537       for (jb=0; jb<bs; jb++) {
2538         for (ib=0; ib<bs; ib++) {
2539           int col = garray[b_aij->j[i]] * bs + jb;
2540           work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]);
2541           b_val++;
2542         }
2543       }
2544     }
2545   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
2546   if (type == NORM_INFINITY) {
2547     ierr = MPI_Allreduce(work,norms,N,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2548   } else {
2549     ierr = MPI_Allreduce(work,norms,N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2550   }
2551   ierr = PetscFree(work);CHKERRQ(ierr);
2552   if (type == NORM_2) {
2553     for (i=0; i<N; i++) norms[i] = PetscSqrtReal(norms[i]);
2554   }
2555   PetscFunctionReturn(0);
2556 }
2557 
2558 #undef __FUNCT__
2559 #define __FUNCT__ "MatInvertBlockDiagonal_MPIBAIJ"
2560 PetscErrorCode  MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values)
2561 {
2562   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*) A->data;
2563   PetscErrorCode ierr;
2564 
2565   PetscFunctionBegin;
2566   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2567   PetscFunctionReturn(0);
2568 }
2569 
2570 
2571 /* -------------------------------------------------------------------*/
2572 static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ,
2573                                        MatGetRow_MPIBAIJ,
2574                                        MatRestoreRow_MPIBAIJ,
2575                                        MatMult_MPIBAIJ,
2576                                 /* 4*/ MatMultAdd_MPIBAIJ,
2577                                        MatMultTranspose_MPIBAIJ,
2578                                        MatMultTransposeAdd_MPIBAIJ,
2579                                        0,
2580                                        0,
2581                                        0,
2582                                 /*10*/ 0,
2583                                        0,
2584                                        0,
2585                                        MatSOR_MPIBAIJ,
2586                                        MatTranspose_MPIBAIJ,
2587                                 /*15*/ MatGetInfo_MPIBAIJ,
2588                                        MatEqual_MPIBAIJ,
2589                                        MatGetDiagonal_MPIBAIJ,
2590                                        MatDiagonalScale_MPIBAIJ,
2591                                        MatNorm_MPIBAIJ,
2592                                 /*20*/ MatAssemblyBegin_MPIBAIJ,
2593                                        MatAssemblyEnd_MPIBAIJ,
2594                                        MatSetOption_MPIBAIJ,
2595                                        MatZeroEntries_MPIBAIJ,
2596                                 /*24*/ MatZeroRows_MPIBAIJ,
2597                                        0,
2598                                        0,
2599                                        0,
2600                                        0,
2601                                 /*29*/ MatSetUp_MPIBAIJ,
2602                                        0,
2603                                        0,
2604                                        0,
2605                                        0,
2606                                 /*34*/ MatDuplicate_MPIBAIJ,
2607                                        0,
2608                                        0,
2609                                        0,
2610                                        0,
2611                                 /*39*/ MatAXPY_MPIBAIJ,
2612                                        MatGetSubMatrices_MPIBAIJ,
2613                                        MatIncreaseOverlap_MPIBAIJ,
2614                                        MatGetValues_MPIBAIJ,
2615                                        MatCopy_MPIBAIJ,
2616                                 /*44*/ 0,
2617                                        MatScale_MPIBAIJ,
2618                                        0,
2619                                        0,
2620                                        MatZeroRowsColumns_MPIBAIJ,
2621                                 /*49*/ 0,
2622                                        0,
2623                                        0,
2624                                        0,
2625                                        0,
2626                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2627                                        0,
2628                                        MatSetUnfactored_MPIBAIJ,
2629                                        MatPermute_MPIBAIJ,
2630                                        MatSetValuesBlocked_MPIBAIJ,
2631                                 /*59*/ MatGetSubMatrix_MPIBAIJ,
2632                                        MatDestroy_MPIBAIJ,
2633                                        MatView_MPIBAIJ,
2634                                        0,
2635                                        0,
2636                                 /*64*/ 0,
2637                                        0,
2638                                        0,
2639                                        0,
2640                                        0,
2641                                 /*69*/ MatGetRowMaxAbs_MPIBAIJ,
2642                                        0,
2643                                        0,
2644                                        0,
2645                                        0,
2646                                 /*74*/ 0,
2647                                        MatFDColoringApply_BAIJ,
2648                                        0,
2649                                        0,
2650                                        0,
2651                                 /*79*/ 0,
2652                                        0,
2653                                        0,
2654                                        0,
2655                                        MatLoad_MPIBAIJ,
2656                                 /*84*/ 0,
2657                                        0,
2658                                        0,
2659                                        0,
2660                                        0,
2661                                 /*89*/ 0,
2662                                        0,
2663                                        0,
2664                                        0,
2665                                        0,
2666                                 /*94*/ 0,
2667                                        0,
2668                                        0,
2669                                        0,
2670                                        0,
2671                                 /*99*/ 0,
2672                                        0,
2673                                        0,
2674                                        0,
2675                                        0,
2676                                 /*104*/0,
2677                                        MatRealPart_MPIBAIJ,
2678                                        MatImaginaryPart_MPIBAIJ,
2679                                        0,
2680                                        0,
2681                                 /*109*/0,
2682                                        0,
2683                                        0,
2684                                        0,
2685                                        0,
2686                                 /*114*/MatGetSeqNonzeroStructure_MPIBAIJ,
2687                                        0,
2688                                        MatGetGhosts_MPIBAIJ,
2689                                        0,
2690                                        0,
2691                                 /*119*/0,
2692                                        0,
2693                                        0,
2694                                        0,
2695                                        MatGetMultiProcBlock_MPIBAIJ,
2696                                 /*124*/0,
2697                                        MatGetColumnNorms_MPIBAIJ,
2698                                        MatInvertBlockDiagonal_MPIBAIJ,
2699                                        0,
2700                                        0,
2701                                /*129*/ 0,
2702                                        0,
2703                                        0,
2704                                        0,
2705                                        0,
2706                                /*134*/ 0,
2707                                        0,
2708                                        0,
2709                                        0,
2710                                        0,
2711                                /*139*/ 0,
2712                                        0,
2713                                        0,
2714                                        MatFDColoringSetUp_MPIXAIJ,
2715                                        0,
2716                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIBAIJ
2717 };
2718 
2719 #undef __FUNCT__
2720 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
2721 PetscErrorCode  MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a)
2722 {
2723   PetscFunctionBegin;
2724   *a = ((Mat_MPIBAIJ*)A->data)->A;
2725   PetscFunctionReturn(0);
2726 }
2727 
2728 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*);
2729 
2730 #undef __FUNCT__
2731 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ"
2732 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2733 {
2734   PetscInt       m,rstart,cstart,cend;
2735   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
2736   const PetscInt *JJ    =0;
2737   PetscScalar    *values=0;
2738   PetscBool      roworiented = ((Mat_MPIBAIJ*)B->data)->roworiented;
2739   PetscErrorCode ierr;
2740 
2741   PetscFunctionBegin;
2742   ierr   = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2743   ierr   = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2744   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2745   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2746   ierr   = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2747   m      = B->rmap->n/bs;
2748   rstart = B->rmap->rstart/bs;
2749   cstart = B->cmap->rstart/bs;
2750   cend   = B->cmap->rend/bs;
2751 
2752   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
2753   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
2754   for (i=0; i<m; i++) {
2755     nz = ii[i+1] - ii[i];
2756     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
2757     nz_max = PetscMax(nz_max,nz);
2758     JJ     = jj + ii[i];
2759     for (j=0; j<nz; j++) {
2760       if (*JJ >= cstart) break;
2761       JJ++;
2762     }
2763     d = 0;
2764     for (; j<nz; j++) {
2765       if (*JJ++ >= cend) break;
2766       d++;
2767     }
2768     d_nnz[i] = d;
2769     o_nnz[i] = nz - d;
2770   }
2771   ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2772   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
2773 
2774   values = (PetscScalar*)V;
2775   if (!values) {
2776     ierr = PetscMalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr);
2777     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2778   }
2779   for (i=0; i<m; i++) {
2780     PetscInt          row    = i + rstart;
2781     PetscInt          ncols  = ii[i+1] - ii[i];
2782     const PetscInt    *icols = jj + ii[i];
2783     if (!roworiented) {         /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
2784       const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2785       ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2786     } else {                    /* block ordering does not match so we can only insert one block at a time. */
2787       PetscInt j;
2788       for (j=0; j<ncols; j++) {
2789         const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
2790         ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
2791       }
2792     }
2793   }
2794 
2795   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2796   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2797   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2798   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2799   PetscFunctionReturn(0);
2800 }
2801 
2802 #undef __FUNCT__
2803 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR"
2804 /*@C
2805    MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format
2806    (the default parallel PETSc format).
2807 
2808    Collective on MPI_Comm
2809 
2810    Input Parameters:
2811 +  B - the matrix
2812 .  bs - the block size
2813 .  i - the indices into j for the start of each local row (starts with zero)
2814 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2815 -  v - optional values in the matrix
2816 
2817    Level: developer
2818 
2819    Notes: The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED.  For example, C programs
2820    may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
2821    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
2822    MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
2823    block column and the second index is over columns within a block.
2824 
2825 .keywords: matrix, aij, compressed row, sparse, parallel
2826 
2827 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, MatCreateMPIBAIJWithArrays(), MPIBAIJ
2828 @*/
2829 PetscErrorCode  MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2830 {
2831   PetscErrorCode ierr;
2832 
2833   PetscFunctionBegin;
2834   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2835   PetscValidType(B,1);
2836   PetscValidLogicalCollectiveInt(B,bs,2);
2837   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
2838   PetscFunctionReturn(0);
2839 }
2840 
2841 #undef __FUNCT__
2842 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
2843 PetscErrorCode  MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
2844 {
2845   Mat_MPIBAIJ    *b;
2846   PetscErrorCode ierr;
2847   PetscInt       i;
2848 
2849   PetscFunctionBegin;
2850   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
2851   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2852   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2853   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2854 
2855   if (d_nnz) {
2856     for (i=0; i<B->rmap->n/bs; i++) {
2857       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]);
2858     }
2859   }
2860   if (o_nnz) {
2861     for (i=0; i<B->rmap->n/bs; i++) {
2862       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]);
2863     }
2864   }
2865 
2866   b      = (Mat_MPIBAIJ*)B->data;
2867   b->bs2 = bs*bs;
2868   b->mbs = B->rmap->n/bs;
2869   b->nbs = B->cmap->n/bs;
2870   b->Mbs = B->rmap->N/bs;
2871   b->Nbs = B->cmap->N/bs;
2872 
2873   for (i=0; i<=b->size; i++) {
2874     b->rangebs[i] = B->rmap->range[i]/bs;
2875   }
2876   b->rstartbs = B->rmap->rstart/bs;
2877   b->rendbs   = B->rmap->rend/bs;
2878   b->cstartbs = B->cmap->rstart/bs;
2879   b->cendbs   = B->cmap->rend/bs;
2880 
2881   if (!B->preallocated) {
2882     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2883     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2884     ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
2885     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2886     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2887     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2888     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
2889     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2890     ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr);
2891   }
2892 
2893   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2894   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
2895   B->preallocated = PETSC_TRUE;
2896   PetscFunctionReturn(0);
2897 }
2898 
2899 extern PetscErrorCode  MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
2900 extern PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
2901 
2902 #undef __FUNCT__
2903 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj"
2904 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype,MatReuse reuse,Mat *adj)
2905 {
2906   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
2907   PetscErrorCode ierr;
2908   Mat_SeqBAIJ    *d  = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data;
2909   PetscInt       M   = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs;
2910   const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;
2911 
2912   PetscFunctionBegin;
2913   ierr  = PetscMalloc1((M+1),&ii);CHKERRQ(ierr);
2914   ii[0] = 0;
2915   for (i=0; i<M; i++) {
2916     if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]);
2917     if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]);
2918     ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i];
2919     /* remove one from count of matrix has diagonal */
2920     for (j=id[i]; j<id[i+1]; j++) {
2921       if (jd[j] == i) {ii[i+1]--;break;}
2922     }
2923   }
2924   ierr = PetscMalloc1(ii[M],&jj);CHKERRQ(ierr);
2925   cnt  = 0;
2926   for (i=0; i<M; i++) {
2927     for (j=io[i]; j<io[i+1]; j++) {
2928       if (garray[jo[j]] > rstart) break;
2929       jj[cnt++] = garray[jo[j]];
2930     }
2931     for (k=id[i]; k<id[i+1]; k++) {
2932       if (jd[k] != i) {
2933         jj[cnt++] = rstart + jd[k];
2934       }
2935     }
2936     for (; j<io[i+1]; j++) {
2937       jj[cnt++] = garray[jo[j]];
2938     }
2939   }
2940   ierr = MatCreateMPIAdj(PetscObjectComm((PetscObject)B),M,B->cmap->N/B->rmap->bs,ii,jj,NULL,adj);CHKERRQ(ierr);
2941   PetscFunctionReturn(0);
2942 }
2943 
2944 #include <../src/mat/impls/aij/mpi/mpiaij.h>
2945 
2946 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,MatReuse,Mat*);
2947 
2948 #undef __FUNCT__
2949 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAIJ"
2950 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
2951 {
2952   PetscErrorCode ierr;
2953   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
2954   Mat            B;
2955   Mat_MPIAIJ     *b;
2956 
2957   PetscFunctionBegin;
2958   if (!A->assembled) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Matrix must be assembled");
2959 
2960   ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2961   ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
2962   ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2963   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
2964   ierr = MatMPIAIJSetPreallocation(B,0,NULL,0,NULL);CHKERRQ(ierr);
2965   b    = (Mat_MPIAIJ*) B->data;
2966 
2967   ierr = MatDestroy(&b->A);CHKERRQ(ierr);
2968   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2969   ierr = MatDisAssemble_MPIBAIJ(A);CHKERRQ(ierr);
2970   ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr);
2971   ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr);
2972   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2973   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2974   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2975   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2976   if (reuse == MAT_REUSE_MATRIX) {
2977     ierr = MatHeaderReplace(A,B);CHKERRQ(ierr);
2978   } else {
2979    *newmat = B;
2980   }
2981   PetscFunctionReturn(0);
2982 }
2983 
2984 /*MC
2985    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
2986 
2987    Options Database Keys:
2988 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
2989 . -mat_block_size <bs> - set the blocksize used to store the matrix
2990 - -mat_use_hash_table <fact>
2991 
2992   Level: beginner
2993 
2994 .seealso: MatCreateMPIBAIJ
2995 M*/
2996 
2997 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,MatType,MatReuse,Mat*);
2998 
2999 #undef __FUNCT__
3000 #define __FUNCT__ "MatCreate_MPIBAIJ"
3001 PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B)
3002 {
3003   Mat_MPIBAIJ    *b;
3004   PetscErrorCode ierr;
3005   PetscBool      flg = PETSC_FALSE;
3006 
3007   PetscFunctionBegin;
3008   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
3009   B->data = (void*)b;
3010 
3011   ierr         = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3012   B->assembled = PETSC_FALSE;
3013 
3014   B->insertmode = NOT_SET_VALUES;
3015   ierr          = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
3016   ierr          = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr);
3017 
3018   /* build local table of row and column ownerships */
3019   ierr = PetscMalloc1((b->size+1),&b->rangebs);CHKERRQ(ierr);
3020 
3021   /* build cache for off array entries formed */
3022   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
3023 
3024   b->donotstash  = PETSC_FALSE;
3025   b->colmap      = NULL;
3026   b->garray      = NULL;
3027   b->roworiented = PETSC_TRUE;
3028 
3029   /* stuff used in block assembly */
3030   b->barray = 0;
3031 
3032   /* stuff used for matrix vector multiply */
3033   b->lvec  = 0;
3034   b->Mvctx = 0;
3035 
3036   /* stuff for MatGetRow() */
3037   b->rowindices   = 0;
3038   b->rowvalues    = 0;
3039   b->getrowactive = PETSC_FALSE;
3040 
3041   /* hash table stuff */
3042   b->ht           = 0;
3043   b->hd           = 0;
3044   b->ht_size      = 0;
3045   b->ht_flag      = PETSC_FALSE;
3046   b->ht_fact      = 0;
3047   b->ht_total_ct  = 0;
3048   b->ht_insert_ct = 0;
3049 
3050   /* stuff for MatGetSubMatrices_MPIBAIJ_local() */
3051   b->ijonly = PETSC_FALSE;
3052 
3053 
3054   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr);
3055   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr);
3056   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C",MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr);
3057   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
3058   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
3059   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
3060   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
3061   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
3062   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
3063   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSetHashTableFactor_C",MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
3064   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpibstrm_C",MatConvert_MPIBAIJ_MPIBSTRM);CHKERRQ(ierr);
3065   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);
3066 
3067   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr);
3068   ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",flg,&flg,NULL);CHKERRQ(ierr);
3069   if (flg) {
3070     PetscReal fact = 1.39;
3071     ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
3072     ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
3073     if (fact <= 1.0) fact = 1.39;
3074     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
3075     ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
3076   }
3077   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3078   PetscFunctionReturn(0);
3079 }
3080 
3081 /*MC
3082    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
3083 
3084    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
3085    and MATMPIBAIJ otherwise.
3086 
3087    Options Database Keys:
3088 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
3089 
3090   Level: beginner
3091 
3092 .seealso: MatCreateBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3093 M*/
3094 
3095 #undef __FUNCT__
3096 #define __FUNCT__ "MatMPIBAIJSetPreallocation"
3097 /*@C
3098    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
3099    (block compressed row).  For good matrix assembly performance
3100    the user should preallocate the matrix storage by setting the parameters
3101    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3102    performance can be increased by more than a factor of 50.
3103 
3104    Collective on Mat
3105 
3106    Input Parameters:
3107 +  B - the matrix
3108 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3109           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3110 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
3111            submatrix  (same for all local rows)
3112 .  d_nnz - array containing the number of block nonzeros in the various block rows
3113            of the in diagonal portion of the local (possibly different for each block
3114            row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry and
3115            set it even if it is zero.
3116 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
3117            submatrix (same for all local rows).
3118 -  o_nnz - array containing the number of nonzeros in the various block rows of the
3119            off-diagonal portion of the local submatrix (possibly different for
3120            each block row) or NULL.
3121 
3122    If the *_nnz parameter is given then the *_nz parameter is ignored
3123 
3124    Options Database Keys:
3125 +   -mat_block_size - size of the blocks to use
3126 -   -mat_use_hash_table <fact>
3127 
3128    Notes:
3129    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
3130    than it must be used on all processors that share the object for that argument.
3131 
3132    Storage Information:
3133    For a square global matrix we define each processor's diagonal portion
3134    to be its local rows and the corresponding columns (a square submatrix);
3135    each processor's off-diagonal portion encompasses the remainder of the
3136    local matrix (a rectangular submatrix).
3137 
3138    The user can specify preallocated storage for the diagonal part of
3139    the local submatrix with either d_nz or d_nnz (not both).  Set
3140    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3141    memory allocation.  Likewise, specify preallocated storage for the
3142    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3143 
3144    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3145    the figure below we depict these three local rows and all columns (0-11).
3146 
3147 .vb
3148            0 1 2 3 4 5 6 7 8 9 10 11
3149           --------------------------
3150    row 3  |o o o d d d o o o o  o  o
3151    row 4  |o o o d d d o o o o  o  o
3152    row 5  |o o o d d d o o o o  o  o
3153           --------------------------
3154 .ve
3155 
3156    Thus, any entries in the d locations are stored in the d (diagonal)
3157    submatrix, and any entries in the o locations are stored in the
3158    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3159    stored simply in the MATSEQBAIJ format for compressed row storage.
3160 
3161    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3162    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3163    In general, for PDE problems in which most nonzeros are near the diagonal,
3164    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3165    or you will get TERRIBLE performance; see the users' manual chapter on
3166    matrices.
3167 
3168    You can call MatGetInfo() to get information on how effective the preallocation was;
3169    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3170    You can also run with the option -info and look for messages with the string
3171    malloc in them to see if additional memory allocation was needed.
3172 
3173    Level: intermediate
3174 
3175 .keywords: matrix, block, aij, compressed row, sparse, parallel
3176 
3177 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocationCSR(), PetscSplitOwnership()
3178 @*/
3179 PetscErrorCode  MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3180 {
3181   PetscErrorCode ierr;
3182 
3183   PetscFunctionBegin;
3184   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3185   PetscValidType(B,1);
3186   PetscValidLogicalCollectiveInt(B,bs,2);
3187   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3188   PetscFunctionReturn(0);
3189 }
3190 
3191 #undef __FUNCT__
3192 #define __FUNCT__ "MatCreateBAIJ"
3193 /*@C
3194    MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format
3195    (block compressed row).  For good matrix assembly performance
3196    the user should preallocate the matrix storage by setting the parameters
3197    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3198    performance can be increased by more than a factor of 50.
3199 
3200    Collective on MPI_Comm
3201 
3202    Input Parameters:
3203 +  comm - MPI communicator
3204 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3205           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3206 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3207            This value should be the same as the local size used in creating the
3208            y vector for the matrix-vector product y = Ax.
3209 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
3210            This value should be the same as the local size used in creating the
3211            x vector for the matrix-vector product y = Ax.
3212 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3213 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3214 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
3215            submatrix  (same for all local rows)
3216 .  d_nnz - array containing the number of nonzero blocks in the various block rows
3217            of the in diagonal portion of the local (possibly different for each block
3218            row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry
3219            and set it even if it is zero.
3220 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3221            submatrix (same for all local rows).
3222 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
3223            off-diagonal portion of the local submatrix (possibly different for
3224            each block row) or NULL.
3225 
3226    Output Parameter:
3227 .  A - the matrix
3228 
3229    Options Database Keys:
3230 +   -mat_block_size - size of the blocks to use
3231 -   -mat_use_hash_table <fact>
3232 
3233    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3234    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3235    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3236 
3237    Notes:
3238    If the *_nnz parameter is given then the *_nz parameter is ignored
3239 
3240    A nonzero block is any block that as 1 or more nonzeros in it
3241 
3242    The user MUST specify either the local or global matrix dimensions
3243    (possibly both).
3244 
3245    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
3246    than it must be used on all processors that share the object for that argument.
3247 
3248    Storage Information:
3249    For a square global matrix we define each processor's diagonal portion
3250    to be its local rows and the corresponding columns (a square submatrix);
3251    each processor's off-diagonal portion encompasses the remainder of the
3252    local matrix (a rectangular submatrix).
3253 
3254    The user can specify preallocated storage for the diagonal part of
3255    the local submatrix with either d_nz or d_nnz (not both).  Set
3256    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3257    memory allocation.  Likewise, specify preallocated storage for the
3258    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3259 
3260    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3261    the figure below we depict these three local rows and all columns (0-11).
3262 
3263 .vb
3264            0 1 2 3 4 5 6 7 8 9 10 11
3265           --------------------------
3266    row 3  |o o o d d d o o o o  o  o
3267    row 4  |o o o d d d o o o o  o  o
3268    row 5  |o o o d d d o o o o  o  o
3269           --------------------------
3270 .ve
3271 
3272    Thus, any entries in the d locations are stored in the d (diagonal)
3273    submatrix, and any entries in the o locations are stored in the
3274    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3275    stored simply in the MATSEQBAIJ format for compressed row storage.
3276 
3277    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3278    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3279    In general, for PDE problems in which most nonzeros are near the diagonal,
3280    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3281    or you will get TERRIBLE performance; see the users' manual chapter on
3282    matrices.
3283 
3284    Level: intermediate
3285 
3286 .keywords: matrix, block, aij, compressed row, sparse, parallel
3287 
3288 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3289 @*/
3290 PetscErrorCode  MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3291 {
3292   PetscErrorCode ierr;
3293   PetscMPIInt    size;
3294 
3295   PetscFunctionBegin;
3296   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3297   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3298   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3299   if (size > 1) {
3300     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
3301     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3302   } else {
3303     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3304     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3305   }
3306   PetscFunctionReturn(0);
3307 }
3308 
3309 #undef __FUNCT__
3310 #define __FUNCT__ "MatDuplicate_MPIBAIJ"
3311 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3312 {
3313   Mat            mat;
3314   Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
3315   PetscErrorCode ierr;
3316   PetscInt       len=0;
3317 
3318   PetscFunctionBegin;
3319   *newmat = 0;
3320   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
3321   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3322   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3323   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3324 
3325   mat->factortype   = matin->factortype;
3326   mat->preallocated = PETSC_TRUE;
3327   mat->assembled    = PETSC_TRUE;
3328   mat->insertmode   = NOT_SET_VALUES;
3329 
3330   a             = (Mat_MPIBAIJ*)mat->data;
3331   mat->rmap->bs = matin->rmap->bs;
3332   a->bs2        = oldmat->bs2;
3333   a->mbs        = oldmat->mbs;
3334   a->nbs        = oldmat->nbs;
3335   a->Mbs        = oldmat->Mbs;
3336   a->Nbs        = oldmat->Nbs;
3337 
3338   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3339   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3340 
3341   a->size         = oldmat->size;
3342   a->rank         = oldmat->rank;
3343   a->donotstash   = oldmat->donotstash;
3344   a->roworiented  = oldmat->roworiented;
3345   a->rowindices   = 0;
3346   a->rowvalues    = 0;
3347   a->getrowactive = PETSC_FALSE;
3348   a->barray       = 0;
3349   a->rstartbs     = oldmat->rstartbs;
3350   a->rendbs       = oldmat->rendbs;
3351   a->cstartbs     = oldmat->cstartbs;
3352   a->cendbs       = oldmat->cendbs;
3353 
3354   /* hash table stuff */
3355   a->ht           = 0;
3356   a->hd           = 0;
3357   a->ht_size      = 0;
3358   a->ht_flag      = oldmat->ht_flag;
3359   a->ht_fact      = oldmat->ht_fact;
3360   a->ht_total_ct  = 0;
3361   a->ht_insert_ct = 0;
3362 
3363   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
3364   if (oldmat->colmap) {
3365 #if defined(PETSC_USE_CTABLE)
3366     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3367 #else
3368     ierr = PetscMalloc1((a->Nbs),&a->colmap);CHKERRQ(ierr);
3369     ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3370     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3371 #endif
3372   } else a->colmap = 0;
3373 
3374   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
3375     ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr);
3376     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3377     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
3378   } else a->garray = 0;
3379 
3380   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
3381   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3382   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
3383   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3384   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
3385 
3386   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3387   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
3388   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3389   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
3390   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3391   *newmat = mat;
3392   PetscFunctionReturn(0);
3393 }
3394 
3395 #undef __FUNCT__
3396 #define __FUNCT__ "MatLoad_MPIBAIJ"
3397 PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer)
3398 {
3399   PetscErrorCode ierr;
3400   int            fd;
3401   PetscInt       i,nz,j,rstart,rend;
3402   PetscScalar    *vals,*buf;
3403   MPI_Comm       comm;
3404   MPI_Status     status;
3405   PetscMPIInt    rank,size,maxnz;
3406   PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
3407   PetscInt       *locrowlens = NULL,*procsnz = NULL,*browners = NULL;
3408   PetscInt       jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax;
3409   PetscMPIInt    tag    = ((PetscObject)viewer)->tag;
3410   PetscInt       *dlens = NULL,*odlens = NULL,*mask = NULL,*masked1 = NULL,*masked2 = NULL,rowcount,odcount;
3411   PetscInt       dcount,kmax,k,nzcount,tmp,mend,sizesset=1,grows,gcols;
3412 
3413   PetscFunctionBegin;
3414   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3415   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr);
3416   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3417   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3418 
3419   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3420   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3421   if (!rank) {
3422     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3423     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
3424     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3425   }
3426 
3427   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
3428 
3429   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3430   M    = header[1]; N = header[2];
3431 
3432   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3433   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
3434   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
3435 
3436   /* If global sizes are set, check if they are consistent with that given in the file */
3437   if (sizesset) {
3438     ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
3439   }
3440   if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3441   if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3442 
3443   if (M != N) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Can only do square matrices");
3444 
3445   /*
3446      This code adds extra rows to make sure the number of rows is
3447      divisible by the blocksize
3448   */
3449   Mbs        = M/bs;
3450   extra_rows = bs - M + bs*Mbs;
3451   if (extra_rows == bs) extra_rows = 0;
3452   else                  Mbs++;
3453   if (extra_rows && !rank) {
3454     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3455   }
3456 
3457   /* determine ownership of all rows */
3458   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
3459     mbs = Mbs/size + ((Mbs % size) > rank);
3460     m   = mbs*bs;
3461   } else { /* User set */
3462     m   = newmat->rmap->n;
3463     mbs = m/bs;
3464   }
3465   ierr = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr);
3466   ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3467 
3468   /* process 0 needs enough room for process with most rows */
3469   if (!rank) {
3470     mmax = rowners[1];
3471     for (i=2; i<=size; i++) {
3472       mmax = PetscMax(mmax,rowners[i]);
3473     }
3474     mmax*=bs;
3475   } else mmax = -1;             /* unused, but compiler warns anyway */
3476 
3477   rowners[0] = 0;
3478   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
3479   for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
3480   rstart = rowners[rank];
3481   rend   = rowners[rank+1];
3482 
3483   /* distribute row lengths to all processors */
3484   ierr = PetscMalloc1(m,&locrowlens);CHKERRQ(ierr);
3485   if (!rank) {
3486     mend = m;
3487     if (size == 1) mend = mend - extra_rows;
3488     ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr);
3489     for (j=mend; j<m; j++) locrowlens[j] = 1;
3490     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
3491     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
3492     for (j=0; j<m; j++) {
3493       procsnz[0] += locrowlens[j];
3494     }
3495     for (i=1; i<size; i++) {
3496       mend = browners[i+1] - browners[i];
3497       if (i == size-1) mend = mend - extra_rows;
3498       ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr);
3499       for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
3500       /* calculate the number of nonzeros on each processor */
3501       for (j=0; j<browners[i+1]-browners[i]; j++) {
3502         procsnz[i] += rowlengths[j];
3503       }
3504       ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3505     }
3506     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3507   } else {
3508     ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3509   }
3510 
3511   if (!rank) {
3512     /* determine max buffer needed and allocate it */
3513     maxnz = procsnz[0];
3514     for (i=1; i<size; i++) {
3515       maxnz = PetscMax(maxnz,procsnz[i]);
3516     }
3517     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
3518 
3519     /* read in my part of the matrix column indices  */
3520     nz     = procsnz[0];
3521     ierr   = PetscMalloc1((nz+1),&ibuf);CHKERRQ(ierr);
3522     mycols = ibuf;
3523     if (size == 1) nz -= extra_rows;
3524     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3525     if (size == 1) {
3526       for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i;
3527     }
3528 
3529     /* read in every ones (except the last) and ship off */
3530     for (i=1; i<size-1; i++) {
3531       nz   = procsnz[i];
3532       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3533       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3534     }
3535     /* read in the stuff for the last proc */
3536     if (size != 1) {
3537       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
3538       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3539       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
3540       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
3541     }
3542     ierr = PetscFree(cols);CHKERRQ(ierr);
3543   } else {
3544     /* determine buffer space needed for message */
3545     nz = 0;
3546     for (i=0; i<m; i++) {
3547       nz += locrowlens[i];
3548     }
3549     ierr   = PetscMalloc1((nz+1),&ibuf);CHKERRQ(ierr);
3550     mycols = ibuf;
3551     /* receive message of column indices*/
3552     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3553     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
3554     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
3555   }
3556 
3557   /* loop over local rows, determining number of off diagonal entries */
3558   ierr     = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr);
3559   ierr     = PetscCalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr);
3560   rowcount = 0; nzcount = 0;
3561   for (i=0; i<mbs; i++) {
3562     dcount  = 0;
3563     odcount = 0;
3564     for (j=0; j<bs; j++) {
3565       kmax = locrowlens[rowcount];
3566       for (k=0; k<kmax; k++) {
3567         tmp = mycols[nzcount++]/bs;
3568         if (!mask[tmp]) {
3569           mask[tmp] = 1;
3570           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
3571           else masked1[dcount++] = tmp;
3572         }
3573       }
3574       rowcount++;
3575     }
3576 
3577     dlens[i]  = dcount;
3578     odlens[i] = odcount;
3579 
3580     /* zero out the mask elements we set */
3581     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
3582     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
3583   }
3584 
3585 
3586   if (!sizesset) {
3587     ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3588   }
3589   ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
3590 
3591   if (!rank) {
3592     ierr = PetscMalloc1((maxnz+1),&buf);CHKERRQ(ierr);
3593     /* read in my part of the matrix numerical values  */
3594     nz     = procsnz[0];
3595     vals   = buf;
3596     mycols = ibuf;
3597     if (size == 1) nz -= extra_rows;
3598     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3599     if (size == 1) {
3600       for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0;
3601     }
3602 
3603     /* insert into matrix */
3604     jj = rstart*bs;
3605     for (i=0; i<m; i++) {
3606       ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3607       mycols += locrowlens[i];
3608       vals   += locrowlens[i];
3609       jj++;
3610     }
3611     /* read in other processors (except the last one) and ship out */
3612     for (i=1; i<size-1; i++) {
3613       nz   = procsnz[i];
3614       vals = buf;
3615       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3616       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3617     }
3618     /* the last proc */
3619     if (size != 1) {
3620       nz   = procsnz[i] - extra_rows;
3621       vals = buf;
3622       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3623       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
3624       ierr = MPIULong_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3625     }
3626     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3627   } else {
3628     /* receive numeric values */
3629     ierr = PetscMalloc1((nz+1),&buf);CHKERRQ(ierr);
3630 
3631     /* receive message of values*/
3632     vals   = buf;
3633     mycols = ibuf;
3634     ierr   = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3635 
3636     /* insert into matrix */
3637     jj = rstart*bs;
3638     for (i=0; i<m; i++) {
3639       ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3640       mycols += locrowlens[i];
3641       vals   += locrowlens[i];
3642       jj++;
3643     }
3644   }
3645   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
3646   ierr = PetscFree(buf);CHKERRQ(ierr);
3647   ierr = PetscFree(ibuf);CHKERRQ(ierr);
3648   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
3649   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
3650   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
3651   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3652   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3653   PetscFunctionReturn(0);
3654 }
3655 
3656 #undef __FUNCT__
3657 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
3658 /*@
3659    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
3660 
3661    Input Parameters:
3662 .  mat  - the matrix
3663 .  fact - factor
3664 
3665    Not Collective, each process can use a different factor
3666 
3667    Level: advanced
3668 
3669   Notes:
3670    This can also be set by the command line option: -mat_use_hash_table <fact>
3671 
3672 .keywords: matrix, hashtable, factor, HT
3673 
3674 .seealso: MatSetOption()
3675 @*/
3676 PetscErrorCode  MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
3677 {
3678   PetscErrorCode ierr;
3679 
3680   PetscFunctionBegin;
3681   ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr);
3682   PetscFunctionReturn(0);
3683 }
3684 
3685 #undef __FUNCT__
3686 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
3687 PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
3688 {
3689   Mat_MPIBAIJ *baij;
3690 
3691   PetscFunctionBegin;
3692   baij          = (Mat_MPIBAIJ*)mat->data;
3693   baij->ht_fact = fact;
3694   PetscFunctionReturn(0);
3695 }
3696 
3697 #undef __FUNCT__
3698 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
3699 PetscErrorCode  MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
3700 {
3701   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
3702 
3703   PetscFunctionBegin;
3704   if (Ad)     *Ad     = a->A;
3705   if (Ao)     *Ao     = a->B;
3706   if (colmap) *colmap = a->garray;
3707   PetscFunctionReturn(0);
3708 }
3709 
3710 /*
3711     Special version for direct calls from Fortran (to eliminate two function call overheads
3712 */
3713 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3714 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3715 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3716 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3717 #endif
3718 
3719 #undef __FUNCT__
3720 #define __FUNCT__ "matmpibiajsetvaluesblocked"
3721 /*@C
3722   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked()
3723 
3724   Collective on Mat
3725 
3726   Input Parameters:
3727 + mat - the matrix
3728 . min - number of input rows
3729 . im - input rows
3730 . nin - number of input columns
3731 . in - input columns
3732 . v - numerical values input
3733 - addvin - INSERT_VALUES or ADD_VALUES
3734 
3735   Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse.
3736 
3737   Level: advanced
3738 
3739 .seealso:   MatSetValuesBlocked()
3740 @*/
3741 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
3742 {
3743   /* convert input arguments to C version */
3744   Mat        mat  = *matin;
3745   PetscInt   m    = *min, n = *nin;
3746   InsertMode addv = *addvin;
3747 
3748   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
3749   const MatScalar *value;
3750   MatScalar       *barray     = baij->barray;
3751   PetscBool       roworiented = baij->roworiented;
3752   PetscErrorCode  ierr;
3753   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
3754   PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
3755   PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
3756 
3757   PetscFunctionBegin;
3758   /* tasks normally handled by MatSetValuesBlocked() */
3759   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
3760 #if defined(PETSC_USE_DEBUG)
3761   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
3762   if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
3763 #endif
3764   if (mat->assembled) {
3765     mat->was_assembled = PETSC_TRUE;
3766     mat->assembled     = PETSC_FALSE;
3767   }
3768   ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
3769 
3770 
3771   if (!barray) {
3772     ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
3773     baij->barray = barray;
3774   }
3775 
3776   if (roworiented) stepval = (n-1)*bs;
3777   else stepval = (m-1)*bs;
3778 
3779   for (i=0; i<m; i++) {
3780     if (im[i] < 0) continue;
3781 #if defined(PETSC_USE_DEBUG)
3782     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
3783 #endif
3784     if (im[i] >= rstart && im[i] < rend) {
3785       row = im[i] - rstart;
3786       for (j=0; j<n; j++) {
3787         /* If NumCol = 1 then a copy is not required */
3788         if ((roworiented) && (n == 1)) {
3789           barray = (MatScalar*)v + i*bs2;
3790         } else if ((!roworiented) && (m == 1)) {
3791           barray = (MatScalar*)v + j*bs2;
3792         } else { /* Here a copy is required */
3793           if (roworiented) {
3794             value = v + i*(stepval+bs)*bs + j*bs;
3795           } else {
3796             value = v + j*(stepval+bs)*bs + i*bs;
3797           }
3798           for (ii=0; ii<bs; ii++,value+=stepval) {
3799             for (jj=0; jj<bs; jj++) {
3800               *barray++ = *value++;
3801             }
3802           }
3803           barray -=bs2;
3804         }
3805 
3806         if (in[j] >= cstart && in[j] < cend) {
3807           col  = in[j] - cstart;
3808           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
3809         } else if (in[j] < 0) continue;
3810 #if defined(PETSC_USE_DEBUG)
3811         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
3812 #endif
3813         else {
3814           if (mat->was_assembled) {
3815             if (!baij->colmap) {
3816               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
3817             }
3818 
3819 #if defined(PETSC_USE_DEBUG)
3820 #if defined(PETSC_USE_CTABLE)
3821             { PetscInt data;
3822               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
3823               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
3824             }
3825 #else
3826             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
3827 #endif
3828 #endif
3829 #if defined(PETSC_USE_CTABLE)
3830             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
3831             col  = (col - 1)/bs;
3832 #else
3833             col = (baij->colmap[in[j]] - 1)/bs;
3834 #endif
3835             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
3836               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
3837               col  =  in[j];
3838             }
3839           } else col = in[j];
3840           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
3841         }
3842       }
3843     } else {
3844       if (!baij->donotstash) {
3845         if (roworiented) {
3846           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
3847         } else {
3848           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
3849         }
3850       }
3851     }
3852   }
3853 
3854   /* task normally handled by MatSetValuesBlocked() */
3855   ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
3856   PetscFunctionReturn(0);
3857 }
3858 
3859 #undef __FUNCT__
3860 #define __FUNCT__ "MatCreateMPIBAIJWithArrays"
3861 /*@
3862      MatCreateMPIBAIJWithArrays - creates a MPI BAIJ matrix using arrays that contain in standard
3863          CSR format the local rows.
3864 
3865    Collective on MPI_Comm
3866 
3867    Input Parameters:
3868 +  comm - MPI communicator
3869 .  bs - the block size, only a block size of 1 is supported
3870 .  m - number of local rows (Cannot be PETSC_DECIDE)
3871 .  n - This value should be the same as the local size used in creating the
3872        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3873        calculated if N is given) For square matrices n is almost always m.
3874 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3875 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3876 .   i - row indices
3877 .   j - column indices
3878 -   a - matrix values
3879 
3880    Output Parameter:
3881 .   mat - the matrix
3882 
3883    Level: intermediate
3884 
3885    Notes:
3886        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3887      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3888      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3889 
3890      The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3891      the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3892      block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3893      with column-major ordering within blocks.
3894 
3895        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3896 
3897 .keywords: matrix, aij, compressed row, sparse, parallel
3898 
3899 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3900           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
3901 @*/
3902 PetscErrorCode  MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3903 {
3904   PetscErrorCode ierr;
3905 
3906   PetscFunctionBegin;
3907   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3908   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3909   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3910   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3911   ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr);
3912   ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
3913   ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
3914   ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
3915   PetscFunctionReturn(0);
3916 }
3917 
3918 #undef __FUNCT__
3919 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_MPIBAIJ"
3920 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3921 {
3922   PetscErrorCode ierr;
3923   PetscInt       m,N,i,rstart,nnz,Ii,bs,cbs;
3924   PetscInt       *indx;
3925   PetscScalar    *values;
3926 
3927   PetscFunctionBegin;
3928   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3929   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3930     Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inmat->data;
3931     PetscInt       *dnz,*onz,sum,mbs,Nbs;
3932     PetscInt       *bindx,rmax=a->rmax,j;
3933 
3934     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3935     mbs = m/bs; Nbs = N/cbs;
3936     if (n == PETSC_DECIDE) {
3937       ierr = PetscSplitOwnership(comm,&n,&Nbs);CHKERRQ(ierr);
3938     }
3939     /* Check sum(n) = Nbs */
3940     ierr = MPI_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3941     if (sum != Nbs) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns != global columns %d",Nbs);
3942 
3943     ierr    = MPI_Scan(&mbs, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3944     rstart -= mbs;
3945 
3946     ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr);
3947     ierr = MatPreallocateInitialize(comm,mbs,n,dnz,onz);CHKERRQ(ierr);
3948     for (i=0; i<mbs; i++) {
3949       ierr = MatGetRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */
3950       nnz = nnz/bs;
3951       for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
3952       ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr);
3953       ierr = MatRestoreRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr);
3954     }
3955     ierr = PetscFree(bindx);CHKERRQ(ierr);
3956 
3957     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3958     ierr = MatSetSizes(*outmat,m,n*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3959     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
3960     ierr = MatSetType(*outmat,MATMPIBAIJ);CHKERRQ(ierr);
3961     ierr = MatMPIBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr);
3962     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3963   }
3964 
3965   /* numeric phase */
3966   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3967   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
3968 
3969   for (i=0; i<m; i++) {
3970     ierr = MatGetRow_SeqBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3971     Ii   = i + rstart;
3972     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3973     ierr = MatRestoreRow_SeqBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3974   }
3975   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3976   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3977   PetscFunctionReturn(0);
3978 }
3979