xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 4de5dcee1ec30f28daae377968e03f120d4a5a2a)
1 
2 #include <../src/mat/impls/baij/mpi/mpibaij.h>   /*I  "petscmat.h"  I*/
3 #include <petscblaslapack.h>
4 #include <petscsf.h>
5 
6 extern PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
7 extern PetscErrorCode MatDisAssemble_MPIBAIJ(Mat);
8 extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []);
9 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
10 extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
11 extern PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
12 extern PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
13 extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar,Vec,Vec);
14 
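/*
   For each locally owned row, places in v the entry of largest absolute value, taking the
   maximum over the diagonal block (a->A) and the off-diagonal block (a->B); if idx is
   provided, column indices coming from B are mapped back to global point indices through
   a->garray.
*/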
15 #undef __FUNCT__
16 #define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ"
17 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
18 {
19   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
20   PetscErrorCode ierr;
21   PetscInt       i,*idxb = 0;
22   PetscScalar    *va,*vb;
23   Vec            vtmp;
24 
25   PetscFunctionBegin;
26   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
27   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
28   if (idx) {
29     for (i=0; i<A->rmap->n; i++) {
30       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
31     }
32   }
33 
34   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
35   if (idx) {ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);}
36   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
37   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
38 
39   for (i=0; i<A->rmap->n; i++) {
40     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
41       va[i] = vb[i];
42       if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);
43     }
44   }
45 
46   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
47   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
48   ierr = PetscFree(idxb);CHKERRQ(ierr);
49   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
50   PetscFunctionReturn(0);
51 }
52 
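/*
   Saves a copy of the current numerical values of the diagonal (A) and off-diagonal (B)
   blocks so that they can later be restored with MatRetrieveValues().
*/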
53 #undef __FUNCT__
54 #define __FUNCT__ "MatStoreValues_MPIBAIJ"
55 PetscErrorCode  MatStoreValues_MPIBAIJ(Mat mat)
56 {
57   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
58   PetscErrorCode ierr;
59 
60   PetscFunctionBegin;
61   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
62   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
63   PetscFunctionReturn(0);
64 }
65 
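/*
   Restores the numerical values previously saved with MatStoreValues() into both blocks.
*/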
66 #undef __FUNCT__
67 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
68 PetscErrorCode  MatRetrieveValues_MPIBAIJ(Mat mat)
69 {
70   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
71   PetscErrorCode ierr;
72 
73   PetscFunctionBegin;
74   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
75   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
76   PetscFunctionReturn(0);
77 }
78 
79 /*
80      Local utility routine that creates a mapping from the global column
81    number to the local number in the off-diagonal part of the local
82    storage of the matrix.  This is done in a non-scalable way since the
83    length of colmap equals the global number of block columns.
84 */
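/*
   Illustrative example (non-CTABLE case, values chosen only for exposition): with bs = 2
   and garray = {3,7}, the loop below sets colmap[3] = 1 and colmap[7] = 3 (1-based, with
   0 meaning the block column is not present on this process), so MatSetValues_MPIBAIJ()
   maps global point column 15 to local column colmap[15/2] - 1 + 15%2 = 3 in B.
*/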
85 #undef __FUNCT__
86 #define __FUNCT__ "MatCreateColmap_MPIBAIJ_Private"
87 PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
88 {
89   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
90   Mat_SeqBAIJ    *B    = (Mat_SeqBAIJ*)baij->B->data;
91   PetscErrorCode ierr;
92   PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;
93 
94   PetscFunctionBegin;
95 #if defined(PETSC_USE_CTABLE)
96   ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
97   for (i=0; i<nbs; i++) {
98     ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr);
99   }
100 #else
101   ierr = PetscMalloc1((baij->Nbs+1),&baij->colmap);CHKERRQ(ierr);
102   ierr = PetscLogObjectMemory((PetscObject)mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
103   ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
104   for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
105 #endif
106   PetscFunctionReturn(0);
107 }
108 
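/*
   Inserts or adds a single scalar at point indices (row,col), both local to this process,
   directly into the SeqBAIJ storage of the diagonal block A: the block row and block column
   are row/bs and col/bs, a short binary search plus linear scan locates the block within the
   row, and the value is placed at offset bs*cidx+ridx inside the bs x bs block.  If the block
   is not yet present the row is reallocated (unless a->nonew forbids it) and later blocks are
   shifted up.
*/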
109 #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
110   { \
111  \
112     brow = row/bs;  \
113     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
114     rmax = aimax[brow]; nrow = ailen[brow]; \
115     bcol = col/bs; \
116     ridx = row % bs; cidx = col % bs; \
117     low  = 0; high = nrow; \
118     while (high-low > 3) { \
119       t = (low+high)/2; \
120       if (rp[t] > bcol) high = t; \
121       else              low  = t; \
122     } \
123     for (_i=low; _i<high; _i++) { \
124       if (rp[_i] > bcol) break; \
125       if (rp[_i] == bcol) { \
126         bap = ap +  bs2*_i + bs*cidx + ridx; \
127         if (addv == ADD_VALUES) *bap += value;  \
128         else                    *bap  = value;  \
129         goto a_noinsert; \
130       } \
131     } \
132     if (a->nonew == 1) goto a_noinsert; \
133     if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
134     MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
135     N = nrow++ - 1;  \
136     /* shift up all the later entries in this row */ \
137     for (ii=N; ii>=_i; ii--) { \
138       rp[ii+1] = rp[ii]; \
139       ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
140     } \
141     if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
142     rp[_i]                      = bcol;  \
143     ap[bs2*_i + bs*cidx + ridx] = value;  \
144 a_noinsert:; \
145     ailen[brow] = nrow; \
146   }
147 
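/* Identical to MatSetValues_SeqBAIJ_A_Private(), but operating on the off-diagonal block B */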
148 #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
149   { \
150     brow = row/bs;  \
151     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
152     rmax = bimax[brow]; nrow = bilen[brow]; \
153     bcol = col/bs; \
154     ridx = row % bs; cidx = col % bs; \
155     low  = 0; high = nrow; \
156     while (high-low > 3) { \
157       t = (low+high)/2; \
158       if (rp[t] > bcol) high = t; \
159       else              low  = t; \
160     } \
161     for (_i=low; _i<high; _i++) { \
162       if (rp[_i] > bcol) break; \
163       if (rp[_i] == bcol) { \
164         bap = ap +  bs2*_i + bs*cidx + ridx; \
165         if (addv == ADD_VALUES) *bap += value;  \
166         else                    *bap  = value;  \
167         goto b_noinsert; \
168       } \
169     } \
170     if (b->nonew == 1) goto b_noinsert; \
171     if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
172     MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
173     N = nrow++ - 1;  \
174     /* shift up all the later entries in this row */ \
175     for (ii=N; ii>=_i; ii--) { \
176       rp[ii+1] = rp[ii]; \
177       ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
178     } \
179     if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
180     rp[_i]                      = bcol;  \
181     ap[bs2*_i + bs*cidx + ridx] = value;  \
182 b_noinsert:; \
183     bilen[brow] = nrow; \
184   }
185 
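/*
   Point-indexed MatSetValues() for MPIBAIJ.  Locally owned rows go directly into the
   diagonal block A or the off-diagonal block B via the macros above; for B, global columns
   are translated to local columns through colmap (created on demand), and the matrix is
   disassembled if a new off-diagonal block column appears after assembly.  Rows owned by
   other processes are placed in the stash and communicated during assembly.
*/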
186 #undef __FUNCT__
187 #define __FUNCT__ "MatSetValues_MPIBAIJ"
188 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
189 {
190   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
191   MatScalar      value;
192   PetscBool      roworiented = baij->roworiented;
193   PetscErrorCode ierr;
194   PetscInt       i,j,row,col;
195   PetscInt       rstart_orig=mat->rmap->rstart;
196   PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
197   PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;
198 
199   /* Some variables required by the MatSetValues_SeqBAIJ_{A,B}_Private() macros */
200   Mat         A     = baij->A;
201   Mat_SeqBAIJ *a    = (Mat_SeqBAIJ*)(A)->data;
202   PetscInt    *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
203   MatScalar   *aa   =a->a;
204 
205   Mat         B     = baij->B;
206   Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
207   PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
208   MatScalar   *ba   =b->a;
209 
210   PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
211   PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
212   MatScalar *ap,*bap;
213 
214   PetscFunctionBegin;
215   for (i=0; i<m; i++) {
216     if (im[i] < 0) continue;
217 #if defined(PETSC_USE_DEBUG)
218     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
219 #endif
220     if (im[i] >= rstart_orig && im[i] < rend_orig) {
221       row = im[i] - rstart_orig;
222       for (j=0; j<n; j++) {
223         if (in[j] >= cstart_orig && in[j] < cend_orig) {
224           col = in[j] - cstart_orig;
225           if (roworiented) value = v[i*n+j];
226           else             value = v[i+j*m];
227           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
228           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
229         } else if (in[j] < 0) continue;
230 #if defined(PETSC_USE_DEBUG)
231         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
232 #endif
233         else {
234           if (mat->was_assembled) {
235             if (!baij->colmap) {
236               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
237             }
238 #if defined(PETSC_USE_CTABLE)
239             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
240             col  = col - 1;
241 #else
242             col = baij->colmap[in[j]/bs] - 1;
243 #endif
244             if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
245               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
246               col  =  in[j];
247               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
248               B    = baij->B;
249               b    = (Mat_SeqBAIJ*)(B)->data;
250               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
251               ba   =b->a;
252             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
253             else col += in[j]%bs;
254           } else col = in[j];
255           if (roworiented) value = v[i*n+j];
256           else             value = v[i+j*m];
257           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
258           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
259         }
260       }
261     } else {
262       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
263       if (!baij->donotstash) {
264         mat->assembled = PETSC_FALSE;
265         if (roworiented) {
266           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
267         } else {
268           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
269         }
270       }
271     }
272   }
273   PetscFunctionReturn(0);
274 }
275 
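/*
   Block-indexed MatSetValues(): im[] and in[] are block indices and v[] holds bs x bs blocks.
   Each block is packed into baij->barray (when a copy is needed) and passed to
   MatSetValuesBlocked_SeqBAIJ() for either A or B; off-process block rows are placed in the
   block stash (mat->bstash).
*/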
276 #undef __FUNCT__
277 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
278 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
279 {
280   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
281   const PetscScalar *value;
282   MatScalar         *barray     = baij->barray;
283   PetscBool         roworiented = baij->roworiented;
284   PetscErrorCode    ierr;
285   PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
286   PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
287   PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
288 
289   PetscFunctionBegin;
290   if (!barray) {
291     ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
292     baij->barray = barray;
293   }
294 
295   if (roworiented) stepval = (n-1)*bs;
296   else stepval = (m-1)*bs;
297 
298   for (i=0; i<m; i++) {
299     if (im[i] < 0) continue;
300 #if defined(PETSC_USE_DEBUG)
301     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
302 #endif
303     if (im[i] >= rstart && im[i] < rend) {
304       row = im[i] - rstart;
305       for (j=0; j<n; j++) {
306         /* If there is only one block column (row oriented) or one block row (column oriented) a copy is not required */
307         if ((roworiented) && (n == 1)) {
308           barray = (MatScalar*)v + i*bs2;
309         } else if ((!roworiented) && (m == 1)) {
310           barray = (MatScalar*)v + j*bs2;
311         } else { /* Here a copy is required */
312           if (roworiented) {
313             value = v + (i*(stepval+bs) + j)*bs;
314           } else {
315             value = v + (j*(stepval+bs) + i)*bs;
316           }
317           for (ii=0; ii<bs; ii++,value+=bs+stepval) {
318             for (jj=0; jj<bs; jj++) barray[jj] = value[jj];
319             barray += bs;
320           }
321           barray -= bs2;
322         }
323 
324         if (in[j] >= cstart && in[j] < cend) {
325           col  = in[j] - cstart;
326           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
327         } else if (in[j] < 0) continue;
328 #if defined(PETSC_USE_DEBUG)
329         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
330 #endif
331         else {
332           if (mat->was_assembled) {
333             if (!baij->colmap) {
334               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
335             }
336 
337 #if defined(PETSC_USE_DEBUG)
338 #if defined(PETSC_USE_CTABLE)
339             { PetscInt data;
340               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
341               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
342             }
343 #else
344             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
345 #endif
346 #endif
347 #if defined(PETSC_USE_CTABLE)
348             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
349             col  = (col - 1)/bs;
350 #else
351             col = (baij->colmap[in[j]] - 1)/bs;
352 #endif
353             if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
354               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
355               col  =  in[j];
356             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", bs*im[i], bs*in[j]);
357           } else col = in[j];
358           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
359         }
360       }
361     } else {
362       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
363       if (!baij->donotstash) {
364         if (roworiented) {
365           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
366         } else {
367           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
368         }
369       }
370     }
371   }
372   PetscFunctionReturn(0);
373 }
374 
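/*
   The hash table used by the *_HT MatSetValues() variants below is addressed with
   multiplicative hashing: the key is multiplied by the golden-ratio fraction and the
   fractional part is scaled to the table size.  Illustrative example (numbers chosen only
   for exposition): HASH(1000,7,tmp) computes tmp = 7*0.6180339887 = 4.3262..., so the slot
   is (PetscInt)(1000*0.3262...) = 326.
*/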
375 #define HASH_KEY 0.6180339887
376 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
377 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
378 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
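/*
   Hash-table version of MatSetValues(), installed by MatAssemblyEnd_MPIBAIJ() when
   baij->ht_flag is set.  Values may only be set at locations already present in the
   assembled matrix: the key (row/bs)*Nbs + col/bs + 1 is hashed and collisions are resolved
   by a circular linear probe; HD[idx] points directly at the stored bs x bs block.
   Off-process rows are still routed through the stash.
*/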
379 #undef __FUNCT__
380 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
381 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
382 {
383   Mat_MPIBAIJ    *baij       = (Mat_MPIBAIJ*)mat->data;
384   PetscBool      roworiented = baij->roworiented;
385   PetscErrorCode ierr;
386   PetscInt       i,j,row,col;
387   PetscInt       rstart_orig=mat->rmap->rstart;
388   PetscInt       rend_orig  =mat->rmap->rend,Nbs=baij->Nbs;
389   PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
390   PetscReal      tmp;
391   MatScalar      **HD = baij->hd,value;
392 #if defined(PETSC_USE_DEBUG)
393   PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
394 #endif
395 
396   PetscFunctionBegin;
397   for (i=0; i<m; i++) {
398 #if defined(PETSC_USE_DEBUG)
399     if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
400     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
401 #endif
402     row = im[i];
403     if (row >= rstart_orig && row < rend_orig) {
404       for (j=0; j<n; j++) {
405         col = in[j];
406         if (roworiented) value = v[i*n+j];
407         else             value = v[i+j*m];
408         /* Look up into the Hash Table */
409         key = (row/bs)*Nbs+(col/bs)+1;
410         h1  = HASH(size,key,tmp);
411 
412 
413         idx = h1;
414 #if defined(PETSC_USE_DEBUG)
415         insert_ct++;
416         total_ct++;
417         if (HT[idx] != key) {
418           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
419           if (idx == size) {
420             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
421             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
422           }
423         }
424 #else
425         if (HT[idx] != key) {
426           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
427           if (idx == size) {
428             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
429             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
430           }
431         }
432 #endif
433         /* A HASH table entry is found, so insert the values at the correct address */
434         if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
435         else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
436       }
437     } else if (!baij->donotstash) {
438       if (roworiented) {
439         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
440       } else {
441         ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
442       }
443     }
444   }
445 #if defined(PETSC_USE_DEBUG)
446   baij->ht_total_ct  = total_ct;
447   baij->ht_insert_ct = insert_ct;
448 #endif
449   PetscFunctionReturn(0);
450 }
451 
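/*
   Block-indexed counterpart of the hash-table routine above: im[]/in[] are block indices,
   the key is row*Nbs + col + 1, and the whole bs x bs block located through HD[idx] is
   overwritten or accumulated.
*/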
452 #undef __FUNCT__
453 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
454 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
455 {
456   Mat_MPIBAIJ       *baij       = (Mat_MPIBAIJ*)mat->data;
457   PetscBool         roworiented = baij->roworiented;
458   PetscErrorCode    ierr;
459   PetscInt          i,j,ii,jj,row,col;
460   PetscInt          rstart=baij->rstartbs;
461   PetscInt          rend  =mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
462   PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
463   PetscReal         tmp;
464   MatScalar         **HD = baij->hd,*baij_a;
465   const PetscScalar *v_t,*value;
466 #if defined(PETSC_USE_DEBUG)
467   PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
468 #endif
469 
470   PetscFunctionBegin;
471   if (roworiented) stepval = (n-1)*bs;
472   else stepval = (m-1)*bs;
473 
474   for (i=0; i<m; i++) {
475 #if defined(PETSC_USE_DEBUG)
476     if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
477     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
478 #endif
479     row = im[i];
480     v_t = v + i*nbs2;
481     if (row >= rstart && row < rend) {
482       for (j=0; j<n; j++) {
483         col = in[j];
484 
485         /* Look up into the Hash Table */
486         key = row*Nbs+col+1;
487         h1  = HASH(size,key,tmp);
488 
489         idx = h1;
490 #if defined(PETSC_USE_DEBUG)
491         total_ct++;
492         insert_ct++;
493         if (HT[idx] != key) {
494           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
495           if (idx == size) {
496             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
497             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
498           }
499         }
500 #else
501         if (HT[idx] != key) {
502           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
503           if (idx == size) {
504             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
505             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
506           }
507         }
508 #endif
509         baij_a = HD[idx];
510         if (roworiented) {
511           /*value = v + i*(stepval+bs)*bs + j*bs;*/
512           /* value = v + (i*(stepval+bs)+j)*bs; */
513           value = v_t;
514           v_t  += bs;
515           if (addv == ADD_VALUES) {
516             for (ii=0; ii<bs; ii++,value+=stepval) {
517               for (jj=ii; jj<bs2; jj+=bs) {
518                 baij_a[jj] += *value++;
519               }
520             }
521           } else {
522             for (ii=0; ii<bs; ii++,value+=stepval) {
523               for (jj=ii; jj<bs2; jj+=bs) {
524                 baij_a[jj] = *value++;
525               }
526             }
527           }
528         } else {
529           value = v + j*(stepval+bs)*bs + i*bs;
530           if (addv == ADD_VALUES) {
531             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
532               for (jj=0; jj<bs; jj++) {
533                 baij_a[jj] += *value++;
534               }
535             }
536           } else {
537             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
538               for (jj=0; jj<bs; jj++) {
539                 baij_a[jj] = *value++;
540               }
541             }
542           }
543         }
544       }
545     } else {
546       if (!baij->donotstash) {
547         if (roworiented) {
548           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
549         } else {
550           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
551         }
552       }
553     }
554   }
555 #if defined(PETSC_USE_DEBUG)
556   baij->ht_total_ct  = total_ct;
557   baij->ht_insert_ct = insert_ct;
558 #endif
559   PetscFunctionReturn(0);
560 }
561 
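/*
   Returns values for locally owned rows only.  Columns inside the local diagonal range are
   read from A; all other columns are translated through colmap and read from B, with 0.0
   returned for locations that are not stored.
*/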
562 #undef __FUNCT__
563 #define __FUNCT__ "MatGetValues_MPIBAIJ"
564 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
565 {
566   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
567   PetscErrorCode ierr;
568   PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
569   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
570 
571   PetscFunctionBegin;
572   for (i=0; i<m; i++) {
573     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
574     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
575     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
576       row = idxm[i] - bsrstart;
577       for (j=0; j<n; j++) {
578         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
579         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
580         if (idxn[j] >= bscstart && idxn[j] < bscend) {
581           col  = idxn[j] - bscstart;
582           ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
583         } else {
584           if (!baij->colmap) {
585             ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
586           }
587 #if defined(PETSC_USE_CTABLE)
588           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
589           data--;
590 #else
591           data = baij->colmap[idxn[j]/bs]-1;
592 #endif
593           if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
594           else {
595             col  = data + idxn[j]%bs;
596             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
597           }
598         }
599       }
600     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
601   }
602   PetscFunctionReturn(0);
603 }
604 
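/*
   NORM_FROBENIUS: sums the squares of all stored entries of A and B and reduces with
   MPI_SUM.  NORM_1: accumulates per-column sums of absolute values into an array of length
   cmap->N (not scalable) and takes the maximum after an MPI_SUM reduction.  NORM_INFINITY:
   forms per-row sums over both blocks and reduces the local maximum with MPI_MAX.
*/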
605 #undef __FUNCT__
606 #define __FUNCT__ "MatNorm_MPIBAIJ"
607 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
608 {
609   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
610   Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
611   PetscErrorCode ierr;
612   PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
613   PetscReal      sum = 0.0;
614   MatScalar      *v;
615 
616   PetscFunctionBegin;
617   if (baij->size == 1) {
618     ierr =  MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
619   } else {
620     if (type == NORM_FROBENIUS) {
621       v  = amat->a;
622       nz = amat->nz*bs2;
623       for (i=0; i<nz; i++) {
624         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
625       }
626       v  = bmat->a;
627       nz = bmat->nz*bs2;
628       for (i=0; i<nz; i++) {
629         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
630       }
631       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
632       *nrm = PetscSqrtReal(*nrm);
633     } else if (type == NORM_1) { /* max column sum */
634       PetscReal *tmp,*tmp2;
635       PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
636       ierr = PetscMalloc2(mat->cmap->N,&tmp,mat->cmap->N,&tmp2);CHKERRQ(ierr);
637       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
638       v    = amat->a; jj = amat->j;
639       for (i=0; i<amat->nz; i++) {
640         for (j=0; j<bs; j++) {
641           col = bs*(cstart + *jj) + j; /* column index */
642           for (row=0; row<bs; row++) {
643             tmp[col] += PetscAbsScalar(*v);  v++;
644           }
645         }
646         jj++;
647       }
648       v = bmat->a; jj = bmat->j;
649       for (i=0; i<bmat->nz; i++) {
650         for (j=0; j<bs; j++) {
651           col = bs*garray[*jj] + j;
652           for (row=0; row<bs; row++) {
653             tmp[col] += PetscAbsScalar(*v); v++;
654           }
655         }
656         jj++;
657       }
658       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
659       *nrm = 0.0;
660       for (j=0; j<mat->cmap->N; j++) {
661         if (tmp2[j] > *nrm) *nrm = tmp2[j];
662       }
663       ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
664     } else if (type == NORM_INFINITY) { /* max row sum */
665       PetscReal *sums;
666       ierr = PetscMalloc1(bs,&sums);CHKERRQ(ierr);
667       sum  = 0.0;
668       for (j=0; j<amat->mbs; j++) {
669         for (row=0; row<bs; row++) sums[row] = 0.0;
670         v  = amat->a + bs2*amat->i[j];
671         nz = amat->i[j+1]-amat->i[j];
672         for (i=0; i<nz; i++) {
673           for (col=0; col<bs; col++) {
674             for (row=0; row<bs; row++) {
675               sums[row] += PetscAbsScalar(*v); v++;
676             }
677           }
678         }
679         v  = bmat->a + bs2*bmat->i[j];
680         nz = bmat->i[j+1]-bmat->i[j];
681         for (i=0; i<nz; i++) {
682           for (col=0; col<bs; col++) {
683             for (row=0; row<bs; row++) {
684               sums[row] += PetscAbsScalar(*v); v++;
685             }
686           }
687         }
688         for (row=0; row<bs; row++) {
689           if (sums[row] > sum) sum = sums[row];
690         }
691       }
692       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
693       ierr = PetscFree(sums);CHKERRQ(ierr);
694     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for this norm yet");
695   }
696   PetscFunctionReturn(0);
697 }
698 
699 /*
700   Creates and fills the hash table.
701   This table is created only once.
702   If new entries need to be added to the matrix
703   then the hash table has to be destroyed and
704   recreated.
705 */
706 #undef __FUNCT__
707 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
708 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
709 {
710   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
711   Mat            A     = baij->A,B=baij->B;
712   Mat_SeqBAIJ    *a    = (Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)B->data;
713   PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
714   PetscErrorCode ierr;
715   PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
716   PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
717   PetscInt       *HT,key;
718   MatScalar      **HD;
719   PetscReal      tmp;
720 #if defined(PETSC_USE_INFO)
721   PetscInt ct=0,max=0;
722 #endif
723 
724   PetscFunctionBegin;
725   if (baij->ht) PetscFunctionReturn(0);
726 
727   baij->ht_size = (PetscInt)(factor*nz);
728   ht_size       = baij->ht_size;
729 
730   /* Allocate Memory for Hash Table */
731   ierr = PetscCalloc2(ht_size,&baij->hd,ht_size,&baij->ht);CHKERRQ(ierr);
732   HD   = baij->hd;
733   HT   = baij->ht;
734 
735   /* Loop Over A */
736   for (i=0; i<a->mbs; i++) {
737     for (j=ai[i]; j<ai[i+1]; j++) {
738       row = i+rstart;
739       col = aj[j]+cstart;
740 
741       key = row*Nbs + col + 1;
742       h1  = HASH(ht_size,key,tmp);
743       for (k=0; k<ht_size; k++) {
744         if (!HT[(h1+k)%ht_size]) {
745           HT[(h1+k)%ht_size] = key;
746           HD[(h1+k)%ht_size] = a->a + j*bs2;
747           break;
748 #if defined(PETSC_USE_INFO)
749         } else {
750           ct++;
751 #endif
752         }
753       }
754 #if defined(PETSC_USE_INFO)
755       if (k> max) max = k;
756 #endif
757     }
758   }
759   /* Loop Over B */
760   for (i=0; i<b->mbs; i++) {
761     for (j=bi[i]; j<bi[i+1]; j++) {
762       row = i+rstart;
763       col = garray[bj[j]];
764       key = row*Nbs + col + 1;
765       h1  = HASH(ht_size,key,tmp);
766       for (k=0; k<ht_size; k++) {
767         if (!HT[(h1+k)%ht_size]) {
768           HT[(h1+k)%ht_size] = key;
769           HD[(h1+k)%ht_size] = b->a + j*bs2;
770           break;
771 #if defined(PETSC_USE_INFO)
772         } else {
773           ct++;
774 #endif
775         }
776       }
777 #if defined(PETSC_USE_INFO)
778       if (k> max) max = k;
779 #endif
780     }
781   }
782 
783   /* Print Summary */
784 #if defined(PETSC_USE_INFO)
785   for (i=0,j=0; i<ht_size; i++) {
786     if (HT[i]) j++;
787   }
788   ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
789 #endif
790   PetscFunctionReturn(0);
791 }
792 
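/*
   Verifies that all processes used a consistent InsertMode and begins communication of both
   the point stash (mat->stash) and the block stash (mat->bstash) that hold entries destined
   for other processes.
*/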
793 #undef __FUNCT__
794 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
795 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
796 {
797   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
798   PetscErrorCode ierr;
799   PetscInt       nstash,reallocs;
800   InsertMode     addv;
801 
802   PetscFunctionBegin;
803   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
804 
805   /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
806   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
807   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
808   mat->insertmode = addv; /* in case this processor had no cache */
809 
810   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
811   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
812   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
813   ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
814   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
815   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
816   PetscFunctionReturn(0);
817 }
818 
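/*
   Receives the stashed point and block entries and inserts them (the block stash is column
   oriented, so the roworiented flags are temporarily cleared), assembles the A and B blocks,
   disassembles if any process did so, sets up the multiply scatter on the first final
   assembly and, when requested, builds the hash table and switches to the *_HT
   MatSetValues() routines.
*/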
819 #undef __FUNCT__
820 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
821 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
822 {
823   Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
824   Mat_SeqBAIJ    *a   =(Mat_SeqBAIJ*)baij->A->data;
825   PetscErrorCode ierr;
826   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
827   PetscInt       *row,*col;
828   PetscBool      r1,r2,r3,other_disassembled;
829   MatScalar      *val;
830   InsertMode     addv = mat->insertmode;
831   PetscMPIInt    n;
832 
833   PetscFunctionBegin;
834   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
835   if (!baij->donotstash && !mat->nooffprocentries) {
836     while (1) {
837       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
838       if (!flg) break;
839 
840       for (i=0; i<n;) {
841         /* Now identify the consecutive vals belonging to the same row */
842         for (j=i,rstart=row[j]; j<n; j++) {
843           if (row[j] != rstart) break;
844         }
845         if (j < n) ncols = j-i;
846         else       ncols = n-i;
847         /* Now assemble all these values with a single function call */
848         ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
849         i    = j;
850       }
851     }
852     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
853     /* Now process the block-stash. Since the values are stashed column-oriented,
854        clear the roworiented flags while calling MatSetValues() and restore the
855        original flags afterwards */
856     r1 = baij->roworiented;
857     r2 = a->roworiented;
858     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
859 
860     baij->roworiented = PETSC_FALSE;
861     a->roworiented    = PETSC_FALSE;
862 
863     (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
864     while (1) {
865       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
866       if (!flg) break;
867 
868       for (i=0; i<n;) {
869         /* Now identify the consecutive vals belonging to the same row */
870         for (j=i,rstart=row[j]; j<n; j++) {
871           if (row[j] != rstart) break;
872         }
873         if (j < n) ncols = j-i;
874         else       ncols = n-i;
875         ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
876         i    = j;
877       }
878     }
879     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
880 
881     baij->roworiented = r1;
882     a->roworiented    = r2;
883 
884     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
885   }
886 
887   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
888   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
889 
890   /* determine if any processor has disassembled; if so we must
891      also disassemble ourselves, in order that we may reassemble. */
892   /*
893      if the nonzero structure of the submatrix B cannot change then we know that
894      no processor disassembled, thus we can skip this step
895   */
896   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
897     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
898     if (mat->was_assembled && !other_disassembled) {
899       ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
900     }
901   }
902 
903   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
904     ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
905   }
906   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
907   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
908 
909 #if defined(PETSC_USE_INFO)
910   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
911     ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);
912 
913     baij->ht_total_ct  = 0;
914     baij->ht_insert_ct = 0;
915   }
916 #endif
917   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
918     ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
919 
920     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
921     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
922   }
923 
924   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
925 
926   baij->rowvalues = 0;
927 
928   /* if no new nonzero locations are allowed in the matrix then only set the matrix state the first time through */
929   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
930     PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
931     ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
932   }
933   PetscFunctionReturn(0);
934 }
935 
936 extern PetscErrorCode MatView_SeqBAIJ(Mat,PetscViewer);
937 #include <petscdraw.h>
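/*
   For the ASCII info formats, prints per-process nonzero counts and the VecScatter used in
   the matrix-vector product.  Otherwise the whole matrix is gathered (unscalably) onto
   process 0 as a temporary MPIBAIJ matrix whose sequential diagonal block is then viewed
   there.
*/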
938 #undef __FUNCT__
939 #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
940 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
941 {
942   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
943   PetscErrorCode    ierr;
944   PetscMPIInt       rank = baij->rank;
945   PetscInt          bs   = mat->rmap->bs;
946   PetscBool         iascii,isdraw;
947   PetscViewer       sviewer;
948   PetscViewerFormat format;
949 
950   PetscFunctionBegin;
951   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
952   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
953   if (iascii) {
954     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
955     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
956       MatInfo info;
957       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
958       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
959       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
960       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
961                                                 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
962       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
963       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
964       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
965       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
966       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
967       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
968       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
969       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
970       PetscFunctionReturn(0);
971     } else if (format == PETSC_VIEWER_ASCII_INFO) {
972       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
973       PetscFunctionReturn(0);
974     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
975       PetscFunctionReturn(0);
976     }
977   }
978 
979   if (isdraw) {
980     PetscDraw draw;
981     PetscBool isnull;
982     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
983     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
984   }
985 
986   {
987     /* assemble the entire matrix onto first processor. */
988     Mat         A;
989     Mat_SeqBAIJ *Aloc;
990     PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
991     MatScalar   *a;
992 
993     /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
994     /* Perhaps this should be the type of mat? */
995     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
996     if (!rank) {
997       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
998     } else {
999       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1000     }
1001     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
1002     ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
1003     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1004     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1005 
1006     /* copy over the A part */
1007     Aloc = (Mat_SeqBAIJ*)baij->A->data;
1008     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1009     ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);
1010 
1011     for (i=0; i<mbs; i++) {
1012       rvals[0] = bs*(baij->rstartbs + i);
1013       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1014       for (j=ai[i]; j<ai[i+1]; j++) {
1015         col = (baij->cstartbs+aj[j])*bs;
1016         for (k=0; k<bs; k++) {
1017           ierr      = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1018           col++; a += bs;
1019         }
1020       }
1021     }
1022     /* copy over the B part */
1023     Aloc = (Mat_SeqBAIJ*)baij->B->data;
1024     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1025     for (i=0; i<mbs; i++) {
1026       rvals[0] = bs*(baij->rstartbs + i);
1027       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1028       for (j=ai[i]; j<ai[i+1]; j++) {
1029         col = baij->garray[aj[j]]*bs;
1030         for (k=0; k<bs; k++) {
1031           ierr      = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1032           col++; a += bs;
1033         }
1034       }
1035     }
1036     ierr = PetscFree(rvals);CHKERRQ(ierr);
1037     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1038     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1039     /*
1040        Everyone has to participate in the call that draws the matrix since the
1041        graphics waits are synchronized across all processors that share the PetscDraw object
1042     */
1043     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1044     if (!rank) {
1045       ierr = MatView_SeqBAIJ(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1046     }
1047     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1048     ierr = MatDestroy(&A);CHKERRQ(ierr);
1049   }
1050   PetscFunctionReturn(0);
1051 }
1052 
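/*
   Writes the matrix in the PETSc binary format: the header, the per-(point-)row lengths,
   the column indices and the numerical values, all expanded from block storage to point
   storage.  Process 0 gathers the data from the other processes with flow-controlled
   messages and does all the writing.
*/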
1053 #undef __FUNCT__
1054 #define __FUNCT__ "MatView_MPIBAIJ_Binary"
1055 static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
1056 {
1057   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)mat->data;
1058   Mat_SeqBAIJ    *A = (Mat_SeqBAIJ*)a->A->data;
1059   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)a->B->data;
1060   PetscErrorCode ierr;
1061   PetscInt       i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen;
1062   PetscInt       *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll;
1063   int            fd;
1064   PetscScalar    *column_values;
1065   FILE           *file;
1066   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1067   PetscInt       message_count,flowcontrolcount;
1068 
1069   PetscFunctionBegin;
1070   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1071   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1072   nz   = bs2*(A->nz + B->nz);
1073   rlen = mat->rmap->n;
1074   if (!rank) {
1075     header[0] = MAT_FILE_CLASSID;
1076     header[1] = mat->rmap->N;
1077     header[2] = mat->cmap->N;
1078 
1079     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1080     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1081     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1082     /* get largest number of rows any processor has */
1083     range = mat->rmap->range;
1084     for (i=1; i<size; i++) {
1085       rlen = PetscMax(rlen,range[i+1] - range[i]);
1086     }
1087   } else {
1088     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1089   }
1090 
1091   ierr = PetscMalloc1((rlen/bs),&crow_lens);CHKERRQ(ierr);
1092   /* compute lengths of each row  */
1093   for (i=0; i<a->mbs; i++) {
1094     crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1095   }
1096   /* store the row lengths to the file */
1097   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1098   if (!rank) {
1099     MPI_Status status;
1100     ierr = PetscMalloc1(rlen,&row_lens);CHKERRQ(ierr);
1101     rlen = (range[1] - range[0])/bs;
1102     for (i=0; i<rlen; i++) {
1103       for (j=0; j<bs; j++) {
1104         row_lens[i*bs+j] = bs*crow_lens[i];
1105       }
1106     }
1107     ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1108     for (i=1; i<size; i++) {
1109       rlen = (range[i+1] - range[i])/bs;
1110       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1111       ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1112       for (k=0; k<rlen; k++) {
1113         for (j=0; j<bs; j++) {
1114           row_lens[k*bs+j] = bs*crow_lens[k];
1115         }
1116       }
1117       ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1118     }
1119     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1120     ierr = PetscFree(row_lens);CHKERRQ(ierr);
1121   } else {
1122     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1123     ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1124     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1125   }
1126   ierr = PetscFree(crow_lens);CHKERRQ(ierr);
1127 
1128   /* load up the local column indices. Include them for every point row, not just once per block row, since process 0 does not
1129      have the information needed to expand a block row into its point rows. This does require more communication, but still no
1130      more than the communication needed for the nonzero values  */
1131   nzmax = nz; /* space the largest processor needs */
1132   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1133   ierr  = PetscMalloc1(nzmax,&column_indices);CHKERRQ(ierr);
1134   cnt   = 0;
1135   for (i=0; i<a->mbs; i++) {
1136     pcnt = cnt;
1137     for (j=B->i[i]; j<B->i[i+1]; j++) {
1138       if ((col = garray[B->j[j]]) > cstart) break;
1139       for (l=0; l<bs; l++) {
1140         column_indices[cnt++] = bs*col+l;
1141       }
1142     }
1143     for (k=A->i[i]; k<A->i[i+1]; k++) {
1144       for (l=0; l<bs; l++) {
1145         column_indices[cnt++] = bs*(A->j[k] + cstart)+l;
1146       }
1147     }
1148     for (; j<B->i[i+1]; j++) {
1149       for (l=0; l<bs; l++) {
1150         column_indices[cnt++] = bs*garray[B->j[j]]+l;
1151       }
1152     }
1153     len = cnt - pcnt;
1154     for (k=1; k<bs; k++) {
1155       ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr);
1156       cnt += len;
1157     }
1158   }
1159   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1160 
1161   /* store the columns to the file */
1162   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1163   if (!rank) {
1164     MPI_Status status;
1165     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1166     for (i=1; i<size; i++) {
1167       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1168       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1169       ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1170       ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1171     }
1172     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1173   } else {
1174     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1175     ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1176     ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1177     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1178   }
1179   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1180 
1181   /* load up the numerical values */
1182   ierr = PetscMalloc1(nzmax,&column_values);CHKERRQ(ierr);
1183   cnt  = 0;
1184   for (i=0; i<a->mbs; i++) {
1185     rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]);
1186     for (j=B->i[i]; j<B->i[i+1]; j++) {
1187       if (garray[B->j[j]] > cstart) break;
1188       for (l=0; l<bs; l++) {
1189         for (ll=0; ll<bs; ll++) {
1190           column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1191         }
1192       }
1193       cnt += bs;
1194     }
1195     for (k=A->i[i]; k<A->i[i+1]; k++) {
1196       for (l=0; l<bs; l++) {
1197         for (ll=0; ll<bs; ll++) {
1198           column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll];
1199         }
1200       }
1201       cnt += bs;
1202     }
1203     for (; j<B->i[i+1]; j++) {
1204       for (l=0; l<bs; l++) {
1205         for (ll=0; ll<bs; ll++) {
1206           column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1207         }
1208       }
1209       cnt += bs;
1210     }
1211     cnt += (bs-1)*rlen;
1212   }
1213   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1214 
1215   /* store the column values to the file */
1216   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1217   if (!rank) {
1218     MPI_Status status;
1219     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1220     for (i=1; i<size; i++) {
1221       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1222       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1223       ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1224       ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1225     }
1226     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1227   } else {
1228     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1229     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1230     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1231     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1232   }
1233   ierr = PetscFree(column_values);CHKERRQ(ierr);
1234 
1235   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1236   if (file) {
1237     fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
1238   }
1239   PetscFunctionReturn(0);
1240 }
1241 
1242 #undef __FUNCT__
1243 #define __FUNCT__ "MatView_MPIBAIJ"
1244 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
1245 {
1246   PetscErrorCode ierr;
1247   PetscBool      iascii,isdraw,issocket,isbinary;
1248 
1249   PetscFunctionBegin;
1250   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1251   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1252   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1253   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1254   if (iascii || isdraw || issocket) {
1255     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1256   } else if (isbinary) {
1257     ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1258   }
1259   PetscFunctionReturn(0);
1260 }
1261 
1262 #undef __FUNCT__
1263 #define __FUNCT__ "MatDestroy_MPIBAIJ"
1264 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
1265 {
1266   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1267   PetscErrorCode ierr;
1268 
1269   PetscFunctionBegin;
1270 #if defined(PETSC_USE_LOG)
1271   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
1272 #endif
1273   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1274   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
1275   ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
1276   ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
1277 #if defined(PETSC_USE_CTABLE)
1278   ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
1279 #else
1280   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
1281 #endif
1282   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
1283   ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
1284   ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
1285   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
1286   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
1287   ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);
1288   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
1289   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1290 
1291   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1292   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1293   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1294   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
1295   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1296   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1297   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1298   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C",NULL);CHKERRQ(ierr);
1299   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1300   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C",NULL);CHKERRQ(ierr);
1301   PetscFunctionReturn(0);
1302 }
1303 
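/*
   y = A*x: the scatter of the needed off-process (ghost) values of x into a->lvec is
   overlapped with the multiply by the diagonal block; the contribution of the off-diagonal
   block is then added.
*/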
1304 #undef __FUNCT__
1305 #define __FUNCT__ "MatMult_MPIBAIJ"
1306 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1307 {
1308   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1309   PetscErrorCode ierr;
1310   PetscInt       nt;
1311 
1312   PetscFunctionBegin;
1313   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1314   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1315   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1316   if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
1317   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1318   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1319   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1320   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1321   PetscFunctionReturn(0);
1322 }
1323 
1324 #undef __FUNCT__
1325 #define __FUNCT__ "MatMultAdd_MPIBAIJ"
1326 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1327 {
1328   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1329   PetscErrorCode ierr;
1330 
1331   PetscFunctionBegin;
1332   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1333   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1334   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1335   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1336   PetscFunctionReturn(0);
1337 }
1338 
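/*
   MatMultTranspose_MPIBAIJ forms y = A^T*x as the local product A_d^T*x plus
   a reverse scatter-add of B_off^T*x back to the owning processes.  The
   off-diagonal product is computed first into a->lvec; when the scatter is
   not "merged" the communication overlaps the local transpose multiply.

   Illustrative sketch (hedged; names are only for illustration):

       Vec x,y;
       ierr = MatGetVecs(A,&y,&x);CHKERRQ(ierr);
       ierr = MatMultTranspose(A,x,y);CHKERRQ(ierr);
*/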
1339 #undef __FUNCT__
1340 #define __FUNCT__ "MatMultTranspose_MPIBAIJ"
1341 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1342 {
1343   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1344   PetscErrorCode ierr;
1345   PetscBool      merged;
1346 
1347   PetscFunctionBegin;
1348   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1349   /* do nondiagonal part */
1350   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1351   if (!merged) {
1352     /* send it on its way */
1353     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1354     /* do local part */
1355     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1356     /* receive remote parts: note this assumes the values are not actually */
1357     /* inserted in yy until the next line */
1358     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1359   } else {
1360     /* do local part */
1361     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1362     /* send it on its way */
1363     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1364     /* the values were actually received in the Begin(), but we still need to call this no-op */
1365     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1366   }
1367   PetscFunctionReturn(0);
1368 }
1369 
1370 #undef __FUNCT__
1371 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
1372 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1373 {
1374   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1375   PetscErrorCode ierr;
1376 
1377   PetscFunctionBegin;
1378   /* do nondiagonal part */
1379   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1380   /* send it on its way */
1381   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1382   /* do local part */
1383   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1384   /* receive remote parts: note this assumes the values are not actually */
1385   /* inserted in zz until the next line, which is true for this implementation */
1386   /* but may not always be true. */
1387   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1388   PetscFunctionReturn(0);
1389 }
1390 
1391 /*
1392   This only works correctly for square matrices where the subblock A->A is the
1393    diagonal block
1394 */
1395 #undef __FUNCT__
1396 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
1397 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1398 {
1399   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1400   PetscErrorCode ierr;
1401 
1402   PetscFunctionBegin;
1403   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1404   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1405   PetscFunctionReturn(0);
1406 }
1407 
1408 #undef __FUNCT__
1409 #define __FUNCT__ "MatScale_MPIBAIJ"
1410 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
1411 {
1412   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1413   PetscErrorCode ierr;
1414 
1415   PetscFunctionBegin;
1416   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1417   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1418   PetscFunctionReturn(0);
1419 }
1420 
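/*
   MatGetRow_MPIBAIJ returns one locally owned row in global column numbering
   by merging the row of the diagonal block with the row of the off-diagonal
   block (whose compressed column indices are translated through garray).

   Illustrative sketch of the public interface (hedged; assumes A is an
   assembled MATMPIBAIJ matrix; names are only for illustration):

       PetscInt          row,rstart,rend,ncols;
       const PetscInt    *cols;
       const PetscScalar *vals;
       ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
       for (row=rstart; row<rend; row++) {
         ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
         ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       }
*/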
1421 #undef __FUNCT__
1422 #define __FUNCT__ "MatGetRow_MPIBAIJ"
1423 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1424 {
1425   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
1426   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1427   PetscErrorCode ierr;
1428   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1429   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
1430   PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;
1431 
1432   PetscFunctionBegin;
1433   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1434   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1435   mat->getrowactive = PETSC_TRUE;
1436 
1437   if (!mat->rowvalues && (idx || v)) {
1438     /*
1439         allocate enough space to hold information from the longest row.
1440     */
1441     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1442     PetscInt    max = 1,mbs = mat->mbs,tmp;
1443     for (i=0; i<mbs; i++) {
1444       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1445       if (max < tmp) max = tmp;
1446     }
1447     ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr);
1448   }
1449   lrow = row - brstart;
1450 
1451   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1452   if (!v)   {pvA = 0; pvB = 0;}
1453   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1454   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1455   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1456   nztot = nzA + nzB;
1457 
1458   cmap = mat->garray;
1459   if (v  || idx) {
1460     if (nztot) {
1461       /* Sort by increasing column numbers, assuming A and B already sorted */
1462       PetscInt imark = -1;
1463       if (v) {
1464         *v = v_p = mat->rowvalues;
1465         for (i=0; i<nzB; i++) {
1466           if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
1467           else break;
1468         }
1469         imark = i;
1470         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1471         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1472       }
1473       if (idx) {
1474         *idx = idx_p = mat->rowindices;
1475         if (imark > -1) {
1476           for (i=0; i<imark; i++) {
1477             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1478           }
1479         } else {
1480           for (i=0; i<nzB; i++) {
1481             if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1482             else break;
1483           }
1484           imark = i;
1485         }
1486         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1487     for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1488       }
1489     } else {
1490       if (idx) *idx = 0;
1491       if (v)   *v   = 0;
1492     }
1493   }
1494   *nz  = nztot;
1495   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1496   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1497   PetscFunctionReturn(0);
1498 }
1499 
1500 #undef __FUNCT__
1501 #define __FUNCT__ "MatRestoreRow_MPIBAIJ"
1502 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1503 {
1504   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1505 
1506   PetscFunctionBegin;
1507   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1508   baij->getrowactive = PETSC_FALSE;
1509   PetscFunctionReturn(0);
1510 }
1511 
1512 #undef __FUNCT__
1513 #define __FUNCT__ "MatZeroEntries_MPIBAIJ"
1514 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1515 {
1516   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1517   PetscErrorCode ierr;
1518 
1519   PetscFunctionBegin;
1520   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1521   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1522   PetscFunctionReturn(0);
1523 }
1524 
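/*
   MatGetInfo_MPIBAIJ adds the statistics of the diagonal and off-diagonal
   blocks and, for MAT_GLOBAL_MAX or MAT_GLOBAL_SUM, reduces them over the
   communicator of the matrix.

   Illustrative sketch (hedged; names are only for illustration):

       MatInfo info;
       ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
       ierr = PetscPrintf(PETSC_COMM_WORLD,"nz used %g allocated %g\n",info.nz_used,info.nz_allocated);CHKERRQ(ierr);
*/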
1525 #undef __FUNCT__
1526 #define __FUNCT__ "MatGetInfo_MPIBAIJ"
1527 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1528 {
1529   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
1530   Mat            A  = a->A,B = a->B;
1531   PetscErrorCode ierr;
1532   PetscReal      isend[5],irecv[5];
1533 
1534   PetscFunctionBegin;
1535   info->block_size = (PetscReal)matin->rmap->bs;
1536 
1537   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1538 
1539   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1540   isend[3] = info->memory;  isend[4] = info->mallocs;
1541 
1542   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1543 
1544   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1545   isend[3] += info->memory;  isend[4] += info->mallocs;
1546 
1547   if (flag == MAT_LOCAL) {
1548     info->nz_used      = isend[0];
1549     info->nz_allocated = isend[1];
1550     info->nz_unneeded  = isend[2];
1551     info->memory       = isend[3];
1552     info->mallocs      = isend[4];
1553   } else if (flag == MAT_GLOBAL_MAX) {
1554     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1555 
1556     info->nz_used      = irecv[0];
1557     info->nz_allocated = irecv[1];
1558     info->nz_unneeded  = irecv[2];
1559     info->memory       = irecv[3];
1560     info->mallocs      = irecv[4];
1561   } else if (flag == MAT_GLOBAL_SUM) {
1562     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1563 
1564     info->nz_used      = irecv[0];
1565     info->nz_allocated = irecv[1];
1566     info->nz_unneeded  = irecv[2];
1567     info->memory       = irecv[3];
1568     info->mallocs      = irecv[4];
1569   } else SETERRQ1(PetscObjectComm((PetscObject)matin),PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1570   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1571   info->fill_ratio_needed = 0;
1572   info->factor_mallocs    = 0;
1573   PetscFunctionReturn(0);
1574 }
1575 
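/*
   MatSetOption_MPIBAIJ forwards most options to the two sequential blocks;
   MAT_IGNORE_OFF_PROC_ENTRIES and MAT_USE_HASH_TABLE only set flags in the
   parallel data structure, and the symmetry options are applied to the
   diagonal block.

   Illustrative sketch (hedged; names are only for illustration):

       ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
       ierr = MatSetOption(A,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
*/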
1576 #undef __FUNCT__
1577 #define __FUNCT__ "MatSetOption_MPIBAIJ"
1578 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg)
1579 {
1580   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1581   PetscErrorCode ierr;
1582 
1583   PetscFunctionBegin;
1584   switch (op) {
1585   case MAT_NEW_NONZERO_LOCATIONS:
1586   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1587   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1588   case MAT_KEEP_NONZERO_PATTERN:
1589   case MAT_NEW_NONZERO_LOCATION_ERR:
1590     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1591     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1592     break;
1593   case MAT_ROW_ORIENTED:
1594     a->roworiented = flg;
1595 
1596     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1597     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1598     break;
1599   case MAT_NEW_DIAGONALS:
1600     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1601     break;
1602   case MAT_IGNORE_OFF_PROC_ENTRIES:
1603     a->donotstash = flg;
1604     break;
1605   case MAT_USE_HASH_TABLE:
1606     a->ht_flag = flg;
1607     break;
1608   case MAT_SYMMETRIC:
1609   case MAT_STRUCTURALLY_SYMMETRIC:
1610   case MAT_HERMITIAN:
1611   case MAT_SYMMETRY_ETERNAL:
1612     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1613     break;
1614   default:
1615     SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"unknown option %d",op);
1616   }
1617   PetscFunctionReturn(0);
1618 }
1619 
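/*
   MatTranspose_MPIBAIJ builds the transpose by inserting the entries of both
   local blocks of A into a new matrix with row and column indices swapped and
   then assembling it; no preallocation information is available for the
   result, so the insertions may allocate as they go.

   Illustrative sketch (hedged; names are only for illustration):

       Mat At;
       ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
       ierr = MatDestroy(&At);CHKERRQ(ierr);
*/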
1620 #undef __FUNCT__
1621 #define __FUNCT__ "MatTranspose_MPIBAIJ"
1622 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
1623 {
1624   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
1625   Mat_SeqBAIJ    *Aloc;
1626   Mat            B;
1627   PetscErrorCode ierr;
1628   PetscInt       M =A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
1629   PetscInt       bs=A->rmap->bs,mbs=baij->mbs;
1630   MatScalar      *a;
1631 
1632   PetscFunctionBegin;
1633   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1634   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1635     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1636     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1637     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1638     /* Do not know preallocation information, but must set block size */
1639     ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,NULL,PETSC_DECIDE,NULL);CHKERRQ(ierr);
1640   } else {
1641     B = *matout;
1642   }
1643 
1644   /* copy over the A part */
1645   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1646   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1647   ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);
1648 
1649   for (i=0; i<mbs; i++) {
1650     rvals[0] = bs*(baij->rstartbs + i);
1651     for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1652     for (j=ai[i]; j<ai[i+1]; j++) {
1653       col = (baij->cstartbs+aj[j])*bs;
1654       for (k=0; k<bs; k++) {
1655         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1656 
1657         col++; a += bs;
1658       }
1659     }
1660   }
1661   /* copy over the B part */
1662   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1663   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1664   for (i=0; i<mbs; i++) {
1665     rvals[0] = bs*(baij->rstartbs + i);
1666     for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1667     for (j=ai[i]; j<ai[i+1]; j++) {
1668       col = baij->garray[aj[j]]*bs;
1669       for (k=0; k<bs; k++) {
1670         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1671         col++;
1672         a += bs;
1673       }
1674     }
1675   }
1676   ierr = PetscFree(rvals);CHKERRQ(ierr);
1677   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1678   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1679 
1680   if (reuse == MAT_INITIAL_MATRIX || *matout != A) *matout = B;
1681   else {
1682     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
1683   }
1684   PetscFunctionReturn(0);
1685 }
1686 
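/*
   MatDiagonalScale_MPIBAIJ computes A = diag(ll)*A*diag(rr).  The right
   scaling of the off-diagonal block needs the ghost entries of rr, so the
   forward scatter of rr is overlapped with the left scaling and with scaling
   the diagonal block.

   Illustrative sketch (hedged; names are only for illustration):

       Vec l,r;
       ierr = MatGetVecs(A,&r,&l);CHKERRQ(ierr);
       ierr = VecSet(l,2.0);CHKERRQ(ierr);
       ierr = VecSet(r,0.5);CHKERRQ(ierr);
       ierr = MatDiagonalScale(A,l,r);CHKERRQ(ierr);
*/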
1687 #undef __FUNCT__
1688 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
1689 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1690 {
1691   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1692   Mat            a     = baij->A,b = baij->B;
1693   PetscErrorCode ierr;
1694   PetscInt       s1,s2,s3;
1695 
1696   PetscFunctionBegin;
1697   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1698   if (rr) {
1699     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1700     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1701     /* Overlap communication with computation. */
1702     ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1703   }
1704   if (ll) {
1705     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1706     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1707     ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
1708   }
1709   /* scale the diagonal block */
1710   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1711 
1712   if (rr) {
1713     /* Do a scatter end and then right scale the off-diagonal block */
1714     ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1715     ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
1716   }
1717   PetscFunctionReturn(0);
1718 }
1719 
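/*
   MatZeroRows_MPIBAIJ uses a PetscSF to route the (possibly off-process)
   global rows in the input list to the processes that own them, zeroes those
   local rows, optionally places diag on the diagonal, and adjusts the right
   hand side b from the solution vector x.

   Illustrative sketch (hedged; assumes rows[] holds global row numbers and
   x,b follow the row layout of A; names are only for illustration):

       PetscInt    rows[2] = {0,5};
       PetscScalar diag    = 1.0;
       ierr = MatZeroRows(A,2,rows,diag,x,b);CHKERRQ(ierr);
*/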
1720 #undef __FUNCT__
1721 #define __FUNCT__ "MatZeroRows_MPIBAIJ"
1722 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
1723 {
1724   Mat_MPIBAIJ   *l      = (Mat_MPIBAIJ *) A->data;
1725   PetscInt      *owners = A->rmap->range;
1726   PetscInt       n      = A->rmap->n;
1727   PetscMPIInt    size   = l->size;
1728   PetscSF        sf;
1729   PetscInt      *lrows;
1730   PetscSFNode   *rrows;
1731   PetscInt       lastidx = -1, r, p = 0, len = 0;
1732   PetscErrorCode ierr;
1733 
1734   PetscFunctionBegin;
1735   /* Create SF where leaves are input rows and roots are owned rows */
1736   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
1737   for (r = 0; r < n; ++r) lrows[r] = -1;
1738   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
1739   for (r = 0; r < N; ++r) {
1740     const PetscInt idx   = rows[r];
1741     PetscBool      found = PETSC_FALSE;
1742     /* Trick for efficient search when the input rows are sorted */
1743     if (lastidx > idx) p = 0;
1744     lastidx = idx;
1745     for (; p < size; ++p) {
1746       if (idx >= owners[p] && idx < owners[p+1]) {
1747         rrows[r].rank  = p;
1748         rrows[r].index = rows[r] - owners[p];
1749         found = PETSC_TRUE;
1750         break;
1751       }
1752     }
1753     if (!found) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %d not found in matrix distribution", idx);
1754   }
1755   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
1756   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
1757   /* Collect flags for rows to be zeroed */
1758   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1759   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1760   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1761   /* Compress and put in row numbers */
1762   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
1763   /* fix right hand side if needed */
1764   if (x && b) {
1765     const PetscScalar *xx;
1766     PetscScalar       *bb;
1767 
1768     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
1769     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1770     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
1771     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
1772     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1773   }
1774 
1775   /* actually zap the local rows */
1776   /*
1777         Zero the required rows. If the "diagonal block" of the matrix
1778      is square and the user wishes to set the diagonal, we use separate
1779      code so that MatSetValues() is not called for each diagonal entry;
1780      that would allocate new memory each time, causing many mallocs and
1781      slowing things down.
1782   */
1783   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
1784   ierr = MatZeroRows_SeqBAIJ(l->B,len,lrows,0.0,0,0);CHKERRQ(ierr);
1785   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
1786     ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,diag,0,0);CHKERRQ(ierr);
1787   } else if (diag != 0.0) {
1788     ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,0,0);CHKERRQ(ierr);
1789     if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1790        MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1791     for (r = 0; r < len; ++r) {
1792       const PetscInt row = lrows[r] + A->rmap->rstart;
1793       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
1794     }
1795     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1796     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1797   } else {
1798     ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,0,0);CHKERRQ(ierr);
1799   }
1800   ierr = PetscFree(lrows);CHKERRQ(ierr);
1801 
1802   /* only change matrix nonzero state if pattern was allowed to be changed */
1803   if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) {
1804     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1805     ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1806   }
1807   PetscFunctionReturn(0);
1808 }
1809 
1810 #undef __FUNCT__
1811 #define __FUNCT__ "MatZeroRowsColumns_MPIBAIJ"
1812 PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
1813 {
1814   Mat_MPIBAIJ       *l = (Mat_MPIBAIJ*)A->data;
1815   PetscErrorCode    ierr;
1816   PetscMPIInt       size = l->size;
1817   PetscInt          i,j,k,r,p = 0,len = 0,row,col,count,n = A->rmap->n,lastidx = -1;
1818   PetscInt          *lrows,*owners = A->rmap->range;
1819   PetscSFNode       *rrows;
1820   PetscSF           sf;
1821   const PetscScalar *xx;
1822   PetscScalar       *bb,*mask;
1823   Vec               xmask,lmask;
1824   Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ*)l->B->data;
1825   PetscInt           bs = A->rmap->bs, bs2 = baij->bs2;
1826   PetscScalar       *aa;
1827 #if defined(PETSC_DEBUG)
1828   PetscBool found = PETSC_FALSE;
1829 #endif
1830 
1831   PetscFunctionBegin;
1832   /* Create SF where leaves are input rows and roots are owned rows */
1833   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
1834   for (r = 0; r < n; ++r) lrows[r] = -1;
1835   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
1836   for (r = 0; r < N; ++r) {
1837     const PetscInt idx   = rows[r];
1838     PetscBool      found = PETSC_FALSE;
1839     /* Trick for efficient search when the input rows are sorted */
1840     if (lastidx > idx) p = 0;
1841     lastidx = idx;
1842     for (; p < size; ++p) {
1843       if (idx >= owners[p] && idx < owners[p+1]) {
1844         rrows[r].rank  = p;
1845         rrows[r].index = rows[r] - owners[p];
1846         found = PETSC_TRUE;
1847         break;
1848       }
1849     }
1850     if (!found) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %d not found in matrix distribution", idx);
1851   }
1852   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
1853   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
1854   /* Collect flags for rows to be zeroed */
1855   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1856   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1857   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1858   /* Compress and put in row numbers */
1859   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
1860   /* zero diagonal part of matrix */
1861   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
1862   /* handle off diagonal part of matrix */
1863   ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr);
1864   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
1865   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
1866   for (i=0; i<len; i++) bb[lrows[i]] = 1;
1867   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1868   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1869   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1870   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1871   if (x) {
1872     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1873     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1874     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1875     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1876   }
1877   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1878   /* remove zeroed rows of off diagonal matrix */
1879   for (i = 0; i < len; ++i) {
1880     row   = lrows[i];
1881     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
1882     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
1883     for (k = 0; k < count; ++k) {
1884       aa[0] = 0.0;
1885       aa   += bs;
1886     }
1887   }
1888   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
1889   for (i = 0; i < l->B->rmap->N; ++i) {
1890     row = i/bs;
1891     for (j = baij->i[row]; j < baij->i[row+1]; ++j) {
1892       for (k = 0; k < bs; ++k) {
1893         col = bs*baij->j[j] + k;
1894         if (PetscAbsScalar(mask[col])) {
1895           aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
1896           if (b) bb[i] -= aa[0]*xx[col];
1897           aa[0] = 0.0;
1898         }
1899       }
1900     }
1901   }
1902   if (x) {
1903     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1904     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1905   }
1906   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1907   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1908   ierr = PetscFree(lrows);CHKERRQ(ierr);
1909 
1910   /* only change matrix nonzero state if pattern was allowed to be changed */
1911   if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) {
1912     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1913     ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1914   }
1915   PetscFunctionReturn(0);
1916 }
1917 
1918 #undef __FUNCT__
1919 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
1920 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1921 {
1922   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1923   PetscErrorCode ierr;
1924 
1925   PetscFunctionBegin;
1926   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1927   PetscFunctionReturn(0);
1928 }
1929 
1930 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat*);
1931 
1932 #undef __FUNCT__
1933 #define __FUNCT__ "MatEqual_MPIBAIJ"
1934 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool  *flag)
1935 {
1936   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1937   Mat            a,b,c,d;
1938   PetscBool      flg;
1939   PetscErrorCode ierr;
1940 
1941   PetscFunctionBegin;
1942   a = matA->A; b = matA->B;
1943   c = matB->A; d = matB->B;
1944 
1945   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1946   if (flg) {
1947     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1948   }
1949   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1950   PetscFunctionReturn(0);
1951 }
1952 
1953 #undef __FUNCT__
1954 #define __FUNCT__ "MatCopy_MPIBAIJ"
1955 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
1956 {
1957   PetscErrorCode ierr;
1958   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1959   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
1960 
1961   PetscFunctionBegin;
1962   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1963   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1964     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1965   } else {
1966     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1967     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1968   }
1969   PetscFunctionReturn(0);
1970 }
1971 
1972 #undef __FUNCT__
1973 #define __FUNCT__ "MatSetUp_MPIBAIJ"
1974 PetscErrorCode MatSetUp_MPIBAIJ(Mat A)
1975 {
1976   PetscErrorCode ierr;
1977 
1978   PetscFunctionBegin;
1979   ierr = MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1980   PetscFunctionReturn(0);
1981 }
1982 
1983 #undef __FUNCT__
1984 #define __FUNCT__ "MatAXPYGetPreallocation_MPIBAIJ"
1985 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
1986 PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
1987 {
1988   PetscInt        i,bs=Y->rmap->bs,m=Y->rmap->N/bs;
1989   Mat_SeqBAIJ     *x  = (Mat_SeqBAIJ*)X->data;
1990   Mat_SeqBAIJ     *y  = (Mat_SeqBAIJ*)Y->data;
1991   const PetscInt  *xi = x->i,*yi = y->i;
1992 
1993   PetscFunctionBegin;
1994   /* Set the number of nonzeros in the new matrix */
1995   for (i=0; i<m; i++) {
1996     PetscInt       j,k,nzx = xi[i+1] - xi[i],nzy = yi[i+1] - yi[i];
1997     const PetscInt *xj = x->j+xi[i],*yj = y->j+yi[i];
1998     nnz[i] = 0;
1999     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2000       for (; k<nzy && yltog[yj[k]]<xltog[xj[j]]; k++) nnz[i]++; /* Catch up to X */
2001       if (k<nzy && yltog[yj[k]]==xltog[xj[j]]) k++;             /* Skip duplicate */
2002       nnz[i]++;
2003     }
2004     for (; k<nzy; k++) nnz[i]++;
2005   }
2006   PetscFunctionReturn(0);
2007 }
2008 
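/*
   MatAXPY_MPIBAIJ computes Y = a*X + Y.  With SAME_NONZERO_PATTERN the value
   arrays of the two blocks are combined directly with BLAS axpy; otherwise a
   new matrix with the merged nonzero pattern is preallocated (using the
   routine above for the off-diagonal part) and filled entry by entry.

   Illustrative sketch (hedged; names are only for illustration):

       ierr = MatAXPY(Y,2.0,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
*/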
2009 #undef __FUNCT__
2010 #define __FUNCT__ "MatAXPY_MPIBAIJ"
2011 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2012 {
2013   PetscErrorCode ierr;
2014   Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ*)X->data,*yy=(Mat_MPIBAIJ*)Y->data;
2015   PetscBLASInt   bnz,one=1;
2016   Mat_SeqBAIJ    *x,*y;
2017 
2018   PetscFunctionBegin;
2019   if (str == SAME_NONZERO_PATTERN) {
2020     PetscScalar alpha = a;
2021     x    = (Mat_SeqBAIJ*)xx->A->data;
2022     y    = (Mat_SeqBAIJ*)yy->A->data;
2023     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2024     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2025     x    = (Mat_SeqBAIJ*)xx->B->data;
2026     y    = (Mat_SeqBAIJ*)yy->B->data;
2027     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2028     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2029     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2030   } else {
2031     Mat      B;
2032     PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs;
2033     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2034     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2035     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2036     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2037     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2038     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2039     ierr = MatSetType(B,MATMPIBAIJ);CHKERRQ(ierr);
2040     ierr = MatAXPYGetPreallocation_SeqBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2041     ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2042     ierr = MatMPIBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2043     /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! */
2044     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2045     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2046     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2047     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2048   }
2049   PetscFunctionReturn(0);
2050 }
2051 
2052 #undef __FUNCT__
2053 #define __FUNCT__ "MatRealPart_MPIBAIJ"
2054 PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
2055 {
2056   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
2057   PetscErrorCode ierr;
2058 
2059   PetscFunctionBegin;
2060   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2061   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2062   PetscFunctionReturn(0);
2063 }
2064 
2065 #undef __FUNCT__
2066 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ"
2067 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
2068 {
2069   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
2070   PetscErrorCode ierr;
2071 
2072   PetscFunctionBegin;
2073   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2074   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2075   PetscFunctionReturn(0);
2076 }
2077 
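/*
   MatGetSubMatrix_MPIBAIJ gathers the parallel column index set onto every
   process, extracts the requested rows and columns with the private routine
   below, and caches the gathered IS on the result so that MAT_REUSE_MATRIX
   calls can reuse it.

   Illustrative sketch (hedged; assumes isrow and iscol are block-aligned
   index sets; names are only for illustration):

       Mat sub;
       ierr = MatGetSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);
       ierr = MatGetSubMatrix(A,isrow,iscol,MAT_REUSE_MATRIX,&sub);CHKERRQ(ierr);
       ierr = MatDestroy(&sub);CHKERRQ(ierr);
*/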
2078 #undef __FUNCT__
2079 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ"
2080 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
2081 {
2082   PetscErrorCode ierr;
2083   IS             iscol_local;
2084   PetscInt       csize;
2085 
2086   PetscFunctionBegin;
2087   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
2088   if (call == MAT_REUSE_MATRIX) {
2089     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
2090     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2091   } else {
2092     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
2093   }
2094   ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
2095   if (call == MAT_INITIAL_MATRIX) {
2096     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
2097     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
2098   }
2099   PetscFunctionReturn(0);
2100 }
2101 extern PetscErrorCode MatGetSubMatrices_MPIBAIJ_local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,PetscBool*,Mat*);
2102 #undef __FUNCT__
2103 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ_Private"
2104 /*
2105   Not great since it makes two copies of the submatrix: first a local SeqBAIJ
2106   on each process, and then the end result by concatenating the local matrices.
2107   Writing it directly would be much like MatGetSubMatrices_MPIBAIJ()
2108 */
2109 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2110 {
2111   PetscErrorCode ierr;
2112   PetscMPIInt    rank,size;
2113   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs;
2114   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol,nrow;
2115   Mat            M,Mreuse;
2116   MatScalar      *vwork,*aa;
2117   MPI_Comm       comm;
2118   IS             isrow_new, iscol_new;
2119   PetscBool      idflag,allrows, allcols;
2120   Mat_SeqBAIJ    *aij;
2121 
2122   PetscFunctionBegin;
2123   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2124   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2125   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2126   /* The compression and expansion should be avoided. It does not report
2127      errors and might change the indices, hence it is buggy */
2128   ierr = ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new);CHKERRQ(ierr);
2129   ierr = ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new);CHKERRQ(ierr);
2130 
2131   /* Check for special case: each processor gets entire matrix columns */
2132   ierr = ISIdentity(iscol,&idflag);CHKERRQ(ierr);
2133   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
2134   if (idflag && ncol == mat->cmap->N) allcols = PETSC_TRUE;
2135   else allcols = PETSC_FALSE;
2136 
2137   ierr = ISIdentity(isrow,&idflag);CHKERRQ(ierr);
2138   ierr = ISGetLocalSize(isrow,&nrow);CHKERRQ(ierr);
2139   if (idflag && nrow == mat->rmap->N) allrows = PETSC_TRUE;
2140   else allrows = PETSC_FALSE;
2141 
2142   if (call ==  MAT_REUSE_MATRIX) {
2143     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
2144     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2145     ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_REUSE_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr);
2146   } else {
2147     ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_INITIAL_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr);
2148   }
2149   ierr = ISDestroy(&isrow_new);CHKERRQ(ierr);
2150   ierr = ISDestroy(&iscol_new);CHKERRQ(ierr);
2151   /*
2152       m - number of local rows
2153       n - number of columns (same on all processors)
2154       rstart - first row in new global matrix generated
2155   */
2156   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2157   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2158   m    = m/bs;
2159   n    = n/bs;
2160 
2161   if (call == MAT_INITIAL_MATRIX) {
2162     aij = (Mat_SeqBAIJ*)(Mreuse)->data;
2163     ii  = aij->i;
2164     jj  = aij->j;
2165 
2166     /*
2167         Determine the number of non-zeros in the diagonal and off-diagonal
2168         portions of the matrix in order to do correct preallocation
2169     */
2170 
2171     /* first get start and end of "diagonal" columns */
2172     if (csize == PETSC_DECIDE) {
2173       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2174       if (mglobal == n*bs) { /* square matrix */
2175         nlocal = m;
2176       } else {
2177         nlocal = n/size + ((n % size) > rank);
2178       }
2179     } else {
2180       nlocal = csize/bs;
2181     }
2182     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2183     rstart = rend - nlocal;
2184     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2185 
2186     /* next, compute all the lengths */
2187     ierr  = PetscMalloc2(m+1,&dlens,m+1,&olens);CHKERRQ(ierr);
2188     for (i=0; i<m; i++) {
2189       jend = ii[i+1] - ii[i];
2190       olen = 0;
2191       dlen = 0;
2192       for (j=0; j<jend; j++) {
2193         if (*jj < rstart || *jj >= rend) olen++;
2194         else dlen++;
2195         jj++;
2196       }
2197       olens[i] = olen;
2198       dlens[i] = dlen;
2199     }
2200     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
2201     ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr);
2202     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
2203     ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
2204     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
2205   } else {
2206     PetscInt ml,nl;
2207 
2208     M    = *newmat;
2209     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2210     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2211     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2212     /*
2213          The next two lines are needed so we may call MatSetValuesBlocked_MPIBAIJ() below directly,
2214        rather than the slower MatSetValues().
2215     */
2216     M->was_assembled = PETSC_TRUE;
2217     M->assembled     = PETSC_FALSE;
2218   }
2219   ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2220   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2221   aij  = (Mat_SeqBAIJ*)(Mreuse)->data;
2222   ii   = aij->i;
2223   jj   = aij->j;
2224   aa   = aij->a;
2225   for (i=0; i<m; i++) {
2226     row   = rstart/bs + i;
2227     nz    = ii[i+1] - ii[i];
2228     cwork = jj;     jj += nz;
2229     vwork = aa;     aa += nz*bs*bs;
2230     ierr  = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2231   }
2232 
2233   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2234   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2235   *newmat = M;
2236 
2237   /* save submatrix used in processor for next request */
2238   if (call ==  MAT_INITIAL_MATRIX) {
2239     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2240     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2241   }
2242   PetscFunctionReturn(0);
2243 }
2244 
2245 #undef __FUNCT__
2246 #define __FUNCT__ "MatPermute_MPIBAIJ"
2247 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B)
2248 {
2249   MPI_Comm       comm,pcomm;
2250   PetscInt       clocal_size,nrows;
2251   const PetscInt *rows;
2252   PetscMPIInt    size;
2253   IS             crowp,lcolp;
2254   PetscErrorCode ierr;
2255 
2256   PetscFunctionBegin;
2257   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2258   /* make a collective version of 'rowp' */
2259   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
2260   if (pcomm==comm) {
2261     crowp = rowp;
2262   } else {
2263     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
2264     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
2265     ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr);
2266     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
2267   }
2268   ierr = ISSetPermutation(crowp);CHKERRQ(ierr);
2269   /* make a local version of 'colp' */
2270   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
2271   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
2272   if (size==1) {
2273     lcolp = colp;
2274   } else {
2275     ierr = ISAllGather(colp,&lcolp);CHKERRQ(ierr);
2276   }
2277   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
2278   /* now we just get the submatrix */
2279   ierr = MatGetLocalSize(A,NULL,&clocal_size);CHKERRQ(ierr);
2280   ierr = MatGetSubMatrix_MPIBAIJ_Private(A,crowp,lcolp,clocal_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
2281   /* clean up */
2282   if (pcomm!=comm) {
2283     ierr = ISDestroy(&crowp);CHKERRQ(ierr);
2284   }
2285   if (size>1) {
2286     ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
2287   }
2288   PetscFunctionReturn(0);
2289 }
2290 
2291 #undef __FUNCT__
2292 #define __FUNCT__ "MatGetGhosts_MPIBAIJ"
2293 PetscErrorCode  MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
2294 {
2295   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data;
2296   Mat_SeqBAIJ *B    = (Mat_SeqBAIJ*)baij->B->data;
2297 
2298   PetscFunctionBegin;
2299   if (nghosts) *nghosts = B->nbs;
2300   if (ghosts) *ghosts = baij->garray;
2301   PetscFunctionReturn(0);
2302 }
2303 
2304 #undef __FUNCT__
2305 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIBAIJ"
2306 PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat)
2307 {
2308   Mat            B;
2309   Mat_MPIBAIJ    *a  = (Mat_MPIBAIJ*)A->data;
2310   Mat_SeqBAIJ    *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data;
2311   Mat_SeqAIJ     *b;
2312   PetscErrorCode ierr;
2313   PetscMPIInt    size,rank,*recvcounts = 0,*displs = 0;
2314   PetscInt       sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs;
2315   PetscInt       m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf;
2316 
2317   PetscFunctionBegin;
2318   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
2319   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
2320 
2321   /* ----------------------------------------------------------------
2322      Tell every processor the number of nonzeros per row
2323   */
2324   ierr = PetscMalloc1((A->rmap->N/bs),&lens);CHKERRQ(ierr);
2325   for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) {
2326     lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs];
2327   }
2328   sendcount = A->rmap->rend/bs - A->rmap->rstart/bs;
2329   ierr      = PetscMalloc1(2*size,&recvcounts);CHKERRQ(ierr);
2330   displs    = recvcounts + size;
2331   for (i=0; i<size; i++) {
2332     recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs;
2333     displs[i]     = A->rmap->range[i]/bs;
2334   }
2335 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2336   ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2337 #else
2338   ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2339 #endif
2340   /* ---------------------------------------------------------------
2341      Create the sequential matrix of the same type as the local block diagonal
2342   */
2343   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
2344   ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2345   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2346   ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr);
2347   b    = (Mat_SeqAIJ*)B->data;
2348 
2349   /*--------------------------------------------------------------------
2350     Copy my part of matrix column indices over
2351   */
2352   sendcount  = ad->nz + bd->nz;
2353   jsendbuf   = b->j + b->i[rstarts[rank]/bs];
2354   a_jsendbuf = ad->j;
2355   b_jsendbuf = bd->j;
2356   n          = A->rmap->rend/bs - A->rmap->rstart/bs;
2357   cnt        = 0;
2358   for (i=0; i<n; i++) {
2359 
2360     /* put in lower diagonal portion */
2361     m = bd->i[i+1] - bd->i[i];
2362     while (m > 0) {
2363       /* is it above diagonal (in bd (compressed) numbering) */
2364       if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break;
2365       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2366       m--;
2367     }
2368 
2369     /* put in diagonal portion */
2370     for (j=ad->i[i]; j<ad->i[i+1]; j++) {
2371       jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++;
2372     }
2373 
2374     /* put in upper diagonal portion */
2375     while (m-- > 0) {
2376       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2377     }
2378   }
2379   if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt);
2380 
2381   /*--------------------------------------------------------------------
2382     Gather all column indices to all processors
2383   */
2384   for (i=0; i<size; i++) {
2385     recvcounts[i] = 0;
2386     for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) {
2387       recvcounts[i] += lens[j];
2388     }
2389   }
2390   displs[0] = 0;
2391   for (i=1; i<size; i++) {
2392     displs[i] = displs[i-1] + recvcounts[i-1];
2393   }
2394 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2395   ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2396 #else
2397   ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2398 #endif
2399   /*--------------------------------------------------------------------
2400     Assemble the matrix into usable form (note that the numerical values are not yet set)
2401   */
2402   /* set the b->ilen (length of each row) values */
2403   ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr);
2404   /* set the b->i indices */
2405   b->i[0] = 0;
2406   for (i=1; i<=A->rmap->N/bs; i++) {
2407     b->i[i] = b->i[i-1] + lens[i-1];
2408   }
2409   ierr = PetscFree(lens);CHKERRQ(ierr);
2410   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2411   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2412   ierr = PetscFree(recvcounts);CHKERRQ(ierr);
2413 
2414   if (A->symmetric) {
2415     ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2416   } else if (A->hermitian) {
2417     ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr);
2418   } else if (A->structurally_symmetric) {
2419     ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2420   }
2421   *newmat = B;
2422   PetscFunctionReturn(0);
2423 }
2424 
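/*
   MatSOR_MPIBAIJ supports only the "local" SOR variants: each outer iteration
   scatters the current solution, forms bb1 = bb - B*x with the off-diagonal
   block, and then runs the sequential SOR kernel of the diagonal block on
   bb1.

   Illustrative sketch (hedged; names are only for illustration):

       ierr = MatSOR(A,b,1.0,SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS,0.0,1,1,x);CHKERRQ(ierr);
*/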
2425 #undef __FUNCT__
2426 #define __FUNCT__ "MatSOR_MPIBAIJ"
2427 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2428 {
2429   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
2430   PetscErrorCode ierr;
2431   Vec            bb1 = 0;
2432 
2433   PetscFunctionBegin;
2434   if (flag == SOR_APPLY_UPPER) {
2435     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2436     PetscFunctionReturn(0);
2437   }
2438 
2439   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) {
2440     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2441   }
2442 
2443   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2444     if (flag & SOR_ZERO_INITIAL_GUESS) {
2445       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2446       its--;
2447     }
2448 
2449     while (its--) {
2450       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2451       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2452 
2453       /* update rhs: bb1 = bb - B*x */
2454       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2455       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2456 
2457       /* local sweep */
2458       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2459     }
2460   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
2461     if (flag & SOR_ZERO_INITIAL_GUESS) {
2462       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2463       its--;
2464     }
2465     while (its--) {
2466       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2467       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2468 
2469       /* update rhs: bb1 = bb - B*x */
2470       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2471       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2472 
2473       /* local sweep */
2474       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2475     }
2476   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
2477     if (flag & SOR_ZERO_INITIAL_GUESS) {
2478       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2479       its--;
2480     }
2481     while (its--) {
2482       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2483       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2484 
2485       /* update rhs: bb1 = bb - B*x */
2486       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2487       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2488 
2489       /* local sweep */
2490       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2491     }
2492   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel version of SOR requested is not supported");
2493 
2494   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2495   PetscFunctionReturn(0);
2496 }
2497 
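/*
   MatGetColumnNorms_MPIBAIJ accumulates the per-column contributions of both
   blocks into a work array of global length and reduces it over the
   communicator (max for NORM_INFINITY, sum otherwise, with a final square
   root for NORM_2).

   Illustrative sketch (hedged; names are only for illustration):

       PetscInt  N;
       PetscReal *norms;
       ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
       ierr = PetscMalloc1(N,&norms);CHKERRQ(ierr);
       ierr = MatGetColumnNorms(A,NORM_2,norms);CHKERRQ(ierr);
       ierr = PetscFree(norms);CHKERRQ(ierr);
*/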
2498 #undef __FUNCT__
2499 #define __FUNCT__ "MatGetColumnNorms_MPIBAIJ"
2500 PetscErrorCode MatGetColumnNorms_MPIBAIJ(Mat A,NormType type,PetscReal *norms)
2501 {
2502   PetscErrorCode ierr;
2503   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)A->data;
2504   PetscInt       N,i,*garray = aij->garray;
2505   PetscInt       ib,jb,bs = A->rmap->bs;
2506   Mat_SeqBAIJ    *a_aij = (Mat_SeqBAIJ*) aij->A->data;
2507   MatScalar      *a_val = a_aij->a;
2508   Mat_SeqBAIJ    *b_aij = (Mat_SeqBAIJ*) aij->B->data;
2509   MatScalar      *b_val = b_aij->a;
2510   PetscReal      *work;
2511 
2512   PetscFunctionBegin;
2513   ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
2514   ierr = PetscCalloc1(N,&work);CHKERRQ(ierr);
2515   if (type == NORM_2) {
2516     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2517       for (jb=0; jb<bs; jb++) {
2518         for (ib=0; ib<bs; ib++) {
2519           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
2520           a_val++;
2521         }
2522       }
2523     }
2524     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2525       for (jb=0; jb<bs; jb++) {
2526         for (ib=0; ib<bs; ib++) {
2527           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val);
2528           b_val++;
2529         }
2530       }
2531     }
2532   } else if (type == NORM_1) {
2533     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2534       for (jb=0; jb<bs; jb++) {
2535         for (ib=0; ib<bs; ib++) {
2536           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
2537           a_val++;
2538         }
2539       }
2540     }
2541     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2542       for (jb=0; jb<bs; jb++) {
2543         for (ib=0; ib<bs; ib++) {
2544           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val);
2545           b_val++;
2546         }
2547       }
2548     }
2549   } else if (type == NORM_INFINITY) {
2550     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2551       for (jb=0; jb<bs; jb++) {
2552         for (ib=0; ib<bs; ib++) {
2553           PetscInt col = A->cmap->rstart + a_aij->j[i] * bs + jb;
2554           work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]);
2555           a_val++;
2556         }
2557       }
2558     }
2559     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2560       for (jb=0; jb<bs; jb++) {
2561         for (ib=0; ib<bs; ib++) {
2562           PetscInt col = garray[b_aij->j[i]] * bs + jb;
2563           work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]);
2564           b_val++;
2565         }
2566       }
2567     }
2568   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
2569   if (type == NORM_INFINITY) {
2570     ierr = MPI_Allreduce(work,norms,N,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2571   } else {
2572     ierr = MPI_Allreduce(work,norms,N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2573   }
2574   ierr = PetscFree(work);CHKERRQ(ierr);
2575   if (type == NORM_2) {
2576     for (i=0; i<N; i++) norms[i] = PetscSqrtReal(norms[i]);
2577   }
2578   PetscFunctionReturn(0);
2579 }
2580 
2581 #undef __FUNCT__
2582 #define __FUNCT__ "MatInvertBlockDiagonal_MPIBAIJ"
2583 PetscErrorCode  MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values)
2584 {
2585   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*) A->data;
2586   PetscErrorCode ierr;
2587 
2588   PetscFunctionBegin;
2589   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2590   PetscFunctionReturn(0);
2591 }
2592 
2593 
2594 /* -------------------------------------------------------------------*/
2595 static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ,
2596                                        MatGetRow_MPIBAIJ,
2597                                        MatRestoreRow_MPIBAIJ,
2598                                        MatMult_MPIBAIJ,
2599                                 /* 4*/ MatMultAdd_MPIBAIJ,
2600                                        MatMultTranspose_MPIBAIJ,
2601                                        MatMultTransposeAdd_MPIBAIJ,
2602                                        0,
2603                                        0,
2604                                        0,
2605                                 /*10*/ 0,
2606                                        0,
2607                                        0,
2608                                        MatSOR_MPIBAIJ,
2609                                        MatTranspose_MPIBAIJ,
2610                                 /*15*/ MatGetInfo_MPIBAIJ,
2611                                        MatEqual_MPIBAIJ,
2612                                        MatGetDiagonal_MPIBAIJ,
2613                                        MatDiagonalScale_MPIBAIJ,
2614                                        MatNorm_MPIBAIJ,
2615                                 /*20*/ MatAssemblyBegin_MPIBAIJ,
2616                                        MatAssemblyEnd_MPIBAIJ,
2617                                        MatSetOption_MPIBAIJ,
2618                                        MatZeroEntries_MPIBAIJ,
2619                                 /*24*/ MatZeroRows_MPIBAIJ,
2620                                        0,
2621                                        0,
2622                                        0,
2623                                        0,
2624                                 /*29*/ MatSetUp_MPIBAIJ,
2625                                        0,
2626                                        0,
2627                                        0,
2628                                        0,
2629                                 /*34*/ MatDuplicate_MPIBAIJ,
2630                                        0,
2631                                        0,
2632                                        0,
2633                                        0,
2634                                 /*39*/ MatAXPY_MPIBAIJ,
2635                                        MatGetSubMatrices_MPIBAIJ,
2636                                        MatIncreaseOverlap_MPIBAIJ,
2637                                        MatGetValues_MPIBAIJ,
2638                                        MatCopy_MPIBAIJ,
2639                                 /*44*/ 0,
2640                                        MatScale_MPIBAIJ,
2641                                        0,
2642                                        0,
2643                                        MatZeroRowsColumns_MPIBAIJ,
2644                                 /*49*/ 0,
2645                                        0,
2646                                        0,
2647                                        0,
2648                                        0,
2649                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2650                                        0,
2651                                        MatSetUnfactored_MPIBAIJ,
2652                                        MatPermute_MPIBAIJ,
2653                                        MatSetValuesBlocked_MPIBAIJ,
2654                                 /*59*/ MatGetSubMatrix_MPIBAIJ,
2655                                        MatDestroy_MPIBAIJ,
2656                                        MatView_MPIBAIJ,
2657                                        0,
2658                                        0,
2659                                 /*64*/ 0,
2660                                        0,
2661                                        0,
2662                                        0,
2663                                        0,
2664                                 /*69*/ MatGetRowMaxAbs_MPIBAIJ,
2665                                        0,
2666                                        0,
2667                                        0,
2668                                        0,
2669                                 /*74*/ 0,
2670                                        MatFDColoringApply_BAIJ,
2671                                        0,
2672                                        0,
2673                                        0,
2674                                 /*79*/ 0,
2675                                        0,
2676                                        0,
2677                                        0,
2678                                        MatLoad_MPIBAIJ,
2679                                 /*84*/ 0,
2680                                        0,
2681                                        0,
2682                                        0,
2683                                        0,
2684                                 /*89*/ 0,
2685                                        0,
2686                                        0,
2687                                        0,
2688                                        0,
2689                                 /*94*/ 0,
2690                                        0,
2691                                        0,
2692                                        0,
2693                                        0,
2694                                 /*99*/ 0,
2695                                        0,
2696                                        0,
2697                                        0,
2698                                        0,
2699                                 /*104*/0,
2700                                        MatRealPart_MPIBAIJ,
2701                                        MatImaginaryPart_MPIBAIJ,
2702                                        0,
2703                                        0,
2704                                 /*109*/0,
2705                                        0,
2706                                        0,
2707                                        0,
2708                                        0,
2709                                 /*114*/MatGetSeqNonzeroStructure_MPIBAIJ,
2710                                        0,
2711                                        MatGetGhosts_MPIBAIJ,
2712                                        0,
2713                                        0,
2714                                 /*119*/0,
2715                                        0,
2716                                        0,
2717                                        0,
2718                                        MatGetMultiProcBlock_MPIBAIJ,
2719                                 /*124*/0,
2720                                        MatGetColumnNorms_MPIBAIJ,
2721                                        MatInvertBlockDiagonal_MPIBAIJ,
2722                                        0,
2723                                        0,
2724                                /*129*/ 0,
2725                                        0,
2726                                        0,
2727                                        0,
2728                                        0,
2729                                /*134*/ 0,
2730                                        0,
2731                                        0,
2732                                        0,
2733                                        0,
2734                                /*139*/ 0,
2735                                        0,
2736                                        0,
2737                                        MatFDColoringSetUp_MPIXAIJ
2738 };
2739 
2740 #undef __FUNCT__
2741 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
2742 PetscErrorCode  MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a)
2743 {
2744   PetscFunctionBegin;
2745   *a = ((Mat_MPIBAIJ*)A->data)->A;
2746   PetscFunctionReturn(0);
2747 }
2748 
2749 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*);
2750 
2751 #undef __FUNCT__
2752 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ"
2753 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2754 {
2755   PetscInt       m,rstart,cstart,cend;
2756   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
2757   const PetscInt *JJ    =0;
2758   PetscScalar    *values=0;
2759   PetscBool      roworiented = ((Mat_MPIBAIJ*)B->data)->roworiented;
2760   PetscErrorCode ierr;
2761 
2762   PetscFunctionBegin;
2763   ierr   = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2764   ierr   = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2765   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2766   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2767   ierr   = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2768   m      = B->rmap->n/bs;
2769   rstart = B->rmap->rstart/bs;
2770   cstart = B->cmap->rstart/bs;
2771   cend   = B->cmap->rend/bs;
2772 
2773   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
2774   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
2775   for (i=0; i<m; i++) {
2776     nz = ii[i+1] - ii[i];
2777     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
2778     nz_max = PetscMax(nz_max,nz);
2779     JJ     = jj + ii[i];
2780     for (j=0; j<nz; j++) {
2781       if (*JJ >= cstart) break;
2782       JJ++;
2783     }
2784     d = 0;
2785     for (; j<nz; j++) {
2786       if (*JJ++ >= cend) break;
2787       d++;
2788     }
2789     d_nnz[i] = d;
2790     o_nnz[i] = nz - d;
2791   }
2792   ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2793   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
2794 
2795   values = (PetscScalar*)V;
2796   if (!values) {
2797     ierr = PetscMalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr);
2798     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2799   }
2800   for (i=0; i<m; i++) {
2801     PetscInt          row    = i + rstart;
2802     PetscInt          ncols  = ii[i+1] - ii[i];
2803     const PetscInt    *icols = jj + ii[i];
2804     if (!roworiented) {         /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
2805       const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2806       ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2807     } else {                    /* block ordering does not match so we can only insert one block at a time. */
2808       PetscInt j;
2809       for (j=0; j<ncols; j++) {
2810         const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
2811         ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
2812       }
2813     }
2814   }
2815 
2816   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2817   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2818   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2819   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2820   PetscFunctionReturn(0);
2821 }
2822 
2823 #undef __FUNCT__
2824 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR"
2825 /*@C
2826    MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format
2827    (block compressed row).
2828 
2829    Collective on MPI_Comm
2830 
2831    Input Parameters:
2832 +  B - the matrix
2833 .  bs - the block size
2834 .  i - the indices into j for the start of each local row (starts with zero)
2835 .  j - the column indices for each local row (starts with zero); these must be sorted for each row
2836 -  v - optional values in the matrix
2837 
2838    Level: developer
2839 
2840    Notes: The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED.  For example, C programs
2841    may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
2842    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
2843    MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
2844    block column and the second index is over columns within a block.
2845 
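   Example usage (a minimal sketch; comm, m, bs and the CSR arrays ii[], jj[], vv[] are illustrative names that
   the caller is assumed to have already set up):
.vb
   Mat B;
   MatCreate(comm,&B);
   MatSetSizes(B,m,m,PETSC_DETERMINE,PETSC_DETERMINE);   /* m = number of local point rows/columns */
   MatSetType(B,MATMPIBAIJ);
   MatMPIBAIJSetPreallocationCSR(B,bs,ii,jj,vv);         /* preallocates, inserts the values and assembles */
.ve
   Because this routine assembles the matrix itself, no further MatAssemblyBegin()/MatAssemblyEnd() calls are needed.
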
2846 .keywords: matrix, aij, compressed row, sparse, parallel
2847 
2848 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, MatCreateMPIBAIJWithArrays(), MPIBAIJ
2849 @*/
2850 PetscErrorCode  MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2851 {
2852   PetscErrorCode ierr;
2853 
2854   PetscFunctionBegin;
2855   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2856   PetscValidType(B,1);
2857   PetscValidLogicalCollectiveInt(B,bs,2);
2858   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
2859   PetscFunctionReturn(0);
2860 }
2861 
2862 #undef __FUNCT__
2863 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
2864 PetscErrorCode  MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
2865 {
2866   Mat_MPIBAIJ    *b;
2867   PetscErrorCode ierr;
2868   PetscInt       i;
2869 
2870   PetscFunctionBegin;
2871   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
2872   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2873   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2874   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2875 
2876   if (d_nnz) {
2877     for (i=0; i<B->rmap->n/bs; i++) {
2878       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]);
2879     }
2880   }
2881   if (o_nnz) {
2882     for (i=0; i<B->rmap->n/bs; i++) {
2883       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]);
2884     }
2885   }
2886 
2887   b      = (Mat_MPIBAIJ*)B->data;
2888   b->bs2 = bs*bs;
2889   b->mbs = B->rmap->n/bs;
2890   b->nbs = B->cmap->n/bs;
2891   b->Mbs = B->rmap->N/bs;
2892   b->Nbs = B->cmap->N/bs;
2893 
2894   for (i=0; i<=b->size; i++) {
2895     b->rangebs[i] = B->rmap->range[i]/bs;
2896   }
2897   b->rstartbs = B->rmap->rstart/bs;
2898   b->rendbs   = B->rmap->rend/bs;
2899   b->cstartbs = B->cmap->rstart/bs;
2900   b->cendbs   = B->cmap->rend/bs;
2901 
2902   if (!B->preallocated) {
2903     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2904     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2905     ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
2906     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2907     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2908     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2909     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
2910     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2911     ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr);
2912   }
2913 
2914   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2915   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
2916   B->preallocated = PETSC_TRUE;
2917   PetscFunctionReturn(0);
2918 }
2919 
2920 extern PetscErrorCode  MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
2921 extern PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
2922 
2923 #undef __FUNCT__
2924 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj"
2925 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype,MatReuse reuse,Mat *adj)
2926 {
2927   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
2928   PetscErrorCode ierr;
2929   Mat_SeqBAIJ    *d  = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data;
2930   PetscInt       M   = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs;
2931   const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;
2932 
2933   PetscFunctionBegin;
2934   ierr  = PetscMalloc1((M+1),&ii);CHKERRQ(ierr);
2935   ii[0] = 0;
2936   for (i=0; i<M; i++) {
2937     if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]);
2938     if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]);
2939     ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i];
2940     /* remove one from the count if the matrix has a diagonal entry */
2941     for (j=id[i]; j<id[i+1]; j++) {
2942       if (jd[j] == i) {ii[i+1]--;break;}
2943     }
2944   }
2945   ierr = PetscMalloc1(ii[M],&jj);CHKERRQ(ierr);
2946   cnt  = 0;
2947   for (i=0; i<M; i++) {
2948     for (j=io[i]; j<io[i+1]; j++) {
2949       if (garray[jo[j]] > rstart) break;
2950       jj[cnt++] = garray[jo[j]];
2951     }
2952     for (k=id[i]; k<id[i+1]; k++) {
2953       if (jd[k] != i) {
2954         jj[cnt++] = rstart + jd[k];
2955       }
2956     }
2957     for (; j<io[i+1]; j++) {
2958       jj[cnt++] = garray[jo[j]];
2959     }
2960   }
2961   ierr = MatCreateMPIAdj(PetscObjectComm((PetscObject)B),M,B->cmap->N/B->rmap->bs,ii,jj,NULL,adj);CHKERRQ(ierr);
2962   PetscFunctionReturn(0);
2963 }
2964 
2965 #include <../src/mat/impls/aij/mpi/mpiaij.h>
2966 
2967 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,MatReuse,Mat*);
2968 
2969 #undef __FUNCT__
2970 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAIJ"
2971 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
2972 {
2973   PetscErrorCode ierr;
2974   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
2975   Mat            B;
2976   Mat_MPIAIJ     *b;
2977 
2978   PetscFunctionBegin;
2979   if (!A->assembled) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Matrix must be assembled");
2980 
2981   ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2982   ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
2983   ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2984   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
2985   ierr = MatMPIAIJSetPreallocation(B,0,NULL,0,NULL);CHKERRQ(ierr);
2986   b    = (Mat_MPIAIJ*) B->data;
2987 
2988   ierr = MatDestroy(&b->A);CHKERRQ(ierr);
2989   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2990   ierr = MatDisAssemble_MPIBAIJ(A);CHKERRQ(ierr);
2991   ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr);
2992   ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr);
2993   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2994   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2995   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2996   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2997   if (reuse == MAT_REUSE_MATRIX) {
2998     ierr = MatHeaderReplace(A,B);CHKERRQ(ierr);
2999   } else {
3000    *newmat = B;
3001   }
3002   PetscFunctionReturn(0);
3003 }
3004 
3005 #if defined(PETSC_HAVE_MUMPS)
3006 PETSC_EXTERN PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*);
3007 #endif
3008 
3009 /*MC
3010    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
3011 
3012    Options Database Keys:
3013 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
3014 . -mat_block_size <bs> - set the blocksize used to store the matrix
3015 - -mat_use_hash_table <fact> - use a hash table during matrix assembly to save memory (fact is the hash table size factor)
3016 
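   Example usage (a minimal sketch; comm, m, n, bs, d_nz and o_nz are illustrative names/values):
.vb
   Mat A;
   MatCreate(comm,&A);
   MatSetSizes(A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);
   MatSetType(A,MATMPIBAIJ);                              /* or -mat_type mpibaij with MatSetFromOptions() */
   MatMPIBAIJSetPreallocation(A,bs,d_nz,NULL,o_nz,NULL);
.ve
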
3017   Level: beginner
3018 
3019 .seealso: MatCreateMPIBAIJ
3020 M*/
3021 
3022 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,MatType,MatReuse,Mat*);
3023 
3024 #undef __FUNCT__
3025 #define __FUNCT__ "MatCreate_MPIBAIJ"
3026 PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B)
3027 {
3028   Mat_MPIBAIJ    *b;
3029   PetscErrorCode ierr;
3030   PetscBool      flg;
3031 
3032   PetscFunctionBegin;
3033   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
3034   B->data = (void*)b;
3035 
3036   ierr         = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3037   B->assembled = PETSC_FALSE;
3038 
3039   B->insertmode = NOT_SET_VALUES;
3040   ierr          = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
3041   ierr          = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr);
3042 
3043   /* build local table of row and column ownerships */
3044   ierr = PetscMalloc1((b->size+1),&b->rangebs);CHKERRQ(ierr);
3045 
3046   /* build cache for off-processor entries formed during assembly */
3047   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
3048 
3049   b->donotstash  = PETSC_FALSE;
3050   b->colmap      = NULL;
3051   b->garray      = NULL;
3052   b->roworiented = PETSC_TRUE;
3053 
3054   /* stuff used in block assembly */
3055   b->barray = 0;
3056 
3057   /* stuff used for matrix vector multiply */
3058   b->lvec  = 0;
3059   b->Mvctx = 0;
3060 
3061   /* stuff for MatGetRow() */
3062   b->rowindices   = 0;
3063   b->rowvalues    = 0;
3064   b->getrowactive = PETSC_FALSE;
3065 
3066   /* hash table stuff */
3067   b->ht           = 0;
3068   b->hd           = 0;
3069   b->ht_size      = 0;
3070   b->ht_flag      = PETSC_FALSE;
3071   b->ht_fact      = 0;
3072   b->ht_total_ct  = 0;
3073   b->ht_insert_ct = 0;
3074 
3075   /* stuff for MatGetSubMatrices_MPIBAIJ_local() */
3076   b->ijonly = PETSC_FALSE;
3077 
3078   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr);
3079   ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,NULL);CHKERRQ(ierr);
3080   if (flg) {
3081     PetscReal fact = 1.39;
3082     ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
3083     ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
3084     if (fact <= 1.0) fact = 1.39;
3085     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
3086     ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
3087   }
3088   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3089 
3090 #if defined(PETSC_HAVE_MUMPS)
3091   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_baij_mumps);CHKERRQ(ierr);
3092 #endif
3093   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr);
3094   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr);
3095   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C",MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr);
3096   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
3097   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
3098   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
3099   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
3100   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
3101   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
3102   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSetHashTableFactor_C",MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
3103   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpibstrm_C",MatConvert_MPIBAIJ_MPIBSTRM);CHKERRQ(ierr);
3104   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);
3105   PetscFunctionReturn(0);
3106 }
3107 
3108 /*MC
3109    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
3110 
3111    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
3112    and MATMPIBAIJ otherwise.
3113 
3114    Options Database Keys:
3115 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
3116 
3117   Level: beginner
3118 
3119 .seealso: MatCreateBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3120 M*/
3121 
3122 #undef __FUNCT__
3123 #define __FUNCT__ "MatMPIBAIJSetPreallocation"
3124 /*@C
3125    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
3126    (block compressed row).  For good matrix assembly performance
3127    the user should preallocate the matrix storage by setting the parameters
3128    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3129    performance can be increased by more than a factor of 50.
3130 
3131    Collective on Mat
3132 
3133    Input Parameters:
3134 +  B - the matrix
3135 .  bs   - size of block
3136 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
3137            submatrix  (same for all local rows)
3138 .  d_nnz - array containing the number of block nonzeros in the various block rows
3139            in the diagonal portion of the local submatrix (possibly different for each block
3140            row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry and
3141            set it even if it is zero.
3142 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
3143            submatrix (same for all local rows).
3144 -  o_nnz - array containing the number of block nonzeros in the various block rows of the
3145            off-diagonal portion of the local submatrix (possibly different for
3146            each block row) or NULL.
3147 
3148    If the *_nnz parameter is given then the *_nz parameter is ignored
3149 
3150    Options Database Keys:
3151 +   -mat_block_size - size of the blocks to use
3152 -   -mat_use_hash_table <fact> - use a hash table during matrix assembly to save memory (fact is the hash table size factor)
3153 
3154    Notes:
3155    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
3156    then it must be used on all processors that share the object for that argument.
3157 
3158    Storage Information:
3159    For a square global matrix we define each processor's diagonal portion
3160    to be its local rows and the corresponding columns (a square submatrix);
3161    each processor's off-diagonal portion encompasses the remainder of the
3162    local matrix (a rectangular submatrix).
3163 
3164    The user can specify preallocated storage for the diagonal part of
3165    the local submatrix with either d_nz or d_nnz (not both).  Set
3166    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3167    memory allocation.  Likewise, specify preallocated storage for the
3168    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3169 
3170    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3171    the figure below we depict these three local rows and all columns (0-11).
3172 
3173 .vb
3174            0 1 2 3 4 5 6 7 8 9 10 11
3175           --------------------------
3176    row 3  |o o o d d d o o o o  o  o
3177    row 4  |o o o d d d o o o o  o  o
3178    row 5  |o o o d d d o o o o  o  o
3179           --------------------------
3180 .ve
3181 
3182    Thus, any entries in the d locations are stored in the d (diagonal)
3183    submatrix, and any entries in the o locations are stored in the
3184    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3185    stored simply in the MATSEQBAIJ format for compressed row storage.
3186 
3187    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3188    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3189    In general, for PDE problems in which most nonzeros are near the diagonal,
3190    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3191    or you will get TERRIBLE performance; see the users' manual chapter on
3192    matrices.
3193 
3194    You can call MatGetInfo() to get information on how effective the preallocation was;
3195    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3196    You can also run with the option -info and look for messages with the string
3197    malloc in them to see if additional memory allocation was needed.
3198 
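   Example usage (a minimal sketch for a square matrix; comm, mbs, bs and the per-block-row count arrays dnnz[]
   and onnz[] are illustrative names the caller is assumed to provide):
.vb
   Mat A;
   MatCreate(comm,&A);
   MatSetSizes(A,mbs*bs,mbs*bs,PETSC_DETERMINE,PETSC_DETERMINE);
   MatSetType(A,MATMPIBAIJ);
   MatMPIBAIJSetPreallocation(A,bs,0,dnnz,0,onnz);   /* d_nz and o_nz are ignored since the arrays are given */
.ve
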
3199    Level: intermediate
3200 
3201 .keywords: matrix, block, aij, compressed row, sparse, parallel
3202 
3203 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocationCSR(), PetscSplitOwnership()
3204 @*/
3205 PetscErrorCode  MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3206 {
3207   PetscErrorCode ierr;
3208 
3209   PetscFunctionBegin;
3210   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3211   PetscValidType(B,1);
3212   PetscValidLogicalCollectiveInt(B,bs,2);
3213   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3214   PetscFunctionReturn(0);
3215 }
3216 
3217 #undef __FUNCT__
3218 #define __FUNCT__ "MatCreateBAIJ"
3219 /*@C
3220    MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format
3221    (block compressed row).  For good matrix assembly performance
3222    the user should preallocate the matrix storage by setting the parameters
3223    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3224    performance can be increased by more than a factor of 50.
3225 
3226    Collective on MPI_Comm
3227 
3228    Input Parameters:
3229 +  comm - MPI communicator
3230 .  bs   - size of block
3231 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3232            This value should be the same as the local size used in creating the
3233            y vector for the matrix-vector product y = Ax.
3234 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
3235            This value should be the same as the local size used in creating the
3236            x vector for the matrix-vector product y = Ax.
3237 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3238 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3239 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
3240            submatrix  (same for all local rows)
3241 .  d_nnz - array containing the number of nonzero blocks in the various block rows
3242            in the diagonal portion of the local submatrix (possibly different for each block
3243            row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry
3244            and set it even if it is zero.
3245 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3246            submatrix (same for all local rows).
3247 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
3248            off-diagonal portion of the local submatrix (possibly different for
3249            each block row) or NULL.
3250 
3251    Output Parameter:
3252 .  A - the matrix
3253 
3254    Options Database Keys:
3255 +   -mat_block_size - size of the blocks to use
3256 -   -mat_use_hash_table <fact> - use a hash table during matrix assembly to save memory (fact is the hash table size factor)
3257 
3258    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3259    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3260    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3261 
3262    Notes:
3263    If the *_nnz parameter is given then the *_nz parameter is ignored
3264 
3265    A nonzero block is any block that has 1 or more nonzeros in it
3266 
3267    The user MUST specify either the local or global matrix dimensions
3268    (possibly both).
3269 
3270    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
3271    then it must be used on all processors that share the object for that argument.
3272 
3273    Storage Information:
3274    For a square global matrix we define each processor's diagonal portion
3275    to be its local rows and the corresponding columns (a square submatrix);
3276    each processor's off-diagonal portion encompasses the remainder of the
3277    local matrix (a rectangular submatrix).
3278 
3279    The user can specify preallocated storage for the diagonal part of
3280    the local submatrix with either d_nz or d_nnz (not both).  Set
3281    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3282    memory allocation.  Likewise, specify preallocated storage for the
3283    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3284 
3285    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3286    the figure below we depict these three local rows and all columns (0-11).
3287 
3288 .vb
3289            0 1 2 3 4 5 6 7 8 9 10 11
3290           --------------------------
3291    row 3  |o o o d d d o o o o  o  o
3292    row 4  |o o o d d d o o o o  o  o
3293    row 5  |o o o d d d o o o o  o  o
3294           --------------------------
3295 .ve
3296 
3297    Thus, any entries in the d locations are stored in the d (diagonal)
3298    submatrix, and any entries in the o locations are stored in the
3299    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3300    stored simply in the MATSEQBAIJ format for compressed row storage.
3301 
3302    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3303    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3304    In general, for PDE problems in which most nonzeros are near the diagonal,
3305    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3306    or you will get TERRIBLE performance; see the users' manual chapter on
3307    matrices.
3308 
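   Example usage (a minimal sketch for a square matrix; comm, bs and m are illustrative names, and the uniform
   estimates of 5 diagonal and 2 off-diagonal block nonzeros per block row are illustrative values only):
.vb
   Mat A;
   MatCreateBAIJ(comm,bs,m,m,PETSC_DETERMINE,PETSC_DETERMINE,5,NULL,2,NULL,&A);
   /* ... insert entries with MatSetValuesBlocked(), then MatAssemblyBegin()/MatAssemblyEnd() ... */
.ve
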
3309    Level: intermediate
3310 
3311 .keywords: matrix, block, aij, compressed row, sparse, parallel
3312 
3313 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3314 @*/
3315 PetscErrorCode  MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3316 {
3317   PetscErrorCode ierr;
3318   PetscMPIInt    size;
3319 
3320   PetscFunctionBegin;
3321   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3322   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3323   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3324   if (size > 1) {
3325     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
3326     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3327   } else {
3328     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3329     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3330   }
3331   PetscFunctionReturn(0);
3332 }
3333 
3334 #undef __FUNCT__
3335 #define __FUNCT__ "MatDuplicate_MPIBAIJ"
3336 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3337 {
3338   Mat            mat;
3339   Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
3340   PetscErrorCode ierr;
3341   PetscInt       len=0;
3342 
3343   PetscFunctionBegin;
3344   *newmat = 0;
3345   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
3346   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3347   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3348   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3349 
3350   mat->factortype   = matin->factortype;
3351   mat->preallocated = PETSC_TRUE;
3352   mat->assembled    = PETSC_TRUE;
3353   mat->insertmode   = NOT_SET_VALUES;
3354 
3355   a             = (Mat_MPIBAIJ*)mat->data;
3356   mat->rmap->bs = matin->rmap->bs;
3357   a->bs2        = oldmat->bs2;
3358   a->mbs        = oldmat->mbs;
3359   a->nbs        = oldmat->nbs;
3360   a->Mbs        = oldmat->Mbs;
3361   a->Nbs        = oldmat->Nbs;
3362 
3363   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3364   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3365 
3366   a->size         = oldmat->size;
3367   a->rank         = oldmat->rank;
3368   a->donotstash   = oldmat->donotstash;
3369   a->roworiented  = oldmat->roworiented;
3370   a->rowindices   = 0;
3371   a->rowvalues    = 0;
3372   a->getrowactive = PETSC_FALSE;
3373   a->barray       = 0;
3374   a->rstartbs     = oldmat->rstartbs;
3375   a->rendbs       = oldmat->rendbs;
3376   a->cstartbs     = oldmat->cstartbs;
3377   a->cendbs       = oldmat->cendbs;
3378 
3379   /* hash table stuff */
3380   a->ht           = 0;
3381   a->hd           = 0;
3382   a->ht_size      = 0;
3383   a->ht_flag      = oldmat->ht_flag;
3384   a->ht_fact      = oldmat->ht_fact;
3385   a->ht_total_ct  = 0;
3386   a->ht_insert_ct = 0;
3387 
3388   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
3389   if (oldmat->colmap) {
3390 #if defined(PETSC_USE_CTABLE)
3391     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3392 #else
3393     ierr = PetscMalloc1((a->Nbs),&a->colmap);CHKERRQ(ierr);
3394     ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3395     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3396 #endif
3397   } else a->colmap = 0;
3398 
3399   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
3400     ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr);
3401     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3402     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
3403   } else a->garray = 0;
3404 
3405   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
3406   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3407   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
3408   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3409   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
3410 
3411   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3412   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
3413   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3414   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
3415   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3416   *newmat = mat;
3417   PetscFunctionReturn(0);
3418 }
3419 
3420 #undef __FUNCT__
3421 #define __FUNCT__ "MatLoad_MPIBAIJ"
3422 PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer)
3423 {
3424   PetscErrorCode ierr;
3425   int            fd;
3426   PetscInt       i,nz,j,rstart,rend;
3427   PetscScalar    *vals,*buf;
3428   MPI_Comm       comm;
3429   MPI_Status     status;
3430   PetscMPIInt    rank,size,maxnz;
3431   PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
3432   PetscInt       *locrowlens = NULL,*procsnz = NULL,*browners = NULL;
3433   PetscInt       jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax;
3434   PetscMPIInt    tag    = ((PetscObject)viewer)->tag;
3435   PetscInt       *dlens = NULL,*odlens = NULL,*mask = NULL,*masked1 = NULL,*masked2 = NULL,rowcount,odcount;
3436   PetscInt       dcount,kmax,k,nzcount,tmp,mend,sizesset=1,grows,gcols;
3437 
3438   PetscFunctionBegin;
3439   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3440   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr);
3441   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3442   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3443 
3444   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3445   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3446   if (!rank) {
3447     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3448     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
3449     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3450   }
3451 
3452   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
3453 
3454   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3455   M    = header[1]; N = header[2];
3456 
3457   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3458   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
3459   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
3460 
3461   /* If global sizes are set, check if they are consistent with that given in the file */
3462   if (sizesset) {
3463     ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
3464   }
3465   if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",M,grows);
3466   if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",N,gcols);
3467 
3468   if (M != N) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Can only do square matrices");
3469 
3470   /*
3471      This code adds extra rows to make sure the number of rows is
3472      divisible by the blocksize
3473   */
3474   Mbs        = M/bs;
3475   extra_rows = bs - M + bs*Mbs;
3476   if (extra_rows == bs) extra_rows = 0;
3477   else                  Mbs++;
3478   if (extra_rows && !rank) {
3479     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3480   }
3481 
3482   /* determine ownership of all rows */
3483   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
3484     mbs = Mbs/size + ((Mbs % size) > rank);
3485     m   = mbs*bs;
3486   } else { /* User set */
3487     m   = newmat->rmap->n;
3488     mbs = m/bs;
3489   }
3490   ierr = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr);
3491   ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3492 
3493   /* process 0 needs enough room for process with most rows */
3494   if (!rank) {
3495     mmax = rowners[1];
3496     for (i=2; i<=size; i++) {
3497       mmax = PetscMax(mmax,rowners[i]);
3498     }
3499     mmax*=bs;
3500   } else mmax = -1;             /* unused, but compiler warns anyway */
3501 
3502   rowners[0] = 0;
3503   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
3504   for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
3505   rstart = rowners[rank];
3506   rend   = rowners[rank+1];
3507 
3508   /* distribute row lengths to all processors */
3509   ierr = PetscMalloc1(m,&locrowlens);CHKERRQ(ierr);
3510   if (!rank) {
3511     mend = m;
3512     if (size == 1) mend = mend - extra_rows;
3513     ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr);
3514     for (j=mend; j<m; j++) locrowlens[j] = 1;
3515     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
3516     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
3517     for (j=0; j<m; j++) {
3518       procsnz[0] += locrowlens[j];
3519     }
3520     for (i=1; i<size; i++) {
3521       mend = browners[i+1] - browners[i];
3522       if (i == size-1) mend = mend - extra_rows;
3523       ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr);
3524       for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
3525       /* calculate the number of nonzeros on each processor */
3526       for (j=0; j<browners[i+1]-browners[i]; j++) {
3527         procsnz[i] += rowlengths[j];
3528       }
3529       ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3530     }
3531     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3532   } else {
3533     ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3534   }
3535 
3536   if (!rank) {
3537     /* determine max buffer needed and allocate it */
3538     maxnz = procsnz[0];
3539     for (i=1; i<size; i++) {
3540       maxnz = PetscMax(maxnz,procsnz[i]);
3541     }
3542     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
3543 
3544     /* read in my part of the matrix column indices  */
3545     nz     = procsnz[0];
3546     ierr   = PetscMalloc1((nz+1),&ibuf);CHKERRQ(ierr);
3547     mycols = ibuf;
3548     if (size == 1) nz -= extra_rows;
3549     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3550     if (size == 1) {
3551       for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i;
3552     }
3553 
3554     /* read in the column indices for the other processes (except the last) and ship them off */
3555     for (i=1; i<size-1; i++) {
3556       nz   = procsnz[i];
3557       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3558       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3559     }
3560     /* read in the stuff for the last proc */
3561     if (size != 1) {
3562       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
3563       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3564       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
3565       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
3566     }
3567     ierr = PetscFree(cols);CHKERRQ(ierr);
3568   } else {
3569     /* determine buffer space needed for message */
3570     nz = 0;
3571     for (i=0; i<m; i++) {
3572       nz += locrowlens[i];
3573     }
3574     ierr   = PetscMalloc1((nz+1),&ibuf);CHKERRQ(ierr);
3575     mycols = ibuf;
3576     /* receive message of column indices*/
3577     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3578     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
3579     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
3580   }
3581 
3582   /* loop over local rows, determining number of off diagonal entries */
3583   ierr     = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr);
3584   ierr     = PetscCalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr);
3585   rowcount = 0; nzcount = 0;
3586   for (i=0; i<mbs; i++) {
3587     dcount  = 0;
3588     odcount = 0;
3589     for (j=0; j<bs; j++) {
3590       kmax = locrowlens[rowcount];
3591       for (k=0; k<kmax; k++) {
3592         tmp = mycols[nzcount++]/bs;
3593         if (!mask[tmp]) {
3594           mask[tmp] = 1;
3595           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
3596           else masked1[dcount++] = tmp;
3597         }
3598       }
3599       rowcount++;
3600     }
3601 
3602     dlens[i]  = dcount;
3603     odlens[i] = odcount;
3604 
3605     /* zero out the mask elements we set */
3606     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
3607     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
3608   }
3609 
3610 
3611   if (!sizesset) {
3612     ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3613   }
3614   ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
3615 
3616   if (!rank) {
3617     ierr = PetscMalloc1((maxnz+1),&buf);CHKERRQ(ierr);
3618     /* read in my part of the matrix numerical values  */
3619     nz     = procsnz[0];
3620     vals   = buf;
3621     mycols = ibuf;
3622     if (size == 1) nz -= extra_rows;
3623     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3624     if (size == 1) {
3625       for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0;
3626     }
3627 
3628     /* insert into matrix */
3629     jj = rstart*bs;
3630     for (i=0; i<m; i++) {
3631       ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3632       mycols += locrowlens[i];
3633       vals   += locrowlens[i];
3634       jj++;
3635     }
3636     /* read in other processors (except the last one) and ship out */
3637     for (i=1; i<size-1; i++) {
3638       nz   = procsnz[i];
3639       vals = buf;
3640       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3641       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3642     }
3643     /* the last proc */
3644     if (size != 1) {
3645       nz   = procsnz[i] - extra_rows;
3646       vals = buf;
3647       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3648       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
3649       ierr = MPIULong_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3650     }
3651     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3652   } else {
3653     /* receive numeric values */
3654     ierr = PetscMalloc1((nz+1),&buf);CHKERRQ(ierr);
3655 
3656     /* receive message of values*/
3657     vals   = buf;
3658     mycols = ibuf;
3659     ierr   = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3660 
3661     /* insert into matrix */
3662     jj = rstart*bs;
3663     for (i=0; i<m; i++) {
3664       ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3665       mycols += locrowlens[i];
3666       vals   += locrowlens[i];
3667       jj++;
3668     }
3669   }
3670   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
3671   ierr = PetscFree(buf);CHKERRQ(ierr);
3672   ierr = PetscFree(ibuf);CHKERRQ(ierr);
3673   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
3674   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
3675   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
3676   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3677   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3678   PetscFunctionReturn(0);
3679 }
3680 
3681 #undef __FUNCT__
3682 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
3683 /*@
3684    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
3685 
3686    Input Parameters:
3687 +  mat  - the matrix
3688 -  fact - the factor used to compute the size of the hash table (the default used by -mat_use_hash_table is 1.39)
3689 
3690    Not Collective, each process can use a different factor
3691 
3692    Level: advanced
3693 
3694   Notes:
3695    This can also be set by the command line option: -mat_use_hash_table <fact>
3696 
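   Example usage (a minimal sketch; the factor 1.6 is an illustrative value only):
.vb
   MatSetOption(mat,MAT_USE_HASH_TABLE,PETSC_TRUE);
   MatMPIBAIJSetHashTableFactor(mat,1.6);
.ve
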
3697 .keywords: matrix, hashtable, factor, HT
3698 
3699 .seealso: MatSetOption()
3700 @*/
3701 PetscErrorCode  MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
3702 {
3703   PetscErrorCode ierr;
3704 
3705   PetscFunctionBegin;
3706   ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr);
3707   PetscFunctionReturn(0);
3708 }
3709 
3710 #undef __FUNCT__
3711 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
3712 PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
3713 {
3714   Mat_MPIBAIJ *baij;
3715 
3716   PetscFunctionBegin;
3717   baij          = (Mat_MPIBAIJ*)mat->data;
3718   baij->ht_fact = fact;
3719   PetscFunctionReturn(0);
3720 }
3721 
3722 #undef __FUNCT__
3723 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
3724 PetscErrorCode  MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
3725 {
3726   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
3727 
3728   PetscFunctionBegin;
3729   if (Ad)     *Ad     = a->A;
3730   if (Ao)     *Ao     = a->B;
3731   if (colmap) *colmap = a->garray;
3732   PetscFunctionReturn(0);
3733 }
3734 
3735 /*
3736     Special version for direct calls from Fortran (to eliminate two function call overheads)
3737 */
3738 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3739 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3740 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3741 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3742 #endif
3743 
3744 #undef __FUNCT__
3745 #define __FUNCT__ "matmpibaijsetvaluesblocked_"
3746 /*@C
3747   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked()
3748 
3749   Collective on Mat
3750 
3751   Input Parameters:
3752 + mat - the matrix
3753 . min - number of input rows
3754 . im - input rows
3755 . nin - number of input columns
3756 . in - input columns
3757 . v - numerical values input
3758 - addvin - INSERT_VALUES or ADD_VALUES
3759 
3760   Notes: This contains a complete copy of MatSetValuesBlocked_MPIBAIJ(), which is terrible code duplication.
3761 
3762   Level: advanced
3763 
3764 .seealso:   MatSetValuesBlocked()
3765 @*/
3766 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
3767 {
3768   /* convert input arguments to C version */
3769   Mat        mat  = *matin;
3770   PetscInt   m    = *min, n = *nin;
3771   InsertMode addv = *addvin;
3772 
3773   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
3774   const MatScalar *value;
3775   MatScalar       *barray     = baij->barray;
3776   PetscBool       roworiented = baij->roworiented;
3777   PetscErrorCode  ierr;
3778   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
3779   PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
3780   PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
3781 
3782   PetscFunctionBegin;
3783   /* tasks normally handled by MatSetValuesBlocked() */
3784   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
3785 #if defined(PETSC_USE_DEBUG)
3786   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
3787   if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
3788 #endif
3789   if (mat->assembled) {
3790     mat->was_assembled = PETSC_TRUE;
3791     mat->assembled     = PETSC_FALSE;
3792   }
3793   ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
3794 
3795 
3796   if (!barray) {
3797     ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
3798     baij->barray = barray;
3799   }
3800 
3801   if (roworiented) stepval = (n-1)*bs;
3802   else stepval = (m-1)*bs;
3803 
3804   for (i=0; i<m; i++) {
3805     if (im[i] < 0) continue;
3806 #if defined(PETSC_USE_DEBUG)
3807     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
3808 #endif
3809     if (im[i] >= rstart && im[i] < rend) {
3810       row = im[i] - rstart;
3811       for (j=0; j<n; j++) {
3812         /* If n == 1 (row-oriented) or m == 1 (column-oriented) then a copy is not required */
3813         if ((roworiented) && (n == 1)) {
3814           barray = (MatScalar*)v + i*bs2;
3815         } else if ((!roworiented) && (m == 1)) {
3816           barray = (MatScalar*)v + j*bs2;
3817         } else { /* Here a copy is required */
3818           if (roworiented) {
3819             value = v + i*(stepval+bs)*bs + j*bs;
3820           } else {
3821             value = v + j*(stepval+bs)*bs + i*bs;
3822           }
3823           for (ii=0; ii<bs; ii++,value+=stepval) {
3824             for (jj=0; jj<bs; jj++) {
3825               *barray++ = *value++;
3826             }
3827           }
3828           barray -=bs2;
3829         }
3830 
3831         if (in[j] >= cstart && in[j] < cend) {
3832           col  = in[j] - cstart;
3833           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
3834         } else if (in[j] < 0) continue;
3835 #if defined(PETSC_USE_DEBUG)
3836         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
3837 #endif
3838         else {
3839           if (mat->was_assembled) {
3840             if (!baij->colmap) {
3841               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
3842             }
3843 
3844 #if defined(PETSC_USE_DEBUG)
3845 #if defined(PETSC_USE_CTABLE)
3846             { PetscInt data;
3847               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
3848               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
3849             }
3850 #else
3851             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
3852 #endif
3853 #endif
3854 #if defined(PETSC_USE_CTABLE)
3855             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
3856             col  = (col - 1)/bs;
3857 #else
3858             col = (baij->colmap[in[j]] - 1)/bs;
3859 #endif
3860             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
3861               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
3862               col  =  in[j];
3863             }
3864           } else col = in[j];
3865           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
3866         }
3867       }
3868     } else {
3869       if (!baij->donotstash) {
3870         if (roworiented) {
3871           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
3872         } else {
3873           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
3874         }
3875       }
3876     }
3877   }
3878 
3879   /* task normally handled by MatSetValuesBlocked() */
3880   ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
3881   PetscFunctionReturn(0);
3882 }
3883 
3884 #undef __FUNCT__
3885 #define __FUNCT__ "MatCreateMPIBAIJWithArrays"
3886 /*@
3887      MatCreateMPIBAIJWithArrays - creates an MPI BAIJ matrix using arrays that contain the local rows in
3888          standard CSR format.
3889 
3890    Collective on MPI_Comm
3891 
3892    Input Parameters:
3893 +  comm - MPI communicator
3894 .  bs - the block size, only a block size of 1 is supported
3895 .  m - number of local rows (Cannot be PETSC_DECIDE)
3896 .  n - This value should be the same as the local size used in creating the
3897        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3898        calculated if N is given) For square matrices n is almost always m.
3899 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3900 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3901 .   i - row indices
3902 .   j - column indices
3903 -   a - matrix values
3904 
3905    Output Parameter:
3906 .   mat - the matrix
3907 
3908    Level: intermediate
3909 
3910    Notes:
3911        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3912      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3913      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3914 
3915      The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3916      the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3917      block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3918      with column-major ordering within blocks.
3919 
3920        The i and j indices are 0-based, and the i indices are offsets into the local j array.
3921 
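   Example usage (a minimal sketch; comm, bs, m, n and the CSR arrays ii[], jj[], aa[] are illustrative names the
   caller is assumed to have already built):
.vb
   Mat A;
   MatCreateMPIBAIJWithArrays(comm,bs,m,n,PETSC_DETERMINE,PETSC_DETERMINE,ii,jj,aa,&A);
.ve
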
3922 .keywords: matrix, aij, compressed row, sparse, parallel
3923 
3924 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3925           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
3926 @*/
3927 PetscErrorCode  MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3928 {
3929   PetscErrorCode ierr;
3930 
3931   PetscFunctionBegin;
3932   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3933   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3934   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3935   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3936   ierr = MatSetType(*mat,MATMPIBAIJ);CHKERRQ(ierr);
3937   ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
3938   ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
3939   ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
3940   PetscFunctionReturn(0);
3941 }
3942