xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 69ea2d38414256fc1d7e7d8986091b8a5f5e4114)
1 
2 #include <../src/mat/impls/baij/mpi/mpibaij.h>   /*I  "petscmat.h"  I*/
3 #include <petscblaslapack.h>
4 #include <petscsf.h>
5 
6 extern PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
7 extern PetscErrorCode MatDisAssemble_MPIBAIJ(Mat);
8 extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []);
9 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
10 extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
11 extern PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
12 extern PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
13 extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar,Vec,Vec);
14 
15 #undef __FUNCT__
16 #define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ"
17 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
18 {
19   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
20   PetscErrorCode ierr;
21   PetscInt       i,*idxb = 0;
22   PetscScalar    *va,*vb;
23   Vec            vtmp;
24 
25   PetscFunctionBegin;
26   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
27   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
28   if (idx) {
29     for (i=0; i<A->rmap->n; i++) {
30       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
31     }
32   }
33 
34   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
35   if (idx) {ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);}
36   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
37   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
38 
39   for (i=0; i<A->rmap->n; i++) {
40     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
41       va[i] = vb[i];
42       if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);
43     }
44   }
45 
46   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
47   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
48   ierr = PetscFree(idxb);CHKERRQ(ierr);
49   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
50   PetscFunctionReturn(0);
51 }
52 
53 #undef __FUNCT__
54 #define __FUNCT__ "MatStoreValues_MPIBAIJ"
55 PetscErrorCode  MatStoreValues_MPIBAIJ(Mat mat)
56 {
57   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
58   PetscErrorCode ierr;
59 
60   PetscFunctionBegin;
61   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
62   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
63   PetscFunctionReturn(0);
64 }
65 
66 #undef __FUNCT__
67 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
68 PetscErrorCode  MatRetrieveValues_MPIBAIJ(Mat mat)
69 {
70   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
71   PetscErrorCode ierr;
72 
73   PetscFunctionBegin;
74   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
75   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
76   PetscFunctionReturn(0);
77 }
78 
79 /*
80      Local utility routine that creates a mapping from the global column
81    number to the local number in the off-diagonal part of the local
82    storage of the matrix.  This is done in a non scalable way since the
83    length of colmap equals the global matrix length.
84 */
85 #undef __FUNCT__
86 #define __FUNCT__ "MatCreateColmap_MPIBAIJ_Private"
87 PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
88 {
89   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
90   Mat_SeqBAIJ    *B    = (Mat_SeqBAIJ*)baij->B->data;
91   PetscErrorCode ierr;
92   PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;
93 
94   PetscFunctionBegin;
95 #if defined(PETSC_USE_CTABLE)
96   ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
97   for (i=0; i<nbs; i++) {
98     ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr);
99   }
100 #else
101   ierr = PetscMalloc1((baij->Nbs+1),&baij->colmap);CHKERRQ(ierr);
102   ierr = PetscLogObjectMemory((PetscObject)mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
103   ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
104   for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
105 #endif
106   PetscFunctionReturn(0);
107 }
108 
109 #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
110   { \
111  \
112     brow = row/bs;  \
113     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
114     rmax = aimax[brow]; nrow = ailen[brow]; \
115     bcol = col/bs; \
116     ridx = row % bs; cidx = col % bs; \
117     low  = 0; high = nrow; \
118     while (high-low > 3) { \
119       t = (low+high)/2; \
120       if (rp[t] > bcol) high = t; \
121       else              low  = t; \
122     } \
123     for (_i=low; _i<high; _i++) { \
124       if (rp[_i] > bcol) break; \
125       if (rp[_i] == bcol) { \
126         bap = ap +  bs2*_i + bs*cidx + ridx; \
127         if (addv == ADD_VALUES) *bap += value;  \
128         else                    *bap  = value;  \
129         goto a_noinsert; \
130       } \
131     } \
132     if (a->nonew == 1) goto a_noinsert; \
133     if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
134     MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
135     N = nrow++ - 1;  \
136     /* shift up all the later entries in this row */ \
137     for (ii=N; ii>=_i; ii--) { \
138       rp[ii+1] = rp[ii]; \
139       ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
140     } \
141     if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
142     rp[_i]                      = bcol;  \
143     ap[bs2*_i + bs*cidx + ridx] = value;  \
144 a_noinsert:; \
145     ailen[brow] = nrow; \
146   }
147 
148 #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
149   { \
150     brow = row/bs;  \
151     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
152     rmax = bimax[brow]; nrow = bilen[brow]; \
153     bcol = col/bs; \
154     ridx = row % bs; cidx = col % bs; \
155     low  = 0; high = nrow; \
156     while (high-low > 3) { \
157       t = (low+high)/2; \
158       if (rp[t] > bcol) high = t; \
159       else              low  = t; \
160     } \
161     for (_i=low; _i<high; _i++) { \
162       if (rp[_i] > bcol) break; \
163       if (rp[_i] == bcol) { \
164         bap = ap +  bs2*_i + bs*cidx + ridx; \
165         if (addv == ADD_VALUES) *bap += value;  \
166         else                    *bap  = value;  \
167         goto b_noinsert; \
168       } \
169     } \
170     if (b->nonew == 1) goto b_noinsert; \
171     if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
172     MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
173     N = nrow++ - 1;  \
174     /* shift up all the later entries in this row */ \
175     for (ii=N; ii>=_i; ii--) { \
176       rp[ii+1] = rp[ii]; \
177       ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
178     } \
179     if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
180     rp[_i]                      = bcol;  \
181     ap[bs2*_i + bs*cidx + ridx] = value;  \
182 b_noinsert:; \
183     bilen[brow] = nrow; \
184   }
185 
186 #undef __FUNCT__
187 #define __FUNCT__ "MatSetValues_MPIBAIJ"
188 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
189 {
190   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
191   MatScalar      value;
192   PetscBool      roworiented = baij->roworiented;
193   PetscErrorCode ierr;
194   PetscInt       i,j,row,col;
195   PetscInt       rstart_orig=mat->rmap->rstart;
196   PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
197   PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;
198 
199   /* Some Variables required in the macro */
200   Mat         A     = baij->A;
201   Mat_SeqBAIJ *a    = (Mat_SeqBAIJ*)(A)->data;
202   PetscInt    *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
203   MatScalar   *aa   =a->a;
204 
205   Mat         B     = baij->B;
206   Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
207   PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
208   MatScalar   *ba   =b->a;
209 
210   PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
211   PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
212   MatScalar *ap,*bap;
213 
214   PetscFunctionBegin;
215   for (i=0; i<m; i++) {
216     if (im[i] < 0) continue;
217 #if defined(PETSC_USE_DEBUG)
218     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
219 #endif
220     if (im[i] >= rstart_orig && im[i] < rend_orig) {
221       row = im[i] - rstart_orig;
222       for (j=0; j<n; j++) {
223         if (in[j] >= cstart_orig && in[j] < cend_orig) {
224           col = in[j] - cstart_orig;
225           if (roworiented) value = v[i*n+j];
226           else             value = v[i+j*m];
227           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
228           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
229         } else if (in[j] < 0) continue;
230 #if defined(PETSC_USE_DEBUG)
231         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
232 #endif
233         else {
234           if (mat->was_assembled) {
235             if (!baij->colmap) {
236               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
237             }
238 #if defined(PETSC_USE_CTABLE)
239             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
240             col  = col - 1;
241 #else
242             col = baij->colmap[in[j]/bs] - 1;
243 #endif
244             if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
245               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
246               col  =  in[j];
247               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
248               B    = baij->B;
249               b    = (Mat_SeqBAIJ*)(B)->data;
250               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
251               ba   =b->a;
252             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
253             else col += in[j]%bs;
254           } else col = in[j];
255           if (roworiented) value = v[i*n+j];
256           else             value = v[i+j*m];
257           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
258           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
259         }
260       }
261     } else {
262       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
263       if (!baij->donotstash) {
264         mat->assembled = PETSC_FALSE;
265         if (roworiented) {
266           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
267         } else {
268           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
269         }
270       }
271     }
272   }
273   PetscFunctionReturn(0);
274 }
275 
276 #undef __FUNCT__
277 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
278 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
279 {
280   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
281   const PetscScalar *value;
282   MatScalar         *barray     = baij->barray;
283   PetscBool         roworiented = baij->roworiented;
284   PetscErrorCode    ierr;
285   PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
286   PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
287   PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
288 
289   PetscFunctionBegin;
290   if (!barray) {
291     ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
292     baij->barray = barray;
293   }
294 
295   if (roworiented) stepval = (n-1)*bs;
296   else stepval = (m-1)*bs;
297 
298   for (i=0; i<m; i++) {
299     if (im[i] < 0) continue;
300 #if defined(PETSC_USE_DEBUG)
301     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
302 #endif
303     if (im[i] >= rstart && im[i] < rend) {
304       row = im[i] - rstart;
305       for (j=0; j<n; j++) {
306         /* If NumCol = 1 then a copy is not required */
307         if ((roworiented) && (n == 1)) {
308           barray = (MatScalar*)v + i*bs2;
309         } else if ((!roworiented) && (m == 1)) {
310           barray = (MatScalar*)v + j*bs2;
311         } else { /* Here a copy is required */
312           if (roworiented) {
313             value = v + (i*(stepval+bs) + j)*bs;
314           } else {
315             value = v + (j*(stepval+bs) + i)*bs;
316           }
317           for (ii=0; ii<bs; ii++,value+=bs+stepval) {
318             for (jj=0; jj<bs; jj++) barray[jj] = value[jj];
319             barray += bs;
320           }
321           barray -= bs2;
322         }
323 
324         if (in[j] >= cstart && in[j] < cend) {
325           col  = in[j] - cstart;
326           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
327         } else if (in[j] < 0) continue;
328 #if defined(PETSC_USE_DEBUG)
329         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
330 #endif
331         else {
332           if (mat->was_assembled) {
333             if (!baij->colmap) {
334               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
335             }
336 
337 #if defined(PETSC_USE_DEBUG)
338 #if defined(PETSC_USE_CTABLE)
339             { PetscInt data;
340               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
341               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
342             }
343 #else
344             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
345 #endif
346 #endif
347 #if defined(PETSC_USE_CTABLE)
348             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
349             col  = (col - 1)/bs;
350 #else
351             col = (baij->colmap[in[j]] - 1)/bs;
352 #endif
353             if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
354               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
355               col  =  in[j];
356             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", bs*im[i], bs*in[j]);
357           } else col = in[j];
358           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
359         }
360       }
361     } else {
362       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
363       if (!baij->donotstash) {
364         if (roworiented) {
365           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
366         } else {
367           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
368         }
369       }
370     }
371   }
372   PetscFunctionReturn(0);
373 }
374 
375 #define HASH_KEY 0.6180339887
376 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
377 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
378 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
379 #undef __FUNCT__
380 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
381 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
382 {
383   Mat_MPIBAIJ    *baij       = (Mat_MPIBAIJ*)mat->data;
384   PetscBool      roworiented = baij->roworiented;
385   PetscErrorCode ierr;
386   PetscInt       i,j,row,col;
387   PetscInt       rstart_orig=mat->rmap->rstart;
388   PetscInt       rend_orig  =mat->rmap->rend,Nbs=baij->Nbs;
389   PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
390   PetscReal      tmp;
391   MatScalar      **HD = baij->hd,value;
392 #if defined(PETSC_USE_DEBUG)
393   PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
394 #endif
395 
396   PetscFunctionBegin;
397   for (i=0; i<m; i++) {
398 #if defined(PETSC_USE_DEBUG)
399     if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
400     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
401 #endif
402     row = im[i];
403     if (row >= rstart_orig && row < rend_orig) {
404       for (j=0; j<n; j++) {
405         col = in[j];
406         if (roworiented) value = v[i*n+j];
407         else             value = v[i+j*m];
408         /* Look up PetscInto the Hash Table */
409         key = (row/bs)*Nbs+(col/bs)+1;
410         h1  = HASH(size,key,tmp);
411 
412 
413         idx = h1;
414 #if defined(PETSC_USE_DEBUG)
415         insert_ct++;
416         total_ct++;
417         if (HT[idx] != key) {
418           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
419           if (idx == size) {
420             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
421             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
422           }
423         }
424 #else
425         if (HT[idx] != key) {
426           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
427           if (idx == size) {
428             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
429             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
430           }
431         }
432 #endif
433         /* A HASH table entry is found, so insert the values at the correct address */
434         if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
435         else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
436       }
437     } else if (!baij->donotstash) {
438       if (roworiented) {
439         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
440       } else {
441         ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
442       }
443     }
444   }
445 #if defined(PETSC_USE_DEBUG)
446   baij->ht_total_ct  = total_ct;
447   baij->ht_insert_ct = insert_ct;
448 #endif
449   PetscFunctionReturn(0);
450 }
451 
452 #undef __FUNCT__
453 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
454 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
455 {
456   Mat_MPIBAIJ       *baij       = (Mat_MPIBAIJ*)mat->data;
457   PetscBool         roworiented = baij->roworiented;
458   PetscErrorCode    ierr;
459   PetscInt          i,j,ii,jj,row,col;
460   PetscInt          rstart=baij->rstartbs;
461   PetscInt          rend  =mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
462   PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
463   PetscReal         tmp;
464   MatScalar         **HD = baij->hd,*baij_a;
465   const PetscScalar *v_t,*value;
466 #if defined(PETSC_USE_DEBUG)
467   PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
468 #endif
469 
470   PetscFunctionBegin;
471   if (roworiented) stepval = (n-1)*bs;
472   else stepval = (m-1)*bs;
473 
474   for (i=0; i<m; i++) {
475 #if defined(PETSC_USE_DEBUG)
476     if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
477     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
478 #endif
479     row = im[i];
480     v_t = v + i*nbs2;
481     if (row >= rstart && row < rend) {
482       for (j=0; j<n; j++) {
483         col = in[j];
484 
485         /* Look up into the Hash Table */
486         key = row*Nbs+col+1;
487         h1  = HASH(size,key,tmp);
488 
489         idx = h1;
490 #if defined(PETSC_USE_DEBUG)
491         total_ct++;
492         insert_ct++;
493         if (HT[idx] != key) {
494           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
495           if (idx == size) {
496             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
497             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
498           }
499         }
500 #else
501         if (HT[idx] != key) {
502           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
503           if (idx == size) {
504             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
505             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
506           }
507         }
508 #endif
509         baij_a = HD[idx];
510         if (roworiented) {
511           /*value = v + i*(stepval+bs)*bs + j*bs;*/
512           /* value = v + (i*(stepval+bs)+j)*bs; */
513           value = v_t;
514           v_t  += bs;
515           if (addv == ADD_VALUES) {
516             for (ii=0; ii<bs; ii++,value+=stepval) {
517               for (jj=ii; jj<bs2; jj+=bs) {
518                 baij_a[jj] += *value++;
519               }
520             }
521           } else {
522             for (ii=0; ii<bs; ii++,value+=stepval) {
523               for (jj=ii; jj<bs2; jj+=bs) {
524                 baij_a[jj] = *value++;
525               }
526             }
527           }
528         } else {
529           value = v + j*(stepval+bs)*bs + i*bs;
530           if (addv == ADD_VALUES) {
531             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
532               for (jj=0; jj<bs; jj++) {
533                 baij_a[jj] += *value++;
534               }
535             }
536           } else {
537             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
538               for (jj=0; jj<bs; jj++) {
539                 baij_a[jj] = *value++;
540               }
541             }
542           }
543         }
544       }
545     } else {
546       if (!baij->donotstash) {
547         if (roworiented) {
548           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
549         } else {
550           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
551         }
552       }
553     }
554   }
555 #if defined(PETSC_USE_DEBUG)
556   baij->ht_total_ct  = total_ct;
557   baij->ht_insert_ct = insert_ct;
558 #endif
559   PetscFunctionReturn(0);
560 }
561 
562 #undef __FUNCT__
563 #define __FUNCT__ "MatGetValues_MPIBAIJ"
564 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
565 {
566   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
567   PetscErrorCode ierr;
568   PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
569   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
570 
571   PetscFunctionBegin;
572   for (i=0; i<m; i++) {
573     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
574     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
575     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
576       row = idxm[i] - bsrstart;
577       for (j=0; j<n; j++) {
578         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
579         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
580         if (idxn[j] >= bscstart && idxn[j] < bscend) {
581           col  = idxn[j] - bscstart;
582           ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
583         } else {
584           if (!baij->colmap) {
585             ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
586           }
587 #if defined(PETSC_USE_CTABLE)
588           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
589           data--;
590 #else
591           data = baij->colmap[idxn[j]/bs]-1;
592 #endif
593           if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
594           else {
595             col  = data + idxn[j]%bs;
596             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
597           }
598         }
599       }
600     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
601   }
602   PetscFunctionReturn(0);
603 }
604 
605 #undef __FUNCT__
606 #define __FUNCT__ "MatNorm_MPIBAIJ"
607 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
608 {
609   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
610   Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
611   PetscErrorCode ierr;
612   PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
613   PetscReal      sum = 0.0;
614   MatScalar      *v;
615 
616   PetscFunctionBegin;
617   if (baij->size == 1) {
618     ierr =  MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
619   } else {
620     if (type == NORM_FROBENIUS) {
621       v  = amat->a;
622       nz = amat->nz*bs2;
623       for (i=0; i<nz; i++) {
624         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
625       }
626       v  = bmat->a;
627       nz = bmat->nz*bs2;
628       for (i=0; i<nz; i++) {
629         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
630       }
631       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
632       *nrm = PetscSqrtReal(*nrm);
633     } else if (type == NORM_1) { /* max column sum */
634       PetscReal *tmp,*tmp2;
635       PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
636       ierr = PetscMalloc2(mat->cmap->N,&tmp,mat->cmap->N,&tmp2);CHKERRQ(ierr);
637       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
638       v    = amat->a; jj = amat->j;
639       for (i=0; i<amat->nz; i++) {
640         for (j=0; j<bs; j++) {
641           col = bs*(cstart + *jj) + j; /* column index */
642           for (row=0; row<bs; row++) {
643             tmp[col] += PetscAbsScalar(*v);  v++;
644           }
645         }
646         jj++;
647       }
648       v = bmat->a; jj = bmat->j;
649       for (i=0; i<bmat->nz; i++) {
650         for (j=0; j<bs; j++) {
651           col = bs*garray[*jj] + j;
652           for (row=0; row<bs; row++) {
653             tmp[col] += PetscAbsScalar(*v); v++;
654           }
655         }
656         jj++;
657       }
658       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
659       *nrm = 0.0;
660       for (j=0; j<mat->cmap->N; j++) {
661         if (tmp2[j] > *nrm) *nrm = tmp2[j];
662       }
663       ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
664     } else if (type == NORM_INFINITY) { /* max row sum */
665       PetscReal *sums;
666       ierr = PetscMalloc1(bs,&sums);CHKERRQ(ierr);
667       sum  = 0.0;
668       for (j=0; j<amat->mbs; j++) {
669         for (row=0; row<bs; row++) sums[row] = 0.0;
670         v  = amat->a + bs2*amat->i[j];
671         nz = amat->i[j+1]-amat->i[j];
672         for (i=0; i<nz; i++) {
673           for (col=0; col<bs; col++) {
674             for (row=0; row<bs; row++) {
675               sums[row] += PetscAbsScalar(*v); v++;
676             }
677           }
678         }
679         v  = bmat->a + bs2*bmat->i[j];
680         nz = bmat->i[j+1]-bmat->i[j];
681         for (i=0; i<nz; i++) {
682           for (col=0; col<bs; col++) {
683             for (row=0; row<bs; row++) {
684               sums[row] += PetscAbsScalar(*v); v++;
685             }
686           }
687         }
688         for (row=0; row<bs; row++) {
689           if (sums[row] > sum) sum = sums[row];
690         }
691       }
692       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
693       ierr = PetscFree(sums);CHKERRQ(ierr);
694     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for this norm yet");
695   }
696   PetscFunctionReturn(0);
697 }
698 
699 /*
700   Creates the hash table, and sets the table
701   This table is created only once.
702   If new entried need to be added to the matrix
703   then the hash table has to be destroyed and
704   recreated.
705 */
706 #undef __FUNCT__
707 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
708 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
709 {
710   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
711   Mat            A     = baij->A,B=baij->B;
712   Mat_SeqBAIJ    *a    = (Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)B->data;
713   PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
714   PetscErrorCode ierr;
715   PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
716   PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
717   PetscInt       *HT,key;
718   MatScalar      **HD;
719   PetscReal      tmp;
720 #if defined(PETSC_USE_INFO)
721   PetscInt ct=0,max=0;
722 #endif
723 
724   PetscFunctionBegin;
725   if (baij->ht) PetscFunctionReturn(0);
726 
727   baij->ht_size = (PetscInt)(factor*nz);
728   ht_size       = baij->ht_size;
729 
730   /* Allocate Memory for Hash Table */
731   ierr = PetscCalloc2(ht_size,&baij->hd,ht_size,&baij->ht);CHKERRQ(ierr);
732   HD   = baij->hd;
733   HT   = baij->ht;
734 
735   /* Loop Over A */
736   for (i=0; i<a->mbs; i++) {
737     for (j=ai[i]; j<ai[i+1]; j++) {
738       row = i+rstart;
739       col = aj[j]+cstart;
740 
741       key = row*Nbs + col + 1;
742       h1  = HASH(ht_size,key,tmp);
743       for (k=0; k<ht_size; k++) {
744         if (!HT[(h1+k)%ht_size]) {
745           HT[(h1+k)%ht_size] = key;
746           HD[(h1+k)%ht_size] = a->a + j*bs2;
747           break;
748 #if defined(PETSC_USE_INFO)
749         } else {
750           ct++;
751 #endif
752         }
753       }
754 #if defined(PETSC_USE_INFO)
755       if (k> max) max = k;
756 #endif
757     }
758   }
759   /* Loop Over B */
760   for (i=0; i<b->mbs; i++) {
761     for (j=bi[i]; j<bi[i+1]; j++) {
762       row = i+rstart;
763       col = garray[bj[j]];
764       key = row*Nbs + col + 1;
765       h1  = HASH(ht_size,key,tmp);
766       for (k=0; k<ht_size; k++) {
767         if (!HT[(h1+k)%ht_size]) {
768           HT[(h1+k)%ht_size] = key;
769           HD[(h1+k)%ht_size] = b->a + j*bs2;
770           break;
771 #if defined(PETSC_USE_INFO)
772         } else {
773           ct++;
774 #endif
775         }
776       }
777 #if defined(PETSC_USE_INFO)
778       if (k> max) max = k;
779 #endif
780     }
781   }
782 
783   /* Print Summary */
784 #if defined(PETSC_USE_INFO)
785   for (i=0,j=0; i<ht_size; i++) {
786     if (HT[i]) j++;
787   }
788   ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
789 #endif
790   PetscFunctionReturn(0);
791 }
792 
793 #undef __FUNCT__
794 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
795 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
796 {
797   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
798   PetscErrorCode ierr;
799   PetscInt       nstash,reallocs;
800   InsertMode     addv;
801 
802   PetscFunctionBegin;
803   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
804 
805   /* make sure all processors are either in INSERTMODE or ADDMODE */
806   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
807   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
808   mat->insertmode = addv; /* in case this processor had no cache */
809 
810   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
811   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
812   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
813   ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
814   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
815   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
816   PetscFunctionReturn(0);
817 }
818 
819 #undef __FUNCT__
820 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
821 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
822 {
823   Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
824   Mat_SeqBAIJ    *a   =(Mat_SeqBAIJ*)baij->A->data;
825   PetscErrorCode ierr;
826   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
827   PetscInt       *row,*col;
828   PetscBool      r1,r2,r3,other_disassembled;
829   MatScalar      *val;
830   InsertMode     addv = mat->insertmode;
831   PetscMPIInt    n;
832 
833   PetscFunctionBegin;
834   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
835   if (!baij->donotstash && !mat->nooffprocentries) {
836     while (1) {
837       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
838       if (!flg) break;
839 
840       for (i=0; i<n;) {
841         /* Now identify the consecutive vals belonging to the same row */
842         for (j=i,rstart=row[j]; j<n; j++) {
843           if (row[j] != rstart) break;
844         }
845         if (j < n) ncols = j-i;
846         else       ncols = n-i;
847         /* Now assemble all these values with a single function call */
848         ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
849         i    = j;
850       }
851     }
852     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
853     /* Now process the block-stash. Since the values are stashed column-oriented,
854        set the roworiented flag to column oriented, and after MatSetValues()
855        restore the original flags */
856     r1 = baij->roworiented;
857     r2 = a->roworiented;
858     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
859 
860     baij->roworiented = PETSC_FALSE;
861     a->roworiented    = PETSC_FALSE;
862 
863     (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
864     while (1) {
865       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
866       if (!flg) break;
867 
868       for (i=0; i<n;) {
869         /* Now identify the consecutive vals belonging to the same row */
870         for (j=i,rstart=row[j]; j<n; j++) {
871           if (row[j] != rstart) break;
872         }
873         if (j < n) ncols = j-i;
874         else       ncols = n-i;
875         ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
876         i    = j;
877       }
878     }
879     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
880 
881     baij->roworiented = r1;
882     a->roworiented    = r2;
883 
884     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
885   }
886 
887   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
888   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
889 
890   /* determine if any processor has disassembled, if so we must
891      also disassemble ourselfs, in order that we may reassemble. */
892   /*
893      if nonzero structure of submatrix B cannot change then we know that
894      no processor disassembled thus we can skip this stuff
895   */
896   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
897     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
898     if (mat->was_assembled && !other_disassembled) {
899       ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
900     }
901   }
902 
903   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
904     ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
905   }
906   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
907   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
908 
909 #if defined(PETSC_USE_INFO)
910   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
911     ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);
912 
913     baij->ht_total_ct  = 0;
914     baij->ht_insert_ct = 0;
915   }
916 #endif
917   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
918     ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
919 
920     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
921     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
922   }
923 
924   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
925 
926   baij->rowvalues = 0;
927 
928   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
929   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
930     PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
931     ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
932   }
933   PetscFunctionReturn(0);
934 }
935 
936 extern PetscErrorCode MatView_SeqBAIJ(Mat,PetscViewer);
937 #include <petscdraw.h>
938 #undef __FUNCT__
939 #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
940 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
941 {
942   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
943   PetscErrorCode    ierr;
944   PetscMPIInt       rank = baij->rank;
945   PetscInt          bs   = mat->rmap->bs;
946   PetscBool         iascii,isdraw;
947   PetscViewer       sviewer;
948   PetscViewerFormat format;
949 
950   PetscFunctionBegin;
951   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
952   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
953   if (iascii) {
954     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
955     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
956       MatInfo info;
957       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
958       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
959       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
960       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
961                                                 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
962       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
963       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
964       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
965       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
966       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
967       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
968       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
969       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
970       PetscFunctionReturn(0);
971     } else if (format == PETSC_VIEWER_ASCII_INFO) {
972       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
973       PetscFunctionReturn(0);
974     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
975       PetscFunctionReturn(0);
976     }
977   }
978 
979   if (isdraw) {
980     PetscDraw draw;
981     PetscBool isnull;
982     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
983     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
984   }
985 
986   {
987     /* assemble the entire matrix onto first processor. */
988     Mat         A;
989     Mat_SeqBAIJ *Aloc;
990     PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
991     MatScalar   *a;
992 
993     /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
994     /* Perhaps this should be the type of mat? */
995     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
996     if (!rank) {
997       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
998     } else {
999       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1000     }
1001     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
1002     ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
1003     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1004     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1005 
1006     /* copy over the A part */
1007     Aloc = (Mat_SeqBAIJ*)baij->A->data;
1008     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1009     ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);
1010 
1011     for (i=0; i<mbs; i++) {
1012       rvals[0] = bs*(baij->rstartbs + i);
1013       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1014       for (j=ai[i]; j<ai[i+1]; j++) {
1015         col = (baij->cstartbs+aj[j])*bs;
1016         for (k=0; k<bs; k++) {
1017           ierr      = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1018           col++; a += bs;
1019         }
1020       }
1021     }
1022     /* copy over the B part */
1023     Aloc = (Mat_SeqBAIJ*)baij->B->data;
1024     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1025     for (i=0; i<mbs; i++) {
1026       rvals[0] = bs*(baij->rstartbs + i);
1027       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1028       for (j=ai[i]; j<ai[i+1]; j++) {
1029         col = baij->garray[aj[j]]*bs;
1030         for (k=0; k<bs; k++) {
1031           ierr      = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1032           col++; a += bs;
1033         }
1034       }
1035     }
1036     ierr = PetscFree(rvals);CHKERRQ(ierr);
1037     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1038     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1039     /*
1040        Everyone has to call to draw the matrix since the graphics waits are
1041        synchronized across all processors that share the PetscDraw object
1042     */
1043     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1044     if (!rank) {
1045       ierr = MatView_SeqBAIJ(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1046     }
1047     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1048     ierr = MatDestroy(&A);CHKERRQ(ierr);
1049   }
1050   PetscFunctionReturn(0);
1051 }
1052 
1053 #undef __FUNCT__
1054 #define __FUNCT__ "MatView_MPIBAIJ_Binary"
1055 static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
1056 {
1057   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)mat->data;
1058   Mat_SeqBAIJ    *A = (Mat_SeqBAIJ*)a->A->data;
1059   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)a->B->data;
1060   PetscErrorCode ierr;
1061   PetscInt       i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen;
1062   PetscInt       *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll;
1063   int            fd;
1064   PetscScalar    *column_values;
1065   FILE           *file;
1066   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1067   PetscInt       message_count,flowcontrolcount;
1068 
1069   PetscFunctionBegin;
1070   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1071   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1072   nz   = bs2*(A->nz + B->nz);
1073   rlen = mat->rmap->n;
1074   if (!rank) {
1075     header[0] = MAT_FILE_CLASSID;
1076     header[1] = mat->rmap->N;
1077     header[2] = mat->cmap->N;
1078 
1079     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1080     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1081     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1082     /* get largest number of rows any processor has */
1083     range = mat->rmap->range;
1084     for (i=1; i<size; i++) {
1085       rlen = PetscMax(rlen,range[i+1] - range[i]);
1086     }
1087   } else {
1088     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1089   }
1090 
1091   ierr = PetscMalloc1((rlen/bs),&crow_lens);CHKERRQ(ierr);
1092   /* compute lengths of each row  */
1093   for (i=0; i<a->mbs; i++) {
1094     crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1095   }
1096   /* store the row lengths to the file */
1097   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1098   if (!rank) {
1099     MPI_Status status;
1100     ierr = PetscMalloc1(rlen,&row_lens);CHKERRQ(ierr);
1101     rlen = (range[1] - range[0])/bs;
1102     for (i=0; i<rlen; i++) {
1103       for (j=0; j<bs; j++) {
1104         row_lens[i*bs+j] = bs*crow_lens[i];
1105       }
1106     }
1107     ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1108     for (i=1; i<size; i++) {
1109       rlen = (range[i+1] - range[i])/bs;
1110       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1111       ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1112       for (k=0; k<rlen; k++) {
1113         for (j=0; j<bs; j++) {
1114           row_lens[k*bs+j] = bs*crow_lens[k];
1115         }
1116       }
1117       ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1118     }
1119     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1120     ierr = PetscFree(row_lens);CHKERRQ(ierr);
1121   } else {
1122     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1123     ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1124     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1125   }
1126   ierr = PetscFree(crow_lens);CHKERRQ(ierr);
1127 
1128   /* load up the local column indices. Include for all rows not just one for each block row since process 0 does not have the
1129      information needed to make it for each row from a block row. This does require more communication but still not more than
1130      the communication needed for the nonzero values  */
1131   nzmax = nz; /*  space a largest processor needs */
1132   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1133   ierr  = PetscMalloc1(nzmax,&column_indices);CHKERRQ(ierr);
1134   cnt   = 0;
1135   for (i=0; i<a->mbs; i++) {
1136     pcnt = cnt;
1137     for (j=B->i[i]; j<B->i[i+1]; j++) {
1138       if ((col = garray[B->j[j]]) > cstart) break;
1139       for (l=0; l<bs; l++) {
1140         column_indices[cnt++] = bs*col+l;
1141       }
1142     }
1143     for (k=A->i[i]; k<A->i[i+1]; k++) {
1144       for (l=0; l<bs; l++) {
1145         column_indices[cnt++] = bs*(A->j[k] + cstart)+l;
1146       }
1147     }
1148     for (; j<B->i[i+1]; j++) {
1149       for (l=0; l<bs; l++) {
1150         column_indices[cnt++] = bs*garray[B->j[j]]+l;
1151       }
1152     }
1153     len = cnt - pcnt;
1154     for (k=1; k<bs; k++) {
1155       ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr);
1156       cnt += len;
1157     }
1158   }
1159   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1160 
1161   /* store the columns to the file */
1162   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1163   if (!rank) {
1164     MPI_Status status;
1165     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1166     for (i=1; i<size; i++) {
1167       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1168       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1169       ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1170       ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1171     }
1172     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1173   } else {
1174     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1175     ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1176     ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1177     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1178   }
1179   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1180 
1181   /* load up the numerical values */
1182   ierr = PetscMalloc1(nzmax,&column_values);CHKERRQ(ierr);
1183   cnt  = 0;
1184   for (i=0; i<a->mbs; i++) {
1185     rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]);
1186     for (j=B->i[i]; j<B->i[i+1]; j++) {
1187       if (garray[B->j[j]] > cstart) break;
1188       for (l=0; l<bs; l++) {
1189         for (ll=0; ll<bs; ll++) {
1190           column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1191         }
1192       }
1193       cnt += bs;
1194     }
1195     for (k=A->i[i]; k<A->i[i+1]; k++) {
1196       for (l=0; l<bs; l++) {
1197         for (ll=0; ll<bs; ll++) {
1198           column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll];
1199         }
1200       }
1201       cnt += bs;
1202     }
1203     for (; j<B->i[i+1]; j++) {
1204       for (l=0; l<bs; l++) {
1205         for (ll=0; ll<bs; ll++) {
1206           column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1207         }
1208       }
1209       cnt += bs;
1210     }
1211     cnt += (bs-1)*rlen;
1212   }
1213   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1214 
1215   /* store the column values to the file */
1216   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1217   if (!rank) {
1218     MPI_Status status;
1219     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1220     for (i=1; i<size; i++) {
1221       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1222       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1223       ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1224       ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1225     }
1226     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1227   } else {
1228     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1229     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1230     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1231     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1232   }
1233   ierr = PetscFree(column_values);CHKERRQ(ierr);
1234 
1235   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1236   if (file) {
1237     fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
1238   }
1239   PetscFunctionReturn(0);
1240 }
1241 
1242 #undef __FUNCT__
1243 #define __FUNCT__ "MatView_MPIBAIJ"
1244 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
1245 {
1246   PetscErrorCode ierr;
1247   PetscBool      iascii,isdraw,issocket,isbinary;
1248 
1249   PetscFunctionBegin;
1250   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1251   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1252   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1253   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1254   if (iascii || isdraw || issocket) {
1255     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1256   } else if (isbinary) {
1257     ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1258   }
1259   PetscFunctionReturn(0);
1260 }
1261 
1262 #undef __FUNCT__
1263 #define __FUNCT__ "MatDestroy_MPIBAIJ"
1264 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
1265 {
1266   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1267   PetscErrorCode ierr;
1268 
1269   PetscFunctionBegin;
1270 #if defined(PETSC_USE_LOG)
1271   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
1272 #endif
1273   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1274   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
1275   ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
1276   ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
1277 #if defined(PETSC_USE_CTABLE)
1278   ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
1279 #else
1280   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
1281 #endif
1282   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
1283   ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
1284   ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
1285   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
1286   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
1287   ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);
1288   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
1289   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1290 
1291   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1292   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1293   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1294   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
1295   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1296   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1297   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1298   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C",NULL);CHKERRQ(ierr);
1299   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1300   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C",NULL);CHKERRQ(ierr);
1301   PetscFunctionReturn(0);
1302 }
1303 
1304 #undef __FUNCT__
1305 #define __FUNCT__ "MatMult_MPIBAIJ"
1306 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1307 {
1308   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1309   PetscErrorCode ierr;
1310   PetscInt       nt;
1311 
1312   PetscFunctionBegin;
1313   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1314   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1315   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1316   if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
1317   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1318   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1319   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1320   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1321   PetscFunctionReturn(0);
1322 }
1323 
1324 #undef __FUNCT__
1325 #define __FUNCT__ "MatMultAdd_MPIBAIJ"
1326 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1327 {
1328   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1329   PetscErrorCode ierr;
1330 
1331   PetscFunctionBegin;
1332   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1333   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1334   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1335   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1336   PetscFunctionReturn(0);
1337 }
1338 
1339 #undef __FUNCT__
1340 #define __FUNCT__ "MatMultTranspose_MPIBAIJ"
1341 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1342 {
1343   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1344   PetscErrorCode ierr;
1345   PetscBool      merged;
1346 
1347   PetscFunctionBegin;
1348   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1349   /* do nondiagonal part */
1350   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1351   if (!merged) {
1352     /* send it on its way */
1353     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1354     /* do local part */
1355     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1356     /* receive remote parts: note this assumes the values are not actually */
1357     /* inserted in yy until the next line */
1358     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1359   } else {
1360     /* do local part */
1361     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1362     /* send it on its way */
1363     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1364     /* values actually were received in the Begin() but we need to call this nop */
1365     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1366   }
1367   PetscFunctionReturn(0);
1368 }
1369 
1370 #undef __FUNCT__
1371 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
1372 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1373 {
1374   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1375   PetscErrorCode ierr;
1376 
1377   PetscFunctionBegin;
1378   /* do nondiagonal part */
1379   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1380   /* send it on its way */
1381   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1382   /* do local part */
1383   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1384   /* receive remote parts: note this assumes the values are not actually */
1385   /* inserted in yy until the next line, which is true for my implementation*/
1386   /* but is not perhaps always true. */
1387   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1388   PetscFunctionReturn(0);
1389 }
1390 
1391 /*
1392   This only works correctly for square matrices where the subblock A->A is the
1393    diagonal block
1394 */
1395 #undef __FUNCT__
1396 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
1397 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1398 {
1399   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1400   PetscErrorCode ierr;
1401 
1402   PetscFunctionBegin;
1403   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1404   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1405   PetscFunctionReturn(0);
1406 }
1407 
1408 #undef __FUNCT__
1409 #define __FUNCT__ "MatScale_MPIBAIJ"
1410 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
1411 {
1412   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1413   PetscErrorCode ierr;
1414 
1415   PetscFunctionBegin;
1416   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1417   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1418   PetscFunctionReturn(0);
1419 }
1420 
1421 #undef __FUNCT__
1422 #define __FUNCT__ "MatGetRow_MPIBAIJ"
1423 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1424 {
1425   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
1426   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1427   PetscErrorCode ierr;
1428   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1429   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
1430   PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;
1431 
1432   PetscFunctionBegin;
1433   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1434   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1435   mat->getrowactive = PETSC_TRUE;
1436 
1437   if (!mat->rowvalues && (idx || v)) {
1438     /*
1439         allocate enough space to hold information from the longest row.
1440     */
1441     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1442     PetscInt    max = 1,mbs = mat->mbs,tmp;
1443     for (i=0; i<mbs; i++) {
1444       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1445       if (max < tmp) max = tmp;
1446     }
1447     ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr);
1448   }
1449   lrow = row - brstart;
1450 
1451   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1452   if (!v)   {pvA = 0; pvB = 0;}
1453   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1454   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1455   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1456   nztot = nzA + nzB;
1457 
1458   cmap = mat->garray;
1459   if (v  || idx) {
1460     if (nztot) {
1461       /* Sort by increasing column numbers, assuming A and B already sorted */
1462       PetscInt imark = -1;
1463       if (v) {
1464         *v = v_p = mat->rowvalues;
1465         for (i=0; i<nzB; i++) {
1466           if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
1467           else break;
1468         }
1469         imark = i;
1470         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1471         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1472       }
1473       if (idx) {
1474         *idx = idx_p = mat->rowindices;
1475         if (imark > -1) {
1476           for (i=0; i<imark; i++) {
1477             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1478           }
1479         } else {
1480           for (i=0; i<nzB; i++) {
1481             if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1482             else break;
1483           }
1484           imark = i;
1485         }
1486         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1487         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1488       }
1489     } else {
1490       if (idx) *idx = 0;
1491       if (v)   *v   = 0;
1492     }
1493   }
1494   *nz  = nztot;
1495   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1496   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1497   PetscFunctionReturn(0);
1498 }
1499 
1500 #undef __FUNCT__
1501 #define __FUNCT__ "MatRestoreRow_MPIBAIJ"
1502 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1503 {
1504   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1505 
1506   PetscFunctionBegin;
1507   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1508   baij->getrowactive = PETSC_FALSE;
1509   PetscFunctionReturn(0);
1510 }
1511 
1512 #undef __FUNCT__
1513 #define __FUNCT__ "MatZeroEntries_MPIBAIJ"
1514 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1515 {
1516   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1517   PetscErrorCode ierr;
1518 
1519   PetscFunctionBegin;
1520   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1521   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1522   PetscFunctionReturn(0);
1523 }
1524 
1525 #undef __FUNCT__
1526 #define __FUNCT__ "MatGetInfo_MPIBAIJ"
1527 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1528 {
1529   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
1530   Mat            A  = a->A,B = a->B;
1531   PetscErrorCode ierr;
1532   PetscReal      isend[5],irecv[5];
1533 
1534   PetscFunctionBegin;
1535   info->block_size = (PetscReal)matin->rmap->bs;
1536 
1537   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1538 
1539   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1540   isend[3] = info->memory;  isend[4] = info->mallocs;
1541 
1542   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1543 
1544   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1545   isend[3] += info->memory;  isend[4] += info->mallocs;
1546 
1547   if (flag == MAT_LOCAL) {
1548     info->nz_used      = isend[0];
1549     info->nz_allocated = isend[1];
1550     info->nz_unneeded  = isend[2];
1551     info->memory       = isend[3];
1552     info->mallocs      = isend[4];
1553   } else if (flag == MAT_GLOBAL_MAX) {
1554     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1555 
1556     info->nz_used      = irecv[0];
1557     info->nz_allocated = irecv[1];
1558     info->nz_unneeded  = irecv[2];
1559     info->memory       = irecv[3];
1560     info->mallocs      = irecv[4];
1561   } else if (flag == MAT_GLOBAL_SUM) {
1562     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1563 
1564     info->nz_used      = irecv[0];
1565     info->nz_allocated = irecv[1];
1566     info->nz_unneeded  = irecv[2];
1567     info->memory       = irecv[3];
1568     info->mallocs      = irecv[4];
1569   } else SETERRQ1(PetscObjectComm((PetscObject)matin),PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1570   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1571   info->fill_ratio_needed = 0;
1572   info->factor_mallocs    = 0;
1573   PetscFunctionReturn(0);
1574 }
1575 
1576 #undef __FUNCT__
1577 #define __FUNCT__ "MatSetOption_MPIBAIJ"
1578 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg)
1579 {
1580   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1581   PetscErrorCode ierr;
1582 
1583   PetscFunctionBegin;
1584   switch (op) {
1585   case MAT_NEW_NONZERO_LOCATIONS:
1586   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1587   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1588   case MAT_KEEP_NONZERO_PATTERN:
1589   case MAT_NEW_NONZERO_LOCATION_ERR:
1590     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1591     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1592     break;
1593   case MAT_ROW_ORIENTED:
1594     a->roworiented = flg;
1595 
1596     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1597     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1598     break;
1599   case MAT_NEW_DIAGONALS:
1600     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1601     break;
1602   case MAT_IGNORE_OFF_PROC_ENTRIES:
1603     a->donotstash = flg;
1604     break;
1605   case MAT_USE_HASH_TABLE:
1606     a->ht_flag = flg;
1607     break;
1608   case MAT_SYMMETRIC:
1609   case MAT_STRUCTURALLY_SYMMETRIC:
1610   case MAT_HERMITIAN:
1611   case MAT_SYMMETRY_ETERNAL:
1612     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1613     break;
1614   default:
1615     SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"unknown option %d",op);
1616   }
1617   PetscFunctionReturn(0);
1618 }
1619 
1620 #undef __FUNCT__
1621 #define __FUNCT__ "MatTranspose_MPIBAIJ"
1622 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
1623 {
1624   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
1625   Mat_SeqBAIJ    *Aloc;
1626   Mat            B;
1627   PetscErrorCode ierr;
1628   PetscInt       M =A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
1629   PetscInt       bs=A->rmap->bs,mbs=baij->mbs;
1630   MatScalar      *a;
1631 
1632   PetscFunctionBegin;
1633   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1634   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1635     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1636     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1637     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1638     /* Do not know preallocation information, but must set block size */
1639     ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,NULL,PETSC_DECIDE,NULL);CHKERRQ(ierr);
1640   } else {
1641     B = *matout;
1642   }
1643 
1644   /* copy over the A part */
1645   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1646   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1647   ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);
1648 
1649   for (i=0; i<mbs; i++) {
1650     rvals[0] = bs*(baij->rstartbs + i);
1651     for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1652     for (j=ai[i]; j<ai[i+1]; j++) {
1653       col = (baij->cstartbs+aj[j])*bs;
1654       for (k=0; k<bs; k++) {
1655         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1656 
1657         col++; a += bs;
1658       }
1659     }
1660   }
1661   /* copy over the B part */
1662   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1663   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1664   for (i=0; i<mbs; i++) {
1665     rvals[0] = bs*(baij->rstartbs + i);
1666     for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1667     for (j=ai[i]; j<ai[i+1]; j++) {
1668       col = baij->garray[aj[j]]*bs;
1669       for (k=0; k<bs; k++) {
1670         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1671         col++;
1672         a += bs;
1673       }
1674     }
1675   }
1676   ierr = PetscFree(rvals);CHKERRQ(ierr);
1677   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1678   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1679 
1680   if (reuse == MAT_INITIAL_MATRIX || *matout != A) *matout = B;
1681   else {
1682     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
1683   }
1684   PetscFunctionReturn(0);
1685 }
1686 
1687 #undef __FUNCT__
1688 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
1689 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1690 {
1691   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1692   Mat            a     = baij->A,b = baij->B;
1693   PetscErrorCode ierr;
1694   PetscInt       s1,s2,s3;
1695 
1696   PetscFunctionBegin;
1697   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1698   if (rr) {
1699     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1700     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1701     /* Overlap communication with computation. */
1702     ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1703   }
1704   if (ll) {
1705     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1706     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1707     ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
1708   }
1709   /* scale  the diagonal block */
1710   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1711 
1712   if (rr) {
1713     /* Do a scatter end and then right scale the off-diagonal block */
1714     ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1715     ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
1716   }
1717   PetscFunctionReturn(0);
1718 }
1719 
1720 #undef __FUNCT__
1721 #define __FUNCT__ "MatZeroRows_MPIBAIJ"
1722 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
1723 {
1724   Mat_MPIBAIJ   *l      = (Mat_MPIBAIJ *) A->data;
1725   PetscInt      *owners = A->rmap->range;
1726   PetscInt       n      = A->rmap->n;
1727   PetscSF        sf;
1728   PetscInt      *lrows;
1729   PetscSFNode   *rrows;
1730   PetscInt       r, p = 0, len = 0;
1731   PetscErrorCode ierr;
1732 
1733   PetscFunctionBegin;
1734   /* Create SF where leaves are input rows and roots are owned rows */
1735   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
1736   for (r = 0; r < n; ++r) lrows[r] = -1;
1737   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
1738   for (r = 0; r < N; ++r) {
1739     const PetscInt idx   = rows[r];
1740     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
1741     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
1742       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
1743     }
1744     rrows[r].rank = p;
1745     rrows[r].index = rows[r] - owners[p];
1746   }
1747   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
1748   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
1749   /* Collect flags for rows to be zeroed */
1750   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1751   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1752   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1753   /* Compress and put in row numbers */
1754   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
1755   /* fix right hand side if needed */
1756   if (x && b) {
1757     const PetscScalar *xx;
1758     PetscScalar       *bb;
1759 
1760     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
1761     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1762     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
1763     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
1764     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1765   }
1766 
1767   /* actually zap the local rows */
1768   /*
1769         Zero the required rows. If the "diagonal block" of the matrix
1770      is square and the user wishes to set the diagonal we use separate
1771      code so that MatSetValues() is not called for each diagonal allocating
1772      new memory, thus calling lots of mallocs and slowing things down.
1773 
1774   */
1775   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1776   ierr = MatZeroRows_SeqBAIJ(l->B,len,lrows,0.0,0,0);CHKERRQ(ierr);
1777   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
1778     ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,diag,0,0);CHKERRQ(ierr);
1779   } else if (diag != 0.0) {
1780     ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,0,0);CHKERRQ(ierr);
1781     if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1782        MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1783     for (r = 0; r < len; ++r) {
1784       const PetscInt row = lrows[r] + A->rmap->rstart;
1785       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
1786     }
1787     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1788     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1789   } else {
1790     ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,0,0);CHKERRQ(ierr);
1791   }
1792   ierr = PetscFree(lrows);CHKERRQ(ierr);
1793 
1794   /* only change matrix nonzero state if pattern was allowed to be changed */
1795   if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) {
1796     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1797     ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1798   }
1799   PetscFunctionReturn(0);
1800 }
1801 
1802 #undef __FUNCT__
1803 #define __FUNCT__ "MatZeroRowsColumns_MPIBAIJ"
1804 PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
1805 {
1806   Mat_MPIBAIJ       *l = (Mat_MPIBAIJ*)A->data;
1807   PetscErrorCode    ierr;
1808   PetscMPIInt       size = l->size,n = A->rmap->n,lastidx = -1;
1809   PetscInt          i,j,k,r,p = 0,len = 0,row,col,count;
1810   PetscInt          *lrows,*owners = A->rmap->range;
1811   PetscSFNode       *rrows;
1812   PetscSF           sf;
1813   const PetscScalar *xx;
1814   PetscScalar       *bb,*mask;
1815   Vec               xmask,lmask;
1816   Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ*)l->B->data;
1817   PetscInt           bs = A->rmap->bs, bs2 = baij->bs2;
1818   PetscScalar       *aa;
1819 #if defined(PETSC_DEBUG)
1820   PetscBool found = PETSC_FALSE;
1821 #endif
1822 
1823   PetscFunctionBegin;
1824   /* Create SF where leaves are input rows and roots are owned rows */
1825   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
1826   for (r = 0; r < n; ++r) lrows[r] = -1;
1827   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
1828   for (r = 0; r < N; ++r) {
1829     const PetscInt idx   = rows[r];
1830     PetscBool      found = PETSC_FALSE;
1831     /* Trick for efficient searching for sorted rows */
1832     if (lastidx > idx) p = 0;
1833     lastidx = idx;
1834     for (; p < size; ++p) {
1835       if (idx >= owners[p] && idx < owners[p+1]) {
1836         rrows[r].rank  = p;
1837         rrows[r].index = rows[r] - owners[p];
1838         found = PETSC_TRUE;
1839         break;
1840       }
1841     }
1842     if (!found) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %d not found in matrix distribution", idx);
1843   }
1844   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
1845   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
1846   /* Collect flags for rows to be zeroed */
1847   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1848   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1849   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1850   /* Compress and put in row numbers */
1851   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
1852   /* zero diagonal part of matrix */
1853   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
1854   /* handle off diagonal part of matrix */
1855   ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr);
1856   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
1857   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
1858   for (i=0; i<len; i++) bb[lrows[i]] = 1;
1859   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1860   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1861   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1862   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1863   if (x) {
1864     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1865     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1866     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1867     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1868   }
1869   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1870   /* remove zeroed rows of off diagonal matrix */
1871   for (i = 0; i < len; ++i) {
1872     row   = lrows[i];
1873     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
1874     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
1875     for (k = 0; k < count; ++k) {
1876       aa[0] = 0.0;
1877       aa   += bs;
1878     }
1879   }
1880   /* loop over all elements of off process part of matrix zeroing removed columns*/
1881   for (i = 0; i < l->B->rmap->N; ++i) {
1882     row = i/bs;
1883     for (j = baij->i[row]; j < baij->i[row+1]; ++j) {
1884       for (k = 0; k < bs; ++k) {
1885         col = bs*baij->j[j] + k;
1886         if (PetscAbsScalar(mask[col])) {
1887           aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
1888           if (b) bb[i] -= aa[0]*xx[col];
1889           aa[0] = 0.0;
1890         }
1891       }
1892     }
1893   }
1894   if (x) {
1895     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1896     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1897   }
1898   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1899   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1900   ierr = PetscFree(lrows);CHKERRQ(ierr);
1901 
1902   /* only change matrix nonzero state if pattern was allowed to be changed */
1903   if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) {
1904     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1905     ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1906   }
1907   PetscFunctionReturn(0);
1908 }
1909 
1910 #undef __FUNCT__
1911 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
1912 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1913 {
1914   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1915   PetscErrorCode ierr;
1916 
1917   PetscFunctionBegin;
1918   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1919   PetscFunctionReturn(0);
1920 }
1921 
1922 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat*);
1923 
1924 #undef __FUNCT__
1925 #define __FUNCT__ "MatEqual_MPIBAIJ"
1926 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool  *flag)
1927 {
1928   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1929   Mat            a,b,c,d;
1930   PetscBool      flg;
1931   PetscErrorCode ierr;
1932 
1933   PetscFunctionBegin;
1934   a = matA->A; b = matA->B;
1935   c = matB->A; d = matB->B;
1936 
1937   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1938   if (flg) {
1939     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1940   }
1941   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1942   PetscFunctionReturn(0);
1943 }
1944 
1945 #undef __FUNCT__
1946 #define __FUNCT__ "MatCopy_MPIBAIJ"
1947 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
1948 {
1949   PetscErrorCode ierr;
1950   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1951   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
1952 
1953   PetscFunctionBegin;
1954   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1955   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1956     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1957   } else {
1958     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1959     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1960   }
1961   PetscFunctionReturn(0);
1962 }
1963 
1964 #undef __FUNCT__
1965 #define __FUNCT__ "MatSetUp_MPIBAIJ"
1966 PetscErrorCode MatSetUp_MPIBAIJ(Mat A)
1967 {
1968   PetscErrorCode ierr;
1969 
1970   PetscFunctionBegin;
1971   ierr = MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1972   PetscFunctionReturn(0);
1973 }
1974 
1975 #undef __FUNCT__
1976 #define __FUNCT__ "MatAXPYGetPreallocation_MPIBAIJ"
1977 PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
1978 {
1979   PetscErrorCode ierr;
1980   PetscInt       bs = Y->rmap->bs,m = Y->rmap->N/bs;
1981   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data;
1982   Mat_SeqBAIJ    *y = (Mat_SeqBAIJ*)Y->data;
1983 
1984   PetscFunctionBegin;
1985   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
1986   PetscFunctionReturn(0);
1987 }
1988 
1989 #undef __FUNCT__
1990 #define __FUNCT__ "MatAXPY_MPIBAIJ"
1991 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1992 {
1993   PetscErrorCode ierr;
1994   Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ*)X->data,*yy=(Mat_MPIBAIJ*)Y->data;
1995   PetscBLASInt   bnz,one=1;
1996   Mat_SeqBAIJ    *x,*y;
1997 
1998   PetscFunctionBegin;
1999   if (str == SAME_NONZERO_PATTERN) {
2000     PetscScalar alpha = a;
2001     x    = (Mat_SeqBAIJ*)xx->A->data;
2002     y    = (Mat_SeqBAIJ*)yy->A->data;
2003     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2004     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2005     x    = (Mat_SeqBAIJ*)xx->B->data;
2006     y    = (Mat_SeqBAIJ*)yy->B->data;
2007     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
2008     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2009     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2010   } else {
2011     Mat      B;
2012     PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs;
2013     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2014     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2015     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2016     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2017     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2018     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2019     ierr = MatSetType(B,MATMPIBAIJ);CHKERRQ(ierr);
2020     ierr = MatAXPYGetPreallocation_SeqBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2021     ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2022     ierr = MatMPIBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2023     /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! */
2024     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2025     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2026     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2027     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2028   }
2029   PetscFunctionReturn(0);
2030 }
2031 
2032 #undef __FUNCT__
2033 #define __FUNCT__ "MatRealPart_MPIBAIJ"
2034 PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
2035 {
2036   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
2037   PetscErrorCode ierr;
2038 
2039   PetscFunctionBegin;
2040   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2041   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2042   PetscFunctionReturn(0);
2043 }
2044 
2045 #undef __FUNCT__
2046 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ"
2047 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
2048 {
2049   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
2050   PetscErrorCode ierr;
2051 
2052   PetscFunctionBegin;
2053   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2054   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2055   PetscFunctionReturn(0);
2056 }
2057 
2058 #undef __FUNCT__
2059 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ"
2060 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
2061 {
2062   PetscErrorCode ierr;
2063   IS             iscol_local;
2064   PetscInt       csize;
2065 
2066   PetscFunctionBegin;
2067   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
2068   if (call == MAT_REUSE_MATRIX) {
2069     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
2070     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2071   } else {
2072     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
2073   }
2074   ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
2075   if (call == MAT_INITIAL_MATRIX) {
2076     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
2077     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
2078   }
2079   PetscFunctionReturn(0);
2080 }
2081 extern PetscErrorCode MatGetSubMatrices_MPIBAIJ_local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,PetscBool*,Mat*);
2082 #undef __FUNCT__
2083 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ_Private"
2084 /*
2085   Not great since it makes two copies of the submatrix, first an SeqBAIJ
2086   in local and then by concatenating the local matrices the end result.
2087   Writing it directly would be much like MatGetSubMatrices_MPIBAIJ()
2088 */
2089 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2090 {
2091   PetscErrorCode ierr;
2092   PetscMPIInt    rank,size;
2093   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs;
2094   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol,nrow;
2095   Mat            M,Mreuse;
2096   MatScalar      *vwork,*aa;
2097   MPI_Comm       comm;
2098   IS             isrow_new, iscol_new;
2099   PetscBool      idflag,allrows, allcols;
2100   Mat_SeqBAIJ    *aij;
2101 
2102   PetscFunctionBegin;
2103   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2104   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2105   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2106   /* The compression and expansion should be avoided. Doesn't point
2107      out errors, might change the indices, hence buggey */
2108   ierr = ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new);CHKERRQ(ierr);
2109   ierr = ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new);CHKERRQ(ierr);
2110 
2111   /* Check for special case: each processor gets entire matrix columns */
2112   ierr = ISIdentity(iscol,&idflag);CHKERRQ(ierr);
2113   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
2114   if (idflag && ncol == mat->cmap->N) allcols = PETSC_TRUE;
2115   else allcols = PETSC_FALSE;
2116 
2117   ierr = ISIdentity(isrow,&idflag);CHKERRQ(ierr);
2118   ierr = ISGetLocalSize(isrow,&nrow);CHKERRQ(ierr);
2119   if (idflag && nrow == mat->rmap->N) allrows = PETSC_TRUE;
2120   else allrows = PETSC_FALSE;
2121 
2122   if (call ==  MAT_REUSE_MATRIX) {
2123     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
2124     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2125     ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_REUSE_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr);
2126   } else {
2127     ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_INITIAL_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr);
2128   }
2129   ierr = ISDestroy(&isrow_new);CHKERRQ(ierr);
2130   ierr = ISDestroy(&iscol_new);CHKERRQ(ierr);
2131   /*
2132       m - number of local rows
2133       n - number of columns (same on all processors)
2134       rstart - first row in new global matrix generated
2135   */
2136   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2137   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2138   m    = m/bs;
2139   n    = n/bs;
2140 
2141   if (call == MAT_INITIAL_MATRIX) {
2142     aij = (Mat_SeqBAIJ*)(Mreuse)->data;
2143     ii  = aij->i;
2144     jj  = aij->j;
2145 
2146     /*
2147         Determine the number of non-zeros in the diagonal and off-diagonal
2148         portions of the matrix in order to do correct preallocation
2149     */
2150 
2151     /* first get start and end of "diagonal" columns */
2152     if (csize == PETSC_DECIDE) {
2153       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2154       if (mglobal == n*bs) { /* square matrix */
2155         nlocal = m;
2156       } else {
2157         nlocal = n/size + ((n % size) > rank);
2158       }
2159     } else {
2160       nlocal = csize/bs;
2161     }
2162     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2163     rstart = rend - nlocal;
2164     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2165 
2166     /* next, compute all the lengths */
2167     ierr  = PetscMalloc2(m+1,&dlens,m+1,&olens);CHKERRQ(ierr);
2168     for (i=0; i<m; i++) {
2169       jend = ii[i+1] - ii[i];
2170       olen = 0;
2171       dlen = 0;
2172       for (j=0; j<jend; j++) {
2173         if (*jj < rstart || *jj >= rend) olen++;
2174         else dlen++;
2175         jj++;
2176       }
2177       olens[i] = olen;
2178       dlens[i] = dlen;
2179     }
2180     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
2181     ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr);
2182     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
2183     ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
2184     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
2185   } else {
2186     PetscInt ml,nl;
2187 
2188     M    = *newmat;
2189     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2190     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2191     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2192     /*
2193          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2194        rather than the slower MatSetValues().
2195     */
2196     M->was_assembled = PETSC_TRUE;
2197     M->assembled     = PETSC_FALSE;
2198   }
2199   ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2200   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2201   aij  = (Mat_SeqBAIJ*)(Mreuse)->data;
2202   ii   = aij->i;
2203   jj   = aij->j;
2204   aa   = aij->a;
2205   for (i=0; i<m; i++) {
2206     row   = rstart/bs + i;
2207     nz    = ii[i+1] - ii[i];
2208     cwork = jj;     jj += nz;
2209     vwork = aa;     aa += nz*bs*bs;
2210     ierr  = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2211   }
2212 
2213   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2214   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2215   *newmat = M;
2216 
2217   /* save submatrix used in processor for next request */
2218   if (call ==  MAT_INITIAL_MATRIX) {
2219     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2220     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2221   }
2222   PetscFunctionReturn(0);
2223 }
2224 
2225 #undef __FUNCT__
2226 #define __FUNCT__ "MatPermute_MPIBAIJ"
2227 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B)
2228 {
2229   MPI_Comm       comm,pcomm;
2230   PetscInt       clocal_size,nrows;
2231   const PetscInt *rows;
2232   PetscMPIInt    size;
2233   IS             crowp,lcolp;
2234   PetscErrorCode ierr;
2235 
2236   PetscFunctionBegin;
2237   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2238   /* make a collective version of 'rowp' */
2239   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
2240   if (pcomm==comm) {
2241     crowp = rowp;
2242   } else {
2243     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
2244     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
2245     ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr);
2246     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
2247   }
2248   ierr = ISSetPermutation(crowp);CHKERRQ(ierr);
2249   /* make a local version of 'colp' */
2250   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
2251   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
2252   if (size==1) {
2253     lcolp = colp;
2254   } else {
2255     ierr = ISAllGather(colp,&lcolp);CHKERRQ(ierr);
2256   }
2257   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
2258   /* now we just get the submatrix */
2259   ierr = MatGetLocalSize(A,NULL,&clocal_size);CHKERRQ(ierr);
2260   ierr = MatGetSubMatrix_MPIBAIJ_Private(A,crowp,lcolp,clocal_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
2261   /* clean up */
2262   if (pcomm!=comm) {
2263     ierr = ISDestroy(&crowp);CHKERRQ(ierr);
2264   }
2265   if (size>1) {
2266     ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
2267   }
2268   PetscFunctionReturn(0);
2269 }
2270 
2271 #undef __FUNCT__
2272 #define __FUNCT__ "MatGetGhosts_MPIBAIJ"
2273 PetscErrorCode  MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
2274 {
2275   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data;
2276   Mat_SeqBAIJ *B    = (Mat_SeqBAIJ*)baij->B->data;
2277 
2278   PetscFunctionBegin;
2279   if (nghosts) *nghosts = B->nbs;
2280   if (ghosts) *ghosts = baij->garray;
2281   PetscFunctionReturn(0);
2282 }
2283 
2284 #undef __FUNCT__
2285 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIBAIJ"
2286 PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat)
2287 {
2288   Mat            B;
2289   Mat_MPIBAIJ    *a  = (Mat_MPIBAIJ*)A->data;
2290   Mat_SeqBAIJ    *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data;
2291   Mat_SeqAIJ     *b;
2292   PetscErrorCode ierr;
2293   PetscMPIInt    size,rank,*recvcounts = 0,*displs = 0;
2294   PetscInt       sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs;
2295   PetscInt       m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf;
2296 
2297   PetscFunctionBegin;
2298   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
2299   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
2300 
2301   /* ----------------------------------------------------------------
2302      Tell every processor the number of nonzeros per row
2303   */
2304   ierr = PetscMalloc1((A->rmap->N/bs),&lens);CHKERRQ(ierr);
2305   for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) {
2306     lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs];
2307   }
2308   sendcount = A->rmap->rend/bs - A->rmap->rstart/bs;
2309   ierr      = PetscMalloc1(2*size,&recvcounts);CHKERRQ(ierr);
2310   displs    = recvcounts + size;
2311   for (i=0; i<size; i++) {
2312     recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs;
2313     displs[i]     = A->rmap->range[i]/bs;
2314   }
2315 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2316   ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2317 #else
2318   ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2319 #endif
2320   /* ---------------------------------------------------------------
2321      Create the sequential matrix of the same type as the local block diagonal
2322   */
2323   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
2324   ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2325   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2326   ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr);
2327   b    = (Mat_SeqAIJ*)B->data;
2328 
2329   /*--------------------------------------------------------------------
2330     Copy my part of matrix column indices over
2331   */
2332   sendcount  = ad->nz + bd->nz;
2333   jsendbuf   = b->j + b->i[rstarts[rank]/bs];
2334   a_jsendbuf = ad->j;
2335   b_jsendbuf = bd->j;
2336   n          = A->rmap->rend/bs - A->rmap->rstart/bs;
2337   cnt        = 0;
2338   for (i=0; i<n; i++) {
2339 
2340     /* put in lower diagonal portion */
2341     m = bd->i[i+1] - bd->i[i];
2342     while (m > 0) {
2343       /* is it above diagonal (in bd (compressed) numbering) */
2344       if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break;
2345       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2346       m--;
2347     }
2348 
2349     /* put in diagonal portion */
2350     for (j=ad->i[i]; j<ad->i[i+1]; j++) {
2351       jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++;
2352     }
2353 
2354     /* put in upper diagonal portion */
2355     while (m-- > 0) {
2356       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2357     }
2358   }
2359   if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt);
2360 
2361   /*--------------------------------------------------------------------
2362     Gather all column indices to all processors
2363   */
2364   for (i=0; i<size; i++) {
2365     recvcounts[i] = 0;
2366     for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) {
2367       recvcounts[i] += lens[j];
2368     }
2369   }
2370   displs[0] = 0;
2371   for (i=1; i<size; i++) {
2372     displs[i] = displs[i-1] + recvcounts[i-1];
2373   }
2374 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2375   ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2376 #else
2377   ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2378 #endif
2379   /*--------------------------------------------------------------------
2380     Assemble the matrix into useable form (note numerical values not yet set)
2381   */
2382   /* set the b->ilen (length of each row) values */
2383   ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr);
2384   /* set the b->i indices */
2385   b->i[0] = 0;
2386   for (i=1; i<=A->rmap->N/bs; i++) {
2387     b->i[i] = b->i[i-1] + lens[i-1];
2388   }
2389   ierr = PetscFree(lens);CHKERRQ(ierr);
2390   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2391   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2392   ierr = PetscFree(recvcounts);CHKERRQ(ierr);
2393 
2394   if (A->symmetric) {
2395     ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2396   } else if (A->hermitian) {
2397     ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr);
2398   } else if (A->structurally_symmetric) {
2399     ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2400   }
2401   *newmat = B;
2402   PetscFunctionReturn(0);
2403 }
2404 
2405 #undef __FUNCT__
2406 #define __FUNCT__ "MatSOR_MPIBAIJ"
2407 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2408 {
2409   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
2410   PetscErrorCode ierr;
2411   Vec            bb1 = 0;
2412 
2413   PetscFunctionBegin;
2414   if (flag == SOR_APPLY_UPPER) {
2415     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2416     PetscFunctionReturn(0);
2417   }
2418 
2419   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) {
2420     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2421   }
2422 
2423   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2424     if (flag & SOR_ZERO_INITIAL_GUESS) {
2425       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2426       its--;
2427     }
2428 
2429     while (its--) {
2430       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2431       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2432 
2433       /* update rhs: bb1 = bb - B*x */
2434       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2435       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2436 
2437       /* local sweep */
2438       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2439     }
2440   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
2441     if (flag & SOR_ZERO_INITIAL_GUESS) {
2442       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2443       its--;
2444     }
2445     while (its--) {
2446       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2447       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2448 
2449       /* update rhs: bb1 = bb - B*x */
2450       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2451       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2452 
2453       /* local sweep */
2454       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2455     }
2456   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
2457     if (flag & SOR_ZERO_INITIAL_GUESS) {
2458       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2459       its--;
2460     }
2461     while (its--) {
2462       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2463       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2464 
2465       /* update rhs: bb1 = bb - B*x */
2466       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2467       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2468 
2469       /* local sweep */
2470       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2471     }
2472   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel version of SOR requested not supported");
2473 
2474   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2475   PetscFunctionReturn(0);
2476 }
2477 
2478 #undef __FUNCT__
2479 #define __FUNCT__ "MatGetColumnNorms_MPIBAIJ"
2480 PetscErrorCode MatGetColumnNorms_MPIBAIJ(Mat A,NormType type,PetscReal *norms)
2481 {
2482   PetscErrorCode ierr;
2483   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)A->data;
2484   PetscInt       N,i,*garray = aij->garray;
2485   PetscInt       ib,jb,bs = A->rmap->bs;
2486   Mat_SeqBAIJ    *a_aij = (Mat_SeqBAIJ*) aij->A->data;
2487   MatScalar      *a_val = a_aij->a;
2488   Mat_SeqBAIJ    *b_aij = (Mat_SeqBAIJ*) aij->B->data;
2489   MatScalar      *b_val = b_aij->a;
2490   PetscReal      *work;
2491 
2492   PetscFunctionBegin;
2493   ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
2494   ierr = PetscCalloc1(N,&work);CHKERRQ(ierr);
2495   if (type == NORM_2) {
2496     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2497       for (jb=0; jb<bs; jb++) {
2498         for (ib=0; ib<bs; ib++) {
2499           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
2500           a_val++;
2501         }
2502       }
2503     }
2504     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2505       for (jb=0; jb<bs; jb++) {
2506         for (ib=0; ib<bs; ib++) {
2507           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val);
2508           b_val++;
2509         }
2510       }
2511     }
2512   } else if (type == NORM_1) {
2513     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2514       for (jb=0; jb<bs; jb++) {
2515         for (ib=0; ib<bs; ib++) {
2516           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
2517           a_val++;
2518         }
2519       }
2520     }
2521     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2522       for (jb=0; jb<bs; jb++) {
2523        for (ib=0; ib<bs; ib++) {
2524           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val);
2525           b_val++;
2526         }
2527       }
2528     }
2529   } else if (type == NORM_INFINITY) {
2530     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2531       for (jb=0; jb<bs; jb++) {
2532         for (ib=0; ib<bs; ib++) {
2533           int col = A->cmap->rstart + a_aij->j[i] * bs + jb;
2534           work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]);
2535           a_val++;
2536         }
2537       }
2538     }
2539     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2540       for (jb=0; jb<bs; jb++) {
2541         for (ib=0; ib<bs; ib++) {
2542           int col = garray[b_aij->j[i]] * bs + jb;
2543           work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]);
2544           b_val++;
2545         }
2546       }
2547     }
2548   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
2549   if (type == NORM_INFINITY) {
2550     ierr = MPI_Allreduce(work,norms,N,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2551   } else {
2552     ierr = MPI_Allreduce(work,norms,N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2553   }
2554   ierr = PetscFree(work);CHKERRQ(ierr);
2555   if (type == NORM_2) {
2556     for (i=0; i<N; i++) norms[i] = PetscSqrtReal(norms[i]);
2557   }
2558   PetscFunctionReturn(0);
2559 }
2560 
2561 #undef __FUNCT__
2562 #define __FUNCT__ "MatInvertBlockDiagonal_MPIBAIJ"
2563 PetscErrorCode  MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values)
2564 {
2565   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*) A->data;
2566   PetscErrorCode ierr;
2567 
2568   PetscFunctionBegin;
2569   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2570   PetscFunctionReturn(0);
2571 }
2572 
2573 
2574 /* -------------------------------------------------------------------*/
2575 static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ,
2576                                        MatGetRow_MPIBAIJ,
2577                                        MatRestoreRow_MPIBAIJ,
2578                                        MatMult_MPIBAIJ,
2579                                 /* 4*/ MatMultAdd_MPIBAIJ,
2580                                        MatMultTranspose_MPIBAIJ,
2581                                        MatMultTransposeAdd_MPIBAIJ,
2582                                        0,
2583                                        0,
2584                                        0,
2585                                 /*10*/ 0,
2586                                        0,
2587                                        0,
2588                                        MatSOR_MPIBAIJ,
2589                                        MatTranspose_MPIBAIJ,
2590                                 /*15*/ MatGetInfo_MPIBAIJ,
2591                                        MatEqual_MPIBAIJ,
2592                                        MatGetDiagonal_MPIBAIJ,
2593                                        MatDiagonalScale_MPIBAIJ,
2594                                        MatNorm_MPIBAIJ,
2595                                 /*20*/ MatAssemblyBegin_MPIBAIJ,
2596                                        MatAssemblyEnd_MPIBAIJ,
2597                                        MatSetOption_MPIBAIJ,
2598                                        MatZeroEntries_MPIBAIJ,
2599                                 /*24*/ MatZeroRows_MPIBAIJ,
2600                                        0,
2601                                        0,
2602                                        0,
2603                                        0,
2604                                 /*29*/ MatSetUp_MPIBAIJ,
2605                                        0,
2606                                        0,
2607                                        0,
2608                                        0,
2609                                 /*34*/ MatDuplicate_MPIBAIJ,
2610                                        0,
2611                                        0,
2612                                        0,
2613                                        0,
2614                                 /*39*/ MatAXPY_MPIBAIJ,
2615                                        MatGetSubMatrices_MPIBAIJ,
2616                                        MatIncreaseOverlap_MPIBAIJ,
2617                                        MatGetValues_MPIBAIJ,
2618                                        MatCopy_MPIBAIJ,
2619                                 /*44*/ 0,
2620                                        MatScale_MPIBAIJ,
2621                                        0,
2622                                        0,
2623                                        MatZeroRowsColumns_MPIBAIJ,
2624                                 /*49*/ 0,
2625                                        0,
2626                                        0,
2627                                        0,
2628                                        0,
2629                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2630                                        0,
2631                                        MatSetUnfactored_MPIBAIJ,
2632                                        MatPermute_MPIBAIJ,
2633                                        MatSetValuesBlocked_MPIBAIJ,
2634                                 /*59*/ MatGetSubMatrix_MPIBAIJ,
2635                                        MatDestroy_MPIBAIJ,
2636                                        MatView_MPIBAIJ,
2637                                        0,
2638                                        0,
2639                                 /*64*/ 0,
2640                                        0,
2641                                        0,
2642                                        0,
2643                                        0,
2644                                 /*69*/ MatGetRowMaxAbs_MPIBAIJ,
2645                                        0,
2646                                        0,
2647                                        0,
2648                                        0,
2649                                 /*74*/ 0,
2650                                        MatFDColoringApply_BAIJ,
2651                                        0,
2652                                        0,
2653                                        0,
2654                                 /*79*/ 0,
2655                                        0,
2656                                        0,
2657                                        0,
2658                                        MatLoad_MPIBAIJ,
2659                                 /*84*/ 0,
2660                                        0,
2661                                        0,
2662                                        0,
2663                                        0,
2664                                 /*89*/ 0,
2665                                        0,
2666                                        0,
2667                                        0,
2668                                        0,
2669                                 /*94*/ 0,
2670                                        0,
2671                                        0,
2672                                        0,
2673                                        0,
2674                                 /*99*/ 0,
2675                                        0,
2676                                        0,
2677                                        0,
2678                                        0,
2679                                 /*104*/0,
2680                                        MatRealPart_MPIBAIJ,
2681                                        MatImaginaryPart_MPIBAIJ,
2682                                        0,
2683                                        0,
2684                                 /*109*/0,
2685                                        0,
2686                                        0,
2687                                        0,
2688                                        0,
2689                                 /*114*/MatGetSeqNonzeroStructure_MPIBAIJ,
2690                                        0,
2691                                        MatGetGhosts_MPIBAIJ,
2692                                        0,
2693                                        0,
2694                                 /*119*/0,
2695                                        0,
2696                                        0,
2697                                        0,
2698                                        MatGetMultiProcBlock_MPIBAIJ,
2699                                 /*124*/0,
2700                                        MatGetColumnNorms_MPIBAIJ,
2701                                        MatInvertBlockDiagonal_MPIBAIJ,
2702                                        0,
2703                                        0,
2704                                /*129*/ 0,
2705                                        0,
2706                                        0,
2707                                        0,
2708                                        0,
2709                                /*134*/ 0,
2710                                        0,
2711                                        0,
2712                                        0,
2713                                        0,
2714                                /*139*/ 0,
2715                                        0,
2716                                        0,
2717                                        MatFDColoringSetUp_MPIXAIJ
2718 };
2719 
2720 #undef __FUNCT__
2721 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
2722 PetscErrorCode  MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a)
2723 {
2724   PetscFunctionBegin;
2725   *a = ((Mat_MPIBAIJ*)A->data)->A;
2726   PetscFunctionReturn(0);
2727 }
2728 
2729 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*);
2730 
2731 #undef __FUNCT__
2732 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ"
2733 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2734 {
2735   PetscInt       m,rstart,cstart,cend;
2736   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
2737   const PetscInt *JJ    =0;
2738   PetscScalar    *values=0;
2739   PetscBool      roworiented = ((Mat_MPIBAIJ*)B->data)->roworiented;
2740   PetscErrorCode ierr;
2741 
2742   PetscFunctionBegin;
2743   ierr   = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2744   ierr   = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2745   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2746   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2747   ierr   = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2748   m      = B->rmap->n/bs;
2749   rstart = B->rmap->rstart/bs;
2750   cstart = B->cmap->rstart/bs;
2751   cend   = B->cmap->rend/bs;
2752 
2753   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
2754   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
2755   for (i=0; i<m; i++) {
2756     nz = ii[i+1] - ii[i];
2757     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
2758     nz_max = PetscMax(nz_max,nz);
2759     JJ     = jj + ii[i];
2760     for (j=0; j<nz; j++) {
2761       if (*JJ >= cstart) break;
2762       JJ++;
2763     }
2764     d = 0;
2765     for (; j<nz; j++) {
2766       if (*JJ++ >= cend) break;
2767       d++;
2768     }
2769     d_nnz[i] = d;
2770     o_nnz[i] = nz - d;
2771   }
2772   ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2773   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
2774 
2775   values = (PetscScalar*)V;
2776   if (!values) {
2777     ierr = PetscMalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr);
2778     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2779   }
2780   for (i=0; i<m; i++) {
2781     PetscInt          row    = i + rstart;
2782     PetscInt          ncols  = ii[i+1] - ii[i];
2783     const PetscInt    *icols = jj + ii[i];
2784     if (!roworiented) {         /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
2785       const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2786       ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2787     } else {                    /* block ordering does not match so we can only insert one block at a time. */
2788       PetscInt j;
2789       for (j=0; j<ncols; j++) {
2790         const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
2791         ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
2792       }
2793     }
2794   }
2795 
2796   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2797   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2798   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2799   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2800   PetscFunctionReturn(0);
2801 }
2802 
2803 #undef __FUNCT__
2804 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR"
2805 /*@C
2806    MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format
2807    (the default parallel PETSc format).
2808 
2809    Collective on MPI_Comm
2810 
2811    Input Parameters:
2812 +  B - the matrix
2813 .  bs - the block size
2814 .  i - the indices into j for the start of each local row (starts with zero)
2815 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2816 -  v - optional values in the matrix
2817 
2818    Level: developer
2819 
2820    Notes: The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED.  For example, C programs
2821    may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
2822    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
2823    MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
2824    block column and the second index is over columns within a block.
2825 
2826 .keywords: matrix, aij, compressed row, sparse, parallel
2827 
2828 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, MatCreateMPIBAIJWithArrays(), MPIBAIJ
2829 @*/
2830 PetscErrorCode  MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2831 {
2832   PetscErrorCode ierr;
2833 
2834   PetscFunctionBegin;
2835   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2836   PetscValidType(B,1);
2837   PetscValidLogicalCollectiveInt(B,bs,2);
2838   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
2839   PetscFunctionReturn(0);
2840 }
2841 
2842 #undef __FUNCT__
2843 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
2844 PetscErrorCode  MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
2845 {
2846   Mat_MPIBAIJ    *b;
2847   PetscErrorCode ierr;
2848   PetscInt       i;
2849 
2850   PetscFunctionBegin;
2851   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
2852   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2853   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2854   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2855 
2856   if (d_nnz) {
2857     for (i=0; i<B->rmap->n/bs; i++) {
2858       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]);
2859     }
2860   }
2861   if (o_nnz) {
2862     for (i=0; i<B->rmap->n/bs; i++) {
2863       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]);
2864     }
2865   }
2866 
2867   b      = (Mat_MPIBAIJ*)B->data;
2868   b->bs2 = bs*bs;
2869   b->mbs = B->rmap->n/bs;
2870   b->nbs = B->cmap->n/bs;
2871   b->Mbs = B->rmap->N/bs;
2872   b->Nbs = B->cmap->N/bs;
2873 
2874   for (i=0; i<=b->size; i++) {
2875     b->rangebs[i] = B->rmap->range[i]/bs;
2876   }
2877   b->rstartbs = B->rmap->rstart/bs;
2878   b->rendbs   = B->rmap->rend/bs;
2879   b->cstartbs = B->cmap->rstart/bs;
2880   b->cendbs   = B->cmap->rend/bs;
2881 
2882   if (!B->preallocated) {
2883     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2884     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2885     ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
2886     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2887     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2888     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2889     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
2890     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2891     ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr);
2892   }
2893 
2894   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2895   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
2896   B->preallocated = PETSC_TRUE;
2897   PetscFunctionReturn(0);
2898 }
2899 
2900 extern PetscErrorCode  MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
2901 extern PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
2902 
2903 #undef __FUNCT__
2904 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj"
2905 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype,MatReuse reuse,Mat *adj)
2906 {
2907   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
2908   PetscErrorCode ierr;
2909   Mat_SeqBAIJ    *d  = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data;
2910   PetscInt       M   = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs;
2911   const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;
2912 
2913   PetscFunctionBegin;
2914   ierr  = PetscMalloc1((M+1),&ii);CHKERRQ(ierr);
2915   ii[0] = 0;
2916   for (i=0; i<M; i++) {
2917     if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]);
2918     if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]);
2919     ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i];
2920     /* remove one from count of matrix has diagonal */
2921     for (j=id[i]; j<id[i+1]; j++) {
2922       if (jd[j] == i) {ii[i+1]--;break;}
2923     }
2924   }
2925   ierr = PetscMalloc1(ii[M],&jj);CHKERRQ(ierr);
2926   cnt  = 0;
2927   for (i=0; i<M; i++) {
2928     for (j=io[i]; j<io[i+1]; j++) {
2929       if (garray[jo[j]] > rstart) break;
2930       jj[cnt++] = garray[jo[j]];
2931     }
2932     for (k=id[i]; k<id[i+1]; k++) {
2933       if (jd[k] != i) {
2934         jj[cnt++] = rstart + jd[k];
2935       }
2936     }
2937     for (; j<io[i+1]; j++) {
2938       jj[cnt++] = garray[jo[j]];
2939     }
2940   }
2941   ierr = MatCreateMPIAdj(PetscObjectComm((PetscObject)B),M,B->cmap->N/B->rmap->bs,ii,jj,NULL,adj);CHKERRQ(ierr);
2942   PetscFunctionReturn(0);
2943 }
2944 
2945 #include <../src/mat/impls/aij/mpi/mpiaij.h>
2946 
2947 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,MatReuse,Mat*);
2948 
2949 #undef __FUNCT__
2950 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAIJ"
2951 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
2952 {
2953   PetscErrorCode ierr;
2954   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
2955   Mat            B;
2956   Mat_MPIAIJ     *b;
2957 
2958   PetscFunctionBegin;
2959   if (!A->assembled) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Matrix must be assembled");
2960 
2961   ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2962   ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
2963   ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2964   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
2965   ierr = MatMPIAIJSetPreallocation(B,0,NULL,0,NULL);CHKERRQ(ierr);
2966   b    = (Mat_MPIAIJ*) B->data;
2967 
2968   ierr = MatDestroy(&b->A);CHKERRQ(ierr);
2969   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2970   ierr = MatDisAssemble_MPIBAIJ(A);CHKERRQ(ierr);
2971   ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr);
2972   ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr);
2973   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2974   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2975   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2976   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2977   if (reuse == MAT_REUSE_MATRIX) {
2978     ierr = MatHeaderReplace(A,B);CHKERRQ(ierr);
2979   } else {
2980    *newmat = B;
2981   }
2982   PetscFunctionReturn(0);
2983 }
2984 
2985 #if defined(PETSC_HAVE_MUMPS)
2986 PETSC_EXTERN PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*);
2987 #endif
2988 
2989 /*MC
2990    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
2991 
2992    Options Database Keys:
2993 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
2994 . -mat_block_size <bs> - set the blocksize used to store the matrix
2995 - -mat_use_hash_table <fact>
2996 
2997   Level: beginner
2998 
2999 .seealso: MatCreateMPIBAIJ
3000 M*/
3001 
3002 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,MatType,MatReuse,Mat*);
3003 
3004 #undef __FUNCT__
3005 #define __FUNCT__ "MatCreate_MPIBAIJ"
3006 PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B)
3007 {
3008   Mat_MPIBAIJ    *b;
3009   PetscErrorCode ierr;
3010   PetscBool      flg;
3011 
3012   PetscFunctionBegin;
3013   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
3014   B->data = (void*)b;
3015 
3016   ierr         = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3017   B->assembled = PETSC_FALSE;
3018 
3019   B->insertmode = NOT_SET_VALUES;
3020   ierr          = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
3021   ierr          = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr);
3022 
3023   /* build local table of row and column ownerships */
3024   ierr = PetscMalloc1((b->size+1),&b->rangebs);CHKERRQ(ierr);
3025 
3026   /* build cache for off array entries formed */
3027   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
3028 
3029   b->donotstash  = PETSC_FALSE;
3030   b->colmap      = NULL;
3031   b->garray      = NULL;
3032   b->roworiented = PETSC_TRUE;
3033 
3034   /* stuff used in block assembly */
3035   b->barray = 0;
3036 
3037   /* stuff used for matrix vector multiply */
3038   b->lvec  = 0;
3039   b->Mvctx = 0;
3040 
3041   /* stuff for MatGetRow() */
3042   b->rowindices   = 0;
3043   b->rowvalues    = 0;
3044   b->getrowactive = PETSC_FALSE;
3045 
3046   /* hash table stuff */
3047   b->ht           = 0;
3048   b->hd           = 0;
3049   b->ht_size      = 0;
3050   b->ht_flag      = PETSC_FALSE;
3051   b->ht_fact      = 0;
3052   b->ht_total_ct  = 0;
3053   b->ht_insert_ct = 0;
3054 
3055   /* stuff for MatGetSubMatrices_MPIBAIJ_local() */
3056   b->ijonly = PETSC_FALSE;
3057 
3058   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr);
3059   ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,NULL);CHKERRQ(ierr);
3060   if (flg) {
3061     PetscReal fact = 1.39;
3062     ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
3063     ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
3064     if (fact <= 1.0) fact = 1.39;
3065     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
3066     ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
3067   }
3068   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3069 
3070 #if defined(PETSC_HAVE_MUMPS)
3071   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_baij_mumps);CHKERRQ(ierr);
3072 #endif
3073   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr);
3074   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr);
3075   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C",MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr);
3076   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
3077   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
3078   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
3079   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
3080   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
3081   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
3082   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSetHashTableFactor_C",MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
3083   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpibstrm_C",MatConvert_MPIBAIJ_MPIBSTRM);CHKERRQ(ierr);
3084   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);
3085   PetscFunctionReturn(0);
3086 }
3087 
3088 /*MC
3089    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
3090 
3091    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
3092    and MATMPIBAIJ otherwise.
3093 
3094    Options Database Keys:
3095 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
3096 
3097   Level: beginner
3098 
3099 .seealso: MatCreateBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3100 M*/
3101 
3102 #undef __FUNCT__
3103 #define __FUNCT__ "MatMPIBAIJSetPreallocation"
3104 /*@C
3105    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
3106    (block compressed row).  For good matrix assembly performance
3107    the user should preallocate the matrix storage by setting the parameters
3108    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3109    performance can be increased by more than a factor of 50.
3110 
3111    Collective on Mat
3112 
3113    Input Parameters:
3114 +  B - the matrix
3115 .  bs   - size of block
3116 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
3117            submatrix  (same for all local rows)
3118 .  d_nnz - array containing the number of block nonzeros in the various block rows
3119            of the in diagonal portion of the local (possibly different for each block
3120            row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry and
3121            set it even if it is zero.
3122 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
3123            submatrix (same for all local rows).
3124 -  o_nnz - array containing the number of nonzeros in the various block rows of the
3125            off-diagonal portion of the local submatrix (possibly different for
3126            each block row) or NULL.
3127 
3128    If the *_nnz parameter is given then the *_nz parameter is ignored
3129 
3130    Options Database Keys:
3131 +   -mat_block_size - size of the blocks to use
3132 -   -mat_use_hash_table <fact>
3133 
3134    Notes:
3135    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
3136    than it must be used on all processors that share the object for that argument.
3137 
3138    Storage Information:
3139    For a square global matrix we define each processor's diagonal portion
3140    to be its local rows and the corresponding columns (a square submatrix);
3141    each processor's off-diagonal portion encompasses the remainder of the
3142    local matrix (a rectangular submatrix).
3143 
3144    The user can specify preallocated storage for the diagonal part of
3145    the local submatrix with either d_nz or d_nnz (not both).  Set
3146    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3147    memory allocation.  Likewise, specify preallocated storage for the
3148    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3149 
3150    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3151    the figure below we depict these three local rows and all columns (0-11).
3152 
3153 .vb
3154            0 1 2 3 4 5 6 7 8 9 10 11
3155           --------------------------
3156    row 3  |o o o d d d o o o o  o  o
3157    row 4  |o o o d d d o o o o  o  o
3158    row 5  |o o o d d d o o o o  o  o
3159           --------------------------
3160 .ve
3161 
3162    Thus, any entries in the d locations are stored in the d (diagonal)
3163    submatrix, and any entries in the o locations are stored in the
3164    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3165    stored simply in the MATSEQBAIJ format for compressed row storage.
3166 
3167    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3168    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3169    In general, for PDE problems in which most nonzeros are near the diagonal,
3170    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3171    or you will get TERRIBLE performance; see the users' manual chapter on
3172    matrices.
3173 
3174    You can call MatGetInfo() to get information on how effective the preallocation was;
3175    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3176    You can also run with the option -info and look for messages with the string
3177    malloc in them to see if additional memory allocation was needed.
3178 
3179    Level: intermediate
3180 
3181 .keywords: matrix, block, aij, compressed row, sparse, parallel
3182 
3183 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocationCSR(), PetscSplitOwnership()
3184 @*/
3185 PetscErrorCode  MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3186 {
3187   PetscErrorCode ierr;
3188 
3189   PetscFunctionBegin;
3190   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3191   PetscValidType(B,1);
3192   PetscValidLogicalCollectiveInt(B,bs,2);
3193   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3194   PetscFunctionReturn(0);
3195 }
3196 
3197 #undef __FUNCT__
3198 #define __FUNCT__ "MatCreateBAIJ"
3199 /*@C
3200    MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format
3201    (block compressed row).  For good matrix assembly performance
3202    the user should preallocate the matrix storage by setting the parameters
3203    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3204    performance can be increased by more than a factor of 50.
3205 
3206    Collective on MPI_Comm
3207 
3208    Input Parameters:
3209 +  comm - MPI communicator
3210 .  bs   - size of blockk
3211 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3212            This value should be the same as the local size used in creating the
3213            y vector for the matrix-vector product y = Ax.
3214 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
3215            This value should be the same as the local size used in creating the
3216            x vector for the matrix-vector product y = Ax.
3217 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3218 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3219 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
3220            submatrix  (same for all local rows)
3221 .  d_nnz - array containing the number of nonzero blocks in the various block rows
3222            of the in diagonal portion of the local (possibly different for each block
3223            row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry
3224            and set it even if it is zero.
3225 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3226            submatrix (same for all local rows).
3227 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
3228            off-diagonal portion of the local submatrix (possibly different for
3229            each block row) or NULL.
3230 
3231    Output Parameter:
3232 .  A - the matrix
3233 
3234    Options Database Keys:
3235 +   -mat_block_size - size of the blocks to use
3236 -   -mat_use_hash_table <fact>
3237 
3238    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3239    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3240    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3241 
3242    Notes:
3243    If the *_nnz parameter is given then the *_nz parameter is ignored
3244 
3245    A nonzero block is any block that as 1 or more nonzeros in it
3246 
3247    The user MUST specify either the local or global matrix dimensions
3248    (possibly both).
3249 
3250    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
3251    than it must be used on all processors that share the object for that argument.
3252 
3253    Storage Information:
3254    For a square global matrix we define each processor's diagonal portion
3255    to be its local rows and the corresponding columns (a square submatrix);
3256    each processor's off-diagonal portion encompasses the remainder of the
3257    local matrix (a rectangular submatrix).
3258 
3259    The user can specify preallocated storage for the diagonal part of
3260    the local submatrix with either d_nz or d_nnz (not both).  Set
3261    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3262    memory allocation.  Likewise, specify preallocated storage for the
3263    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3264 
3265    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3266    the figure below we depict these three local rows and all columns (0-11).
3267 
3268 .vb
3269            0 1 2 3 4 5 6 7 8 9 10 11
3270           --------------------------
3271    row 3  |o o o d d d o o o o  o  o
3272    row 4  |o o o d d d o o o o  o  o
3273    row 5  |o o o d d d o o o o  o  o
3274           --------------------------
3275 .ve
3276 
3277    Thus, any entries in the d locations are stored in the d (diagonal)
3278    submatrix, and any entries in the o locations are stored in the
3279    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3280    stored simply in the MATSEQBAIJ format for compressed row storage.
3281 
3282    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3283    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3284    In general, for PDE problems in which most nonzeros are near the diagonal,
3285    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3286    or you will get TERRIBLE performance; see the users' manual chapter on
3287    matrices.
3288 
3289    Level: intermediate
3290 
3291 .keywords: matrix, block, aij, compressed row, sparse, parallel
3292 
3293 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3294 @*/
3295 PetscErrorCode  MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3296 {
3297   PetscErrorCode ierr;
3298   PetscMPIInt    size;
3299 
3300   PetscFunctionBegin;
3301   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3302   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3303   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3304   if (size > 1) {
3305     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
3306     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3307   } else {
3308     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3309     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3310   }
3311   PetscFunctionReturn(0);
3312 }
3313 
3314 #undef __FUNCT__
3315 #define __FUNCT__ "MatDuplicate_MPIBAIJ"
3316 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3317 {
3318   Mat            mat;
3319   Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
3320   PetscErrorCode ierr;
3321   PetscInt       len=0;
3322 
3323   PetscFunctionBegin;
3324   *newmat = 0;
3325   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
3326   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3327   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3328   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3329 
3330   mat->factortype   = matin->factortype;
3331   mat->preallocated = PETSC_TRUE;
3332   mat->assembled    = PETSC_TRUE;
3333   mat->insertmode   = NOT_SET_VALUES;
3334 
3335   a             = (Mat_MPIBAIJ*)mat->data;
3336   mat->rmap->bs = matin->rmap->bs;
3337   a->bs2        = oldmat->bs2;
3338   a->mbs        = oldmat->mbs;
3339   a->nbs        = oldmat->nbs;
3340   a->Mbs        = oldmat->Mbs;
3341   a->Nbs        = oldmat->Nbs;
3342 
3343   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3344   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3345 
3346   a->size         = oldmat->size;
3347   a->rank         = oldmat->rank;
3348   a->donotstash   = oldmat->donotstash;
3349   a->roworiented  = oldmat->roworiented;
3350   a->rowindices   = 0;
3351   a->rowvalues    = 0;
3352   a->getrowactive = PETSC_FALSE;
3353   a->barray       = 0;
3354   a->rstartbs     = oldmat->rstartbs;
3355   a->rendbs       = oldmat->rendbs;
3356   a->cstartbs     = oldmat->cstartbs;
3357   a->cendbs       = oldmat->cendbs;
3358 
3359   /* hash table stuff */
3360   a->ht           = 0;
3361   a->hd           = 0;
3362   a->ht_size      = 0;
3363   a->ht_flag      = oldmat->ht_flag;
3364   a->ht_fact      = oldmat->ht_fact;
3365   a->ht_total_ct  = 0;
3366   a->ht_insert_ct = 0;
3367 
3368   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
3369   if (oldmat->colmap) {
3370 #if defined(PETSC_USE_CTABLE)
3371     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3372 #else
3373     ierr = PetscMalloc1((a->Nbs),&a->colmap);CHKERRQ(ierr);
3374     ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3375     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3376 #endif
3377   } else a->colmap = 0;
3378 
3379   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
3380     ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr);
3381     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3382     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
3383   } else a->garray = 0;
3384 
3385   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
3386   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3387   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
3388   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3389   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
3390 
3391   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3392   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
3393   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3394   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
3395   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3396   *newmat = mat;
3397   PetscFunctionReturn(0);
3398 }
3399 
3400 #undef __FUNCT__
3401 #define __FUNCT__ "MatLoad_MPIBAIJ"
3402 PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer)
3403 {
3404   PetscErrorCode ierr;
3405   int            fd;
3406   PetscInt       i,nz,j,rstart,rend;
3407   PetscScalar    *vals,*buf;
3408   MPI_Comm       comm;
3409   MPI_Status     status;
3410   PetscMPIInt    rank,size,maxnz;
3411   PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
3412   PetscInt       *locrowlens = NULL,*procsnz = NULL,*browners = NULL;
3413   PetscInt       jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax;
3414   PetscMPIInt    tag    = ((PetscObject)viewer)->tag;
3415   PetscInt       *dlens = NULL,*odlens = NULL,*mask = NULL,*masked1 = NULL,*masked2 = NULL,rowcount,odcount;
3416   PetscInt       dcount,kmax,k,nzcount,tmp,mend,sizesset=1,grows,gcols;
3417 
3418   PetscFunctionBegin;
3419   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3420   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr);
3421   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3422   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3423 
3424   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3425   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3426   if (!rank) {
3427     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3428     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
3429     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3430   }
3431 
3432   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
3433 
3434   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3435   M    = header[1]; N = header[2];
3436 
3437   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3438   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
3439   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
3440 
3441   /* If global sizes are set, check if they are consistent with that given in the file */
3442   if (sizesset) {
3443     ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
3444   }
3445   if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3446   if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3447 
3448   if (M != N) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Can only do square matrices");
3449 
3450   /*
3451      This code adds extra rows to make sure the number of rows is
3452      divisible by the blocksize
3453   */
3454   Mbs        = M/bs;
3455   extra_rows = bs - M + bs*Mbs;
3456   if (extra_rows == bs) extra_rows = 0;
3457   else                  Mbs++;
3458   if (extra_rows && !rank) {
3459     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3460   }
3461 
3462   /* determine ownership of all rows */
3463   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
3464     mbs = Mbs/size + ((Mbs % size) > rank);
3465     m   = mbs*bs;
3466   } else { /* User set */
3467     m   = newmat->rmap->n;
3468     mbs = m/bs;
3469   }
3470   ierr = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr);
3471   ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3472 
3473   /* process 0 needs enough room for process with most rows */
3474   if (!rank) {
3475     mmax = rowners[1];
3476     for (i=2; i<=size; i++) {
3477       mmax = PetscMax(mmax,rowners[i]);
3478     }
3479     mmax*=bs;
3480   } else mmax = -1;             /* unused, but compiler warns anyway */
3481 
3482   rowners[0] = 0;
3483   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
3484   for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
3485   rstart = rowners[rank];
3486   rend   = rowners[rank+1];
3487 
3488   /* distribute row lengths to all processors */
3489   ierr = PetscMalloc1(m,&locrowlens);CHKERRQ(ierr);
3490   if (!rank) {
3491     mend = m;
3492     if (size == 1) mend = mend - extra_rows;
3493     ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr);
3494     for (j=mend; j<m; j++) locrowlens[j] = 1;
3495     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
3496     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
3497     for (j=0; j<m; j++) {
3498       procsnz[0] += locrowlens[j];
3499     }
3500     for (i=1; i<size; i++) {
3501       mend = browners[i+1] - browners[i];
3502       if (i == size-1) mend = mend - extra_rows;
3503       ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr);
3504       for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
3505       /* calculate the number of nonzeros on each processor */
3506       for (j=0; j<browners[i+1]-browners[i]; j++) {
3507         procsnz[i] += rowlengths[j];
3508       }
3509       ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3510     }
3511     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3512   } else {
3513     ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3514   }
3515 
3516   if (!rank) {
3517     /* determine max buffer needed and allocate it */
3518     maxnz = procsnz[0];
3519     for (i=1; i<size; i++) {
3520       maxnz = PetscMax(maxnz,procsnz[i]);
3521     }
3522     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
3523 
3524     /* read in my part of the matrix column indices  */
3525     nz     = procsnz[0];
3526     ierr   = PetscMalloc1((nz+1),&ibuf);CHKERRQ(ierr);
3527     mycols = ibuf;
3528     if (size == 1) nz -= extra_rows;
3529     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3530     if (size == 1) {
3531       for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i;
3532     }
3533 
3534     /* read in every ones (except the last) and ship off */
3535     for (i=1; i<size-1; i++) {
3536       nz   = procsnz[i];
3537       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3538       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3539     }
3540     /* read in the stuff for the last proc */
3541     if (size != 1) {
3542       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
3543       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3544       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
3545       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
3546     }
3547     ierr = PetscFree(cols);CHKERRQ(ierr);
3548   } else {
3549     /* determine buffer space needed for message */
3550     nz = 0;
3551     for (i=0; i<m; i++) {
3552       nz += locrowlens[i];
3553     }
3554     ierr   = PetscMalloc1((nz+1),&ibuf);CHKERRQ(ierr);
3555     mycols = ibuf;
3556     /* receive message of column indices*/
3557     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3558     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
3559     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
3560   }
3561 
3562   /* loop over local rows, determining number of off diagonal entries */
3563   ierr     = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr);
3564   ierr     = PetscCalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr);
3565   rowcount = 0; nzcount = 0;
3566   for (i=0; i<mbs; i++) {
3567     dcount  = 0;
3568     odcount = 0;
3569     for (j=0; j<bs; j++) {
3570       kmax = locrowlens[rowcount];
3571       for (k=0; k<kmax; k++) {
3572         tmp = mycols[nzcount++]/bs;
3573         if (!mask[tmp]) {
3574           mask[tmp] = 1;
3575           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
3576           else masked1[dcount++] = tmp;
3577         }
3578       }
3579       rowcount++;
3580     }
3581 
3582     dlens[i]  = dcount;
3583     odlens[i] = odcount;
3584 
3585     /* zero out the mask elements we set */
3586     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
3587     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
3588   }
3589 
3590 
3591   if (!sizesset) {
3592     ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3593   }
3594   ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
3595 
3596   if (!rank) {
3597     ierr = PetscMalloc1((maxnz+1),&buf);CHKERRQ(ierr);
3598     /* read in my part of the matrix numerical values  */
3599     nz     = procsnz[0];
3600     vals   = buf;
3601     mycols = ibuf;
3602     if (size == 1) nz -= extra_rows;
3603     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3604     if (size == 1) {
3605       for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0;
3606     }
3607 
3608     /* insert into matrix */
3609     jj = rstart*bs;
3610     for (i=0; i<m; i++) {
3611       ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3612       mycols += locrowlens[i];
3613       vals   += locrowlens[i];
3614       jj++;
3615     }
3616     /* read in other processors (except the last one) and ship out */
3617     for (i=1; i<size-1; i++) {
3618       nz   = procsnz[i];
3619       vals = buf;
3620       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3621       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3622     }
3623     /* the last proc */
3624     if (size != 1) {
3625       nz   = procsnz[i] - extra_rows;
3626       vals = buf;
3627       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3628       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
3629       ierr = MPIULong_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3630     }
3631     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3632   } else {
3633     /* receive numeric values */
3634     ierr = PetscMalloc1((nz+1),&buf);CHKERRQ(ierr);
3635 
3636     /* receive message of values*/
3637     vals   = buf;
3638     mycols = ibuf;
3639     ierr   = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3640 
3641     /* insert into matrix */
3642     jj = rstart*bs;
3643     for (i=0; i<m; i++) {
3644       ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3645       mycols += locrowlens[i];
3646       vals   += locrowlens[i];
3647       jj++;
3648     }
3649   }
3650   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
3651   ierr = PetscFree(buf);CHKERRQ(ierr);
3652   ierr = PetscFree(ibuf);CHKERRQ(ierr);
3653   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
3654   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
3655   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
3656   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3657   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3658   PetscFunctionReturn(0);
3659 }
3660 
3661 #undef __FUNCT__
3662 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
3663 /*@
3664    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
3665 
3666    Input Parameters:
3667 .  mat  - the matrix
3668 .  fact - factor
3669 
3670    Not Collective, each process can use a different factor
3671 
3672    Level: advanced
3673 
3674   Notes:
3675    This can also be set by the command line option: -mat_use_hash_table <fact>
3676 
3677 .keywords: matrix, hashtable, factor, HT
3678 
3679 .seealso: MatSetOption()
3680 @*/
3681 PetscErrorCode  MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
3682 {
3683   PetscErrorCode ierr;
3684 
3685   PetscFunctionBegin;
3686   ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr);
3687   PetscFunctionReturn(0);
3688 }
3689 
3690 #undef __FUNCT__
3691 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
3692 PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
3693 {
3694   Mat_MPIBAIJ *baij;
3695 
3696   PetscFunctionBegin;
3697   baij          = (Mat_MPIBAIJ*)mat->data;
3698   baij->ht_fact = fact;
3699   PetscFunctionReturn(0);
3700 }
3701 
3702 #undef __FUNCT__
3703 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
3704 PetscErrorCode  MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
3705 {
3706   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
3707 
3708   PetscFunctionBegin;
3709   if (Ad)     *Ad     = a->A;
3710   if (Ao)     *Ao     = a->B;
3711   if (colmap) *colmap = a->garray;
3712   PetscFunctionReturn(0);
3713 }
3714 
3715 /*
3716     Special version for direct calls from Fortran (to eliminate two function call overheads
3717 */
3718 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3719 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3720 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3721 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3722 #endif
3723 
3724 #undef __FUNCT__
3725 #define __FUNCT__ "matmpibiajsetvaluesblocked"
3726 /*@C
3727   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked()
3728 
3729   Collective on Mat
3730 
3731   Input Parameters:
3732 + mat - the matrix
3733 . min - number of input rows
3734 . im - input rows
3735 . nin - number of input columns
3736 . in - input columns
3737 . v - numerical values input
3738 - addvin - INSERT_VALUES or ADD_VALUES
3739 
3740   Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse.
3741 
3742   Level: advanced
3743 
3744 .seealso:   MatSetValuesBlocked()
3745 @*/
3746 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
3747 {
3748   /* convert input arguments to C version */
3749   Mat        mat  = *matin;
3750   PetscInt   m    = *min, n = *nin;
3751   InsertMode addv = *addvin;
3752 
3753   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
3754   const MatScalar *value;
3755   MatScalar       *barray     = baij->barray;
3756   PetscBool       roworiented = baij->roworiented;
3757   PetscErrorCode  ierr;
3758   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
3759   PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
3760   PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
3761 
3762   PetscFunctionBegin;
3763   /* tasks normally handled by MatSetValuesBlocked() */
3764   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
3765 #if defined(PETSC_USE_DEBUG)
3766   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
3767   if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
3768 #endif
3769   if (mat->assembled) {
3770     mat->was_assembled = PETSC_TRUE;
3771     mat->assembled     = PETSC_FALSE;
3772   }
3773   ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
3774 
3775 
3776   if (!barray) {
3777     ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
3778     baij->barray = barray;
3779   }
3780 
3781   if (roworiented) stepval = (n-1)*bs;
3782   else stepval = (m-1)*bs;
3783 
3784   for (i=0; i<m; i++) {
3785     if (im[i] < 0) continue;
3786 #if defined(PETSC_USE_DEBUG)
3787     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
3788 #endif
3789     if (im[i] >= rstart && im[i] < rend) {
3790       row = im[i] - rstart;
3791       for (j=0; j<n; j++) {
3792         /* If NumCol = 1 then a copy is not required */
3793         if ((roworiented) && (n == 1)) {
3794           barray = (MatScalar*)v + i*bs2;
3795         } else if ((!roworiented) && (m == 1)) {
3796           barray = (MatScalar*)v + j*bs2;
3797         } else { /* Here a copy is required */
3798           if (roworiented) {
3799             value = v + i*(stepval+bs)*bs + j*bs;
3800           } else {
3801             value = v + j*(stepval+bs)*bs + i*bs;
3802           }
3803           for (ii=0; ii<bs; ii++,value+=stepval) {
3804             for (jj=0; jj<bs; jj++) {
3805               *barray++ = *value++;
3806             }
3807           }
3808           barray -=bs2;
3809         }
3810 
3811         if (in[j] >= cstart && in[j] < cend) {
3812           col  = in[j] - cstart;
3813           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
3814         } else if (in[j] < 0) continue;
3815 #if defined(PETSC_USE_DEBUG)
3816         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
3817 #endif
3818         else {
3819           if (mat->was_assembled) {
3820             if (!baij->colmap) {
3821               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
3822             }
3823 
3824 #if defined(PETSC_USE_DEBUG)
3825 #if defined(PETSC_USE_CTABLE)
3826             { PetscInt data;
3827               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
3828               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
3829             }
3830 #else
3831             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
3832 #endif
3833 #endif
3834 #if defined(PETSC_USE_CTABLE)
3835             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
3836             col  = (col - 1)/bs;
3837 #else
3838             col = (baij->colmap[in[j]] - 1)/bs;
3839 #endif
3840             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
3841               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
3842               col  =  in[j];
3843             }
3844           } else col = in[j];
3845           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
3846         }
3847       }
3848     } else {
3849       if (!baij->donotstash) {
3850         if (roworiented) {
3851           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
3852         } else {
3853           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
3854         }
3855       }
3856     }
3857   }
3858 
3859   /* task normally handled by MatSetValuesBlocked() */
3860   ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
3861   PetscFunctionReturn(0);
3862 }
3863 
3864 #undef __FUNCT__
3865 #define __FUNCT__ "MatCreateMPIBAIJWithArrays"
3866 /*@
3867      MatCreateMPIBAIJWithArrays - creates a MPI BAIJ matrix using arrays that contain in standard
3868          CSR format the local rows.
3869 
3870    Collective on MPI_Comm
3871 
3872    Input Parameters:
3873 +  comm - MPI communicator
3874 .  bs - the block size, only a block size of 1 is supported
3875 .  m - number of local rows (Cannot be PETSC_DECIDE)
3876 .  n - This value should be the same as the local size used in creating the
3877        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3878        calculated if N is given) For square matrices n is almost always m.
3879 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3880 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3881 .   i - row indices
3882 .   j - column indices
3883 -   a - matrix values
3884 
3885    Output Parameter:
3886 .   mat - the matrix
3887 
3888    Level: intermediate
3889 
3890    Notes:
3891        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3892      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3893      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3894 
3895      The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3896      the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3897      block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3898      with column-major ordering within blocks.
3899 
3900        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3901 
3902 .keywords: matrix, aij, compressed row, sparse, parallel
3903 
3904 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3905           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
3906 @*/
3907 PetscErrorCode  MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3908 {
3909   PetscErrorCode ierr;
3910 
3911   PetscFunctionBegin;
3912   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3913   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3914   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3915   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3916   ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr);
3917   ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
3918   ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
3919   ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
3920   PetscFunctionReturn(0);
3921 }
3922