xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 6e1abc8b04b7dcce9b938acdb9f0f2ecc419474f)
1 
2 #include <../src/mat/impls/baij/mpi/mpibaij.h>   /*I  "petscmat.h"  I*/
3 #include <petscblaslapack.h>
4 
5 extern PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
6 extern PetscErrorCode MatDisAssemble_MPIBAIJ(Mat);
7 extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []);
8 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
9 extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
10 extern PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
11 extern PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
12 extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar,Vec,Vec);
13 
14 #undef __FUNCT__
15 #define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ"
16 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
17 {
18   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
19   PetscErrorCode ierr;
20   PetscInt       i,*idxb = 0;
21   PetscScalar    *va,*vb;
22   Vec            vtmp;
23 
24   PetscFunctionBegin;
25   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
26   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
27   if (idx) {
28     for (i=0; i<A->rmap->n; i++) {if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;}
29   }
30 
31   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
32   if (idx) {ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);}
33   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
34   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
35 
36   for (i=0; i<A->rmap->n; i++){
37     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {va[i] = vb[i]; if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);}
38   }
39 
40   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
41   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
42   ierr = PetscFree(idxb);CHKERRQ(ierr);
43   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
44   PetscFunctionReturn(0);
45 }
46 
47 EXTERN_C_BEGIN
48 #undef __FUNCT__
49 #define __FUNCT__ "MatStoreValues_MPIBAIJ"
50 PetscErrorCode  MatStoreValues_MPIBAIJ(Mat mat)
51 {
52   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
53   PetscErrorCode ierr;
54 
55   PetscFunctionBegin;
56   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
57   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
58   PetscFunctionReturn(0);
59 }
60 EXTERN_C_END
61 
62 EXTERN_C_BEGIN
63 #undef __FUNCT__
64 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
65 PetscErrorCode  MatRetrieveValues_MPIBAIJ(Mat mat)
66 {
67   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
68   PetscErrorCode ierr;
69 
70   PetscFunctionBegin;
71   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
72   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
73   PetscFunctionReturn(0);
74 }
75 EXTERN_C_END
76 
77 /*
78      Local utility routine that creates a mapping from the global column
79    number to the local number in the off-diagonal part of the local
80    storage of the matrix.  This is done in a non scalable way since the
81    length of colmap equals the global matrix length.
82 */
83 #undef __FUNCT__
84 #define __FUNCT__ "MatCreateColmap_MPIBAIJ_Private"
85 PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
86 {
87   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
88   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
89   PetscErrorCode ierr;
90   PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;
91 
92   PetscFunctionBegin;
93 #if defined (PETSC_USE_CTABLE)
94   ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
95   for (i=0; i<nbs; i++){
96     ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr);
97   }
98 #else
99   ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr);
100   ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
101   ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
102   for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
103 #endif
104   PetscFunctionReturn(0);
105 }
106 
107 #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
108 { \
109  \
110     brow = row/bs;  \
111     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
112     rmax = aimax[brow]; nrow = ailen[brow]; \
113       bcol = col/bs; \
114       ridx = row % bs; cidx = col % bs; \
115       low = 0; high = nrow; \
116       while (high-low > 3) { \
117         t = (low+high)/2; \
118         if (rp[t] > bcol) high = t; \
119         else              low  = t; \
120       } \
121       for (_i=low; _i<high; _i++) { \
122         if (rp[_i] > bcol) break; \
123         if (rp[_i] == bcol) { \
124           bap  = ap +  bs2*_i + bs*cidx + ridx; \
125           if (addv == ADD_VALUES) *bap += value;  \
126           else                    *bap  = value;  \
127           goto a_noinsert; \
128         } \
129       } \
130       if (a->nonew == 1) goto a_noinsert; \
131       if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
132       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
133       N = nrow++ - 1;  \
134       /* shift up all the later entries in this row */ \
135       for (ii=N; ii>=_i; ii--) { \
136         rp[ii+1] = rp[ii]; \
137         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
138       } \
139       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
140       rp[_i]                      = bcol;  \
141       ap[bs2*_i + bs*cidx + ridx] = value;  \
142       a_noinsert:; \
143     ailen[brow] = nrow; \
144 }
145 
146 #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
147 { \
148     brow = row/bs;  \
149     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
150     rmax = bimax[brow]; nrow = bilen[brow]; \
151       bcol = col/bs; \
152       ridx = row % bs; cidx = col % bs; \
153       low = 0; high = nrow; \
154       while (high-low > 3) { \
155         t = (low+high)/2; \
156         if (rp[t] > bcol) high = t; \
157         else              low  = t; \
158       } \
159       for (_i=low; _i<high; _i++) { \
160         if (rp[_i] > bcol) break; \
161         if (rp[_i] == bcol) { \
162           bap  = ap +  bs2*_i + bs*cidx + ridx; \
163           if (addv == ADD_VALUES) *bap += value;  \
164           else                    *bap  = value;  \
165           goto b_noinsert; \
166         } \
167       } \
168       if (b->nonew == 1) goto b_noinsert; \
169       if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
170       MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
171       CHKMEMQ;\
172       N = nrow++ - 1;  \
173       /* shift up all the later entries in this row */ \
174       for (ii=N; ii>=_i; ii--) { \
175         rp[ii+1] = rp[ii]; \
176         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
177       } \
178       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
179       rp[_i]                      = bcol;  \
180       ap[bs2*_i + bs*cidx + ridx] = value;  \
181       b_noinsert:; \
182     bilen[brow] = nrow; \
183 }
184 
185 #undef __FUNCT__
186 #define __FUNCT__ "MatSetValues_MPIBAIJ"
187 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
188 {
189   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
190   MatScalar      value;
191   PetscBool      roworiented = baij->roworiented;
192   PetscErrorCode ierr;
193   PetscInt       i,j,row,col;
194   PetscInt       rstart_orig=mat->rmap->rstart;
195   PetscInt       rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart;
196   PetscInt       cend_orig=mat->cmap->rend,bs=mat->rmap->bs;
197 
198   /* Some Variables required in the macro */
199   Mat            A = baij->A;
200   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)(A)->data;
201   PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
202   MatScalar      *aa=a->a;
203 
204   Mat            B = baij->B;
205   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(B)->data;
206   PetscInt       *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
207   MatScalar      *ba=b->a;
208 
209   PetscInt       *rp,ii,nrow,_i,rmax,N,brow,bcol;
210   PetscInt       low,high,t,ridx,cidx,bs2=a->bs2;
211   MatScalar      *ap,*bap;
212 
213   PetscFunctionBegin;
214   if (v) PetscValidScalarPointer(v,6);
215   for (i=0; i<m; i++) {
216     if (im[i] < 0) continue;
217 #if defined(PETSC_USE_DEBUG)
218     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
219 #endif
220     if (im[i] >= rstart_orig && im[i] < rend_orig) {
221       row = im[i] - rstart_orig;
222       for (j=0; j<n; j++) {
223         if (in[j] >= cstart_orig && in[j] < cend_orig){
224           col = in[j] - cstart_orig;
225           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
226           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
227           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
228         } else if (in[j] < 0) continue;
229 #if defined(PETSC_USE_DEBUG)
230         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
231 #endif
232         else {
233           if (mat->was_assembled) {
234             if (!baij->colmap) {
235               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
236             }
237 #if defined (PETSC_USE_CTABLE)
238             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
239             col  = col - 1;
240 #else
241             col = baij->colmap[in[j]/bs] - 1;
242 #endif
243             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
244               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
245               col =  in[j];
246               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
247               B = baij->B;
248               b = (Mat_SeqBAIJ*)(B)->data;
249               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
250               ba=b->a;
251             } else col += in[j]%bs;
252           } else col = in[j];
253           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
254           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
255           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
256         }
257       }
258     } else {
259       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
260       if (!baij->donotstash) {
261         mat->assembled = PETSC_FALSE;
262         if (roworiented) {
263           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
264         } else {
265           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
266         }
267       }
268     }
269   }
270   PetscFunctionReturn(0);
271 }
272 
273 #undef __FUNCT__
274 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
275 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
276 {
277   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
278   const PetscScalar *value;
279   MatScalar         *barray=baij->barray;
280   PetscBool         roworiented = baij->roworiented;
281   PetscErrorCode    ierr;
282   PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
283   PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
284   PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
285 
286   PetscFunctionBegin;
287   if(!barray) {
288     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
289     baij->barray = barray;
290   }
291 
292   if (roworiented) {
293     stepval = (n-1)*bs;
294   } else {
295     stepval = (m-1)*bs;
296   }
297   for (i=0; i<m; i++) {
298     if (im[i] < 0) continue;
299 #if defined(PETSC_USE_DEBUG)
300     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
301 #endif
302     if (im[i] >= rstart && im[i] < rend) {
303       row = im[i] - rstart;
304       for (j=0; j<n; j++) {
305         /* If NumCol = 1 then a copy is not required */
306         if ((roworiented) && (n == 1)) {
307           barray = (MatScalar*)v + i*bs2;
308         } else if((!roworiented) && (m == 1)) {
309           barray = (MatScalar*)v + j*bs2;
310         } else { /* Here a copy is required */
311           if (roworiented) {
312             value = v + (i*(stepval+bs) + j)*bs;
313           } else {
314 	    value = v + (j*(stepval+bs) + i)*bs;
315           }
316           for (ii=0; ii<bs; ii++,value+=bs+stepval) {
317             for (jj=0; jj<bs; jj++) {
318               barray[jj]  = value[jj];
319             }
320             barray += bs;
321           }
322           barray -= bs2;
323         }
324 
325         if (in[j] >= cstart && in[j] < cend){
326           col  = in[j] - cstart;
327           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
328         }
329         else if (in[j] < 0) continue;
330 #if defined(PETSC_USE_DEBUG)
331         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
332 #endif
333         else {
334           if (mat->was_assembled) {
335             if (!baij->colmap) {
336               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
337             }
338 
339 #if defined(PETSC_USE_DEBUG)
340 #if defined (PETSC_USE_CTABLE)
341             { PetscInt data;
342               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
343               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
344             }
345 #else
346             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
347 #endif
348 #endif
349 #if defined (PETSC_USE_CTABLE)
350 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
351             col  = (col - 1)/bs;
352 #else
353             col = (baij->colmap[in[j]] - 1)/bs;
354 #endif
355             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
356               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
357               col =  in[j];
358             }
359           }
360           else col = in[j];
361           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
362         }
363       }
364     } else {
365       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
366       if (!baij->donotstash) {
367         if (roworiented) {
368           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
369         } else {
370           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
371         }
372       }
373     }
374   }
375   PetscFunctionReturn(0);
376 }
377 
378 #define HASH_KEY 0.6180339887
379 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
380 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
381 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
382 #undef __FUNCT__
383 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
384 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
385 {
386   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
387   PetscBool      roworiented = baij->roworiented;
388   PetscErrorCode ierr;
389   PetscInt       i,j,row,col;
390   PetscInt       rstart_orig=mat->rmap->rstart;
391   PetscInt       rend_orig=mat->rmap->rend,Nbs=baij->Nbs;
392   PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
393   PetscReal      tmp;
394   MatScalar      **HD = baij->hd,value;
395 #if defined(PETSC_USE_DEBUG)
396   PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
397 #endif
398 
399   PetscFunctionBegin;
400   if (v) PetscValidScalarPointer(v,6);
401   for (i=0; i<m; i++) {
402 #if defined(PETSC_USE_DEBUG)
403     if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
404     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
405 #endif
406       row = im[i];
407     if (row >= rstart_orig && row < rend_orig) {
408       for (j=0; j<n; j++) {
409         col = in[j];
410         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
411         /* Look up PetscInto the Hash Table */
412         key = (row/bs)*Nbs+(col/bs)+1;
413         h1  = HASH(size,key,tmp);
414 
415 
416         idx = h1;
417 #if defined(PETSC_USE_DEBUG)
418         insert_ct++;
419         total_ct++;
420         if (HT[idx] != key) {
421           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
422           if (idx == size) {
423             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
424             if (idx == h1) {
425               SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
426             }
427           }
428         }
429 #else
430         if (HT[idx] != key) {
431           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
432           if (idx == size) {
433             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
434             if (idx == h1) {
435               SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
436             }
437           }
438         }
439 #endif
440         /* A HASH table entry is found, so insert the values at the correct address */
441         if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
442         else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
443       }
444     } else {
445       if (!baij->donotstash) {
446         if (roworiented) {
447           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
448         } else {
449           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
450         }
451       }
452     }
453   }
454 #if defined(PETSC_USE_DEBUG)
455   baij->ht_total_ct = total_ct;
456   baij->ht_insert_ct = insert_ct;
457 #endif
458   PetscFunctionReturn(0);
459 }
460 
461 #undef __FUNCT__
462 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
463 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
464 {
465   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
466   PetscBool         roworiented = baij->roworiented;
467   PetscErrorCode    ierr;
468   PetscInt          i,j,ii,jj,row,col;
469   PetscInt          rstart=baij->rstartbs;
470   PetscInt          rend=mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
471   PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
472   PetscReal         tmp;
473   MatScalar         **HD = baij->hd,*baij_a;
474   const PetscScalar *v_t,*value;
475 #if defined(PETSC_USE_DEBUG)
476   PetscInt          total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
477 #endif
478 
479   PetscFunctionBegin;
480 
481   if (roworiented) {
482     stepval = (n-1)*bs;
483   } else {
484     stepval = (m-1)*bs;
485   }
486   for (i=0; i<m; i++) {
487 #if defined(PETSC_USE_DEBUG)
488     if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
489     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
490 #endif
491     row   = im[i];
492     v_t   = v + i*nbs2;
493     if (row >= rstart && row < rend) {
494       for (j=0; j<n; j++) {
495         col = in[j];
496 
497         /* Look up into the Hash Table */
498         key = row*Nbs+col+1;
499         h1  = HASH(size,key,tmp);
500 
501         idx = h1;
502 #if defined(PETSC_USE_DEBUG)
503         total_ct++;
504         insert_ct++;
505        if (HT[idx] != key) {
506           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
507           if (idx == size) {
508             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
509             if (idx == h1) {
510               SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
511             }
512           }
513         }
514 #else
515         if (HT[idx] != key) {
516           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
517           if (idx == size) {
518             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
519             if (idx == h1) {
520               SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
521             }
522           }
523         }
524 #endif
525         baij_a = HD[idx];
526         if (roworiented) {
527           /*value = v + i*(stepval+bs)*bs + j*bs;*/
528           /* value = v + (i*(stepval+bs)+j)*bs; */
529           value = v_t;
530           v_t  += bs;
531           if (addv == ADD_VALUES) {
532             for (ii=0; ii<bs; ii++,value+=stepval) {
533               for (jj=ii; jj<bs2; jj+=bs) {
534                 baij_a[jj]  += *value++;
535               }
536             }
537           } else {
538             for (ii=0; ii<bs; ii++,value+=stepval) {
539               for (jj=ii; jj<bs2; jj+=bs) {
540                 baij_a[jj]  = *value++;
541               }
542             }
543           }
544         } else {
545           value = v + j*(stepval+bs)*bs + i*bs;
546           if (addv == ADD_VALUES) {
547             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
548               for (jj=0; jj<bs; jj++) {
549                 baij_a[jj]  += *value++;
550               }
551             }
552           } else {
553             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
554               for (jj=0; jj<bs; jj++) {
555                 baij_a[jj]  = *value++;
556               }
557             }
558           }
559         }
560       }
561     } else {
562       if (!baij->donotstash) {
563         if (roworiented) {
564           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
565         } else {
566           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
567         }
568       }
569     }
570   }
571 #if defined(PETSC_USE_DEBUG)
572   baij->ht_total_ct = total_ct;
573   baij->ht_insert_ct = insert_ct;
574 #endif
575   PetscFunctionReturn(0);
576 }
577 
578 #undef __FUNCT__
579 #define __FUNCT__ "MatGetValues_MPIBAIJ"
580 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
581 {
582   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
583   PetscErrorCode ierr;
584   PetscInt       bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
585   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
586 
587   PetscFunctionBegin;
588   for (i=0; i<m; i++) {
589     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
590     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
591     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
592       row = idxm[i] - bsrstart;
593       for (j=0; j<n; j++) {
594         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
595         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
596         if (idxn[j] >= bscstart && idxn[j] < bscend){
597           col = idxn[j] - bscstart;
598           ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
599         } else {
600           if (!baij->colmap) {
601             ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
602           }
603 #if defined (PETSC_USE_CTABLE)
604           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
605           data --;
606 #else
607           data = baij->colmap[idxn[j]/bs]-1;
608 #endif
609           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
610           else {
611             col  = data + idxn[j]%bs;
612             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
613           }
614         }
615       }
616     } else {
617       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
618     }
619   }
620  PetscFunctionReturn(0);
621 }
622 
623 #undef __FUNCT__
624 #define __FUNCT__ "MatNorm_MPIBAIJ"
625 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
626 {
627   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
628   Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
629   PetscErrorCode ierr;
630   PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
631   PetscReal      sum = 0.0;
632   MatScalar      *v;
633 
634   PetscFunctionBegin;
635   if (baij->size == 1) {
636     ierr =  MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
637   } else {
638     if (type == NORM_FROBENIUS) {
639       v = amat->a;
640       nz = amat->nz*bs2;
641       for (i=0; i<nz; i++) {
642 #if defined(PETSC_USE_COMPLEX)
643         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
644 #else
645         sum += (*v)*(*v); v++;
646 #endif
647       }
648       v = bmat->a;
649       nz = bmat->nz*bs2;
650       for (i=0; i<nz; i++) {
651 #if defined(PETSC_USE_COMPLEX)
652         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
653 #else
654         sum += (*v)*(*v); v++;
655 #endif
656       }
657       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
658       *nrm = PetscSqrtReal(*nrm);
659     } else if (type == NORM_1) { /* max column sum */
660       PetscReal *tmp,*tmp2;
661       PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
662       ierr = PetscMalloc2(mat->cmap->N,PetscReal,&tmp,mat->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr);
663       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
664       v = amat->a; jj = amat->j;
665       for (i=0; i<amat->nz; i++) {
666         for (j=0; j<bs; j++){
667           col = bs*(cstart + *jj) + j; /* column index */
668           for (row=0; row<bs; row++){
669             tmp[col] += PetscAbsScalar(*v);  v++;
670           }
671         }
672         jj++;
673       }
674       v = bmat->a; jj = bmat->j;
675       for (i=0; i<bmat->nz; i++) {
676         for (j=0; j<bs; j++){
677           col = bs*garray[*jj] + j;
678           for (row=0; row<bs; row++){
679             tmp[col] += PetscAbsScalar(*v); v++;
680           }
681         }
682         jj++;
683       }
684       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
685       *nrm = 0.0;
686       for (j=0; j<mat->cmap->N; j++) {
687         if (tmp2[j] > *nrm) *nrm = tmp2[j];
688       }
689       ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
690     } else if (type == NORM_INFINITY) { /* max row sum */
691       PetscReal *sums;
692       ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr);
693       sum = 0.0;
694       for (j=0; j<amat->mbs; j++) {
695         for (row=0; row<bs; row++) sums[row] = 0.0;
696         v = amat->a + bs2*amat->i[j];
697         nz = amat->i[j+1]-amat->i[j];
698         for (i=0; i<nz; i++) {
699           for (col=0; col<bs; col++){
700             for (row=0; row<bs; row++){
701               sums[row] += PetscAbsScalar(*v); v++;
702             }
703           }
704         }
705         v = bmat->a + bs2*bmat->i[j];
706         nz = bmat->i[j+1]-bmat->i[j];
707         for (i=0; i<nz; i++) {
708           for (col=0; col<bs; col++){
709             for (row=0; row<bs; row++){
710               sums[row] += PetscAbsScalar(*v); v++;
711             }
712           }
713         }
714         for (row=0; row<bs; row++){
715           if (sums[row] > sum) sum = sums[row];
716         }
717       }
718       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
719       ierr = PetscFree(sums);CHKERRQ(ierr);
720     } else SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for this norm yet");
721   }
722   PetscFunctionReturn(0);
723 }
724 
725 /*
726   Creates the hash table, and sets the table
727   This table is created only once.
728   If new entried need to be added to the matrix
729   then the hash table has to be destroyed and
730   recreated.
731 */
732 #undef __FUNCT__
733 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
734 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
735 {
736   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
737   Mat            A = baij->A,B=baij->B;
738   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
739   PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
740   PetscErrorCode ierr;
741   PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
742   PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
743   PetscInt       *HT,key;
744   MatScalar      **HD;
745   PetscReal      tmp;
746 #if defined(PETSC_USE_INFO)
747   PetscInt       ct=0,max=0;
748 #endif
749 
750   PetscFunctionBegin;
751   if (baij->ht) PetscFunctionReturn(0);
752 
753   baij->ht_size = (PetscInt)(factor*nz);
754   ht_size       = baij->ht_size;
755 
756   /* Allocate Memory for Hash Table */
757   ierr = PetscMalloc2(ht_size,MatScalar*,&baij->hd,ht_size,PetscInt,&baij->ht);CHKERRQ(ierr);
758   ierr = PetscMemzero(baij->hd,ht_size*sizeof(MatScalar*));CHKERRQ(ierr);
759   ierr = PetscMemzero(baij->ht,ht_size*sizeof(PetscInt));CHKERRQ(ierr);
760   HD   = baij->hd;
761   HT   = baij->ht;
762 
763   /* Loop Over A */
764   for (i=0; i<a->mbs; i++) {
765     for (j=ai[i]; j<ai[i+1]; j++) {
766       row = i+rstart;
767       col = aj[j]+cstart;
768 
769       key = row*Nbs + col + 1;
770       h1  = HASH(ht_size,key,tmp);
771       for (k=0; k<ht_size; k++){
772         if (!HT[(h1+k)%ht_size]) {
773           HT[(h1+k)%ht_size] = key;
774           HD[(h1+k)%ht_size] = a->a + j*bs2;
775           break;
776 #if defined(PETSC_USE_INFO)
777         } else {
778           ct++;
779 #endif
780         }
781       }
782 #if defined(PETSC_USE_INFO)
783       if (k> max) max = k;
784 #endif
785     }
786   }
787   /* Loop Over B */
788   for (i=0; i<b->mbs; i++) {
789     for (j=bi[i]; j<bi[i+1]; j++) {
790       row = i+rstart;
791       col = garray[bj[j]];
792       key = row*Nbs + col + 1;
793       h1  = HASH(ht_size,key,tmp);
794       for (k=0; k<ht_size; k++){
795         if (!HT[(h1+k)%ht_size]) {
796           HT[(h1+k)%ht_size] = key;
797           HD[(h1+k)%ht_size] = b->a + j*bs2;
798           break;
799 #if defined(PETSC_USE_INFO)
800         } else {
801           ct++;
802 #endif
803         }
804       }
805 #if defined(PETSC_USE_INFO)
806       if (k> max) max = k;
807 #endif
808     }
809   }
810 
811   /* Print Summary */
812 #if defined(PETSC_USE_INFO)
813   for (i=0,j=0; i<ht_size; i++) {
814     if (HT[i]) {j++;}
815   }
816   ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
817 #endif
818   PetscFunctionReturn(0);
819 }
820 
821 #undef __FUNCT__
822 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
823 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
824 {
825   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
826   PetscErrorCode ierr;
827   PetscInt       nstash,reallocs;
828   InsertMode     addv;
829 
830   PetscFunctionBegin;
831   if (baij->donotstash || mat->nooffprocentries) {
832     PetscFunctionReturn(0);
833   }
834 
835   /* make sure all processors are either in INSERTMODE or ADDMODE */
836   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
837   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
838   mat->insertmode = addv; /* in case this processor had no cache */
839 
840   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
841   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
842   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
843   ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
844   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
845   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
846   PetscFunctionReturn(0);
847 }
848 
849 #undef __FUNCT__
850 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
851 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
852 {
853   Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
854   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)baij->A->data;
855   PetscErrorCode ierr;
856   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
857   PetscInt       *row,*col;
858   PetscBool      r1,r2,r3,other_disassembled;
859   MatScalar      *val;
860   InsertMode     addv = mat->insertmode;
861   PetscMPIInt    n;
862 
863   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
864   PetscFunctionBegin;
865   if (!baij->donotstash && !mat->nooffprocentries) {
866     while (1) {
867       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
868       if (!flg) break;
869 
870       for (i=0; i<n;) {
871         /* Now identify the consecutive vals belonging to the same row */
872         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
873         if (j < n) ncols = j-i;
874         else       ncols = n-i;
875         /* Now assemble all these values with a single function call */
876         ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
877         i = j;
878       }
879     }
880     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
881     /* Now process the block-stash. Since the values are stashed column-oriented,
882        set the roworiented flag to column oriented, and after MatSetValues()
883        restore the original flags */
884     r1 = baij->roworiented;
885     r2 = a->roworiented;
886     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
887     baij->roworiented = PETSC_FALSE;
888     a->roworiented    = PETSC_FALSE;
889     (((Mat_SeqBAIJ*)baij->B->data))->roworiented    = PETSC_FALSE; /* b->roworiented */
890     while (1) {
891       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
892       if (!flg) break;
893 
894       for (i=0; i<n;) {
895         /* Now identify the consecutive vals belonging to the same row */
896         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
897         if (j < n) ncols = j-i;
898         else       ncols = n-i;
899         ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
900         i = j;
901       }
902     }
903     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
904     baij->roworiented = r1;
905     a->roworiented    = r2;
906     ((Mat_SeqBAIJ*)baij->B->data)->roworiented    = r3; /* b->roworiented */
907   }
908 
909   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
910   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
911 
912   /* determine if any processor has disassembled, if so we must
913      also disassemble ourselfs, in order that we may reassemble. */
914   /*
915      if nonzero structure of submatrix B cannot change then we know that
916      no processor disassembled thus we can skip this stuff
917   */
918   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
919     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
920     if (mat->was_assembled && !other_disassembled) {
921       ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
922     }
923   }
924 
925   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
926     ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
927   }
928   ierr = MatSetOption(baij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
929   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
930   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
931 
932 #if defined(PETSC_USE_INFO)
933   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
934     ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);
935     baij->ht_total_ct  = 0;
936     baij->ht_insert_ct = 0;
937   }
938 #endif
939   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
940     ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
941     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
942     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
943   }
944 
945   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
946   baij->rowvalues = 0;
947   PetscFunctionReturn(0);
948 }
949 
950 #undef __FUNCT__
951 #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
952 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
953 {
954   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
955   PetscErrorCode    ierr;
956   PetscMPIInt       size = baij->size,rank = baij->rank;
957   PetscInt          bs = mat->rmap->bs;
958   PetscBool         iascii,isdraw;
959   PetscViewer       sviewer;
960   PetscViewerFormat format;
961 
962   PetscFunctionBegin;
963   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
964   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
965   if (iascii) {
966     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
967     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
968       MatInfo info;
969       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
970       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
971       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
972       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
973              rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
974       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
975       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
976       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
977       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
978       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
979       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
980       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
981       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
982       PetscFunctionReturn(0);
983     } else if (format == PETSC_VIEWER_ASCII_INFO) {
984       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
985       PetscFunctionReturn(0);
986     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
987       PetscFunctionReturn(0);
988     }
989   }
990 
991   if (isdraw) {
992     PetscDraw       draw;
993     PetscBool  isnull;
994     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
995     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
996   }
997 
998   if (size == 1) {
999     ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1000     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
1001   } else {
1002     /* assemble the entire matrix onto first processor. */
1003     Mat         A;
1004     Mat_SeqBAIJ *Aloc;
1005     PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
1006     MatScalar   *a;
1007 
1008     /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
1009     /* Perhaps this should be the type of mat? */
1010     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1011     if (!rank) {
1012       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1013     } else {
1014       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1015     }
1016     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
1017     ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1018     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1019     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1020 
1021     /* copy over the A part */
1022     Aloc = (Mat_SeqBAIJ*)baij->A->data;
1023     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1024     ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
1025 
1026     for (i=0; i<mbs; i++) {
1027       rvals[0] = bs*(baij->rstartbs + i);
1028       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1029       for (j=ai[i]; j<ai[i+1]; j++) {
1030         col = (baij->cstartbs+aj[j])*bs;
1031         for (k=0; k<bs; k++) {
1032           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1033           col++; a += bs;
1034         }
1035       }
1036     }
1037     /* copy over the B part */
1038     Aloc = (Mat_SeqBAIJ*)baij->B->data;
1039     ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1040     for (i=0; i<mbs; i++) {
1041       rvals[0] = bs*(baij->rstartbs + i);
1042       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1043       for (j=ai[i]; j<ai[i+1]; j++) {
1044         col = baij->garray[aj[j]]*bs;
1045         for (k=0; k<bs; k++) {
1046           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1047           col++; a += bs;
1048         }
1049       }
1050     }
1051     ierr = PetscFree(rvals);CHKERRQ(ierr);
1052     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1053     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1054     /*
1055        Everyone has to call to draw the matrix since the graphics waits are
1056        synchronized across all processors that share the PetscDraw object
1057     */
1058     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1059     if (!rank) {
1060       ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1061     /* Set the type name to MATMPIBAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqBAIJ_ASCII()*/
1062       PetscStrcpy(((PetscObject)((Mat_MPIBAIJ*)(A->data))->A)->type_name,MATMPIBAIJ);
1063       ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1064     }
1065     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1066     ierr = MatDestroy(&A);CHKERRQ(ierr);
1067   }
1068   PetscFunctionReturn(0);
1069 }
1070 
1071 #undef __FUNCT__
1072 #define __FUNCT__ "MatView_MPIBAIJ_Binary"
1073 static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
1074 {
1075   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)mat->data;
1076   Mat_SeqBAIJ*   A = (Mat_SeqBAIJ*)a->A->data;
1077   Mat_SeqBAIJ*   B = (Mat_SeqBAIJ*)a->B->data;
1078   PetscErrorCode ierr;
1079   PetscInt       i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen;
1080   PetscInt       *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll;
1081   int            fd;
1082   PetscScalar    *column_values;
1083   FILE           *file;
1084   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1085   PetscInt       message_count,flowcontrolcount;
1086 
1087   PetscFunctionBegin;
1088   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1089   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
1090   nz   = bs2*(A->nz + B->nz);
1091   rlen = mat->rmap->n;
1092   if (!rank) {
1093     header[0] = MAT_FILE_CLASSID;
1094     header[1] = mat->rmap->N;
1095     header[2] = mat->cmap->N;
1096     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1097     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1098     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1099     /* get largest number of rows any processor has */
1100     range = mat->rmap->range;
1101     for (i=1; i<size; i++) {
1102       rlen = PetscMax(rlen,range[i+1] - range[i]);
1103     }
1104   } else {
1105     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1106   }
1107 
1108   ierr  = PetscMalloc((rlen/bs)*sizeof(PetscInt),&crow_lens);CHKERRQ(ierr);
1109   /* compute lengths of each row  */
1110   for (i=0; i<a->mbs; i++) {
1111     crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1112   }
1113   /* store the row lengths to the file */
1114   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1115   if (!rank) {
1116     MPI_Status status;
1117     ierr  = PetscMalloc(rlen*sizeof(PetscInt),&row_lens);CHKERRQ(ierr);
1118     rlen  = (range[1] - range[0])/bs;
1119     for (i=0; i<rlen; i++) {
1120       for (j=0; j<bs; j++) {
1121         row_lens[i*bs+j] = bs*crow_lens[i];
1122       }
1123     }
1124     ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1125     for (i=1; i<size; i++) {
1126       rlen = (range[i+1] - range[i])/bs;
1127       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1128       ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1129       for (k=0; k<rlen; k++) {
1130 	for (j=0; j<bs; j++) {
1131 	  row_lens[k*bs+j] = bs*crow_lens[k];
1132 	}
1133       }
1134       ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1135     }
1136     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1137     ierr = PetscFree(row_lens);CHKERRQ(ierr);
1138   } else {
1139     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1140     ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1141     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1142   }
1143   ierr = PetscFree(crow_lens);CHKERRQ(ierr);
1144 
1145   /* load up the local column indices. Include for all rows not just one for each block row since process 0 does not have the
1146      information needed to make it for each row from a block row. This does require more communication but still not more than
1147      the communication needed for the nonzero values  */
1148   nzmax = nz; /*  space a largest processor needs */
1149   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1150   ierr = PetscMalloc(nzmax*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
1151   cnt  = 0;
1152   for (i=0; i<a->mbs; i++) {
1153     pcnt = cnt;
1154     for (j=B->i[i]; j<B->i[i+1]; j++) {
1155       if ( (col = garray[B->j[j]]) > cstart) break;
1156       for (l=0; l<bs; l++) {
1157 	column_indices[cnt++] = bs*col+l;
1158       }
1159     }
1160     for (k=A->i[i]; k<A->i[i+1]; k++) {
1161       for (l=0; l<bs; l++) {
1162         column_indices[cnt++] = bs*(A->j[k] + cstart)+l;
1163       }
1164     }
1165     for (; j<B->i[i+1]; j++) {
1166       for (l=0; l<bs; l++) {
1167         column_indices[cnt++] = bs*garray[B->j[j]]+l;
1168       }
1169     }
1170     len = cnt - pcnt;
1171     for (k=1; k<bs; k++) {
1172       ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr);
1173       cnt += len;
1174     }
1175   }
1176   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1177 
1178   /* store the columns to the file */
1179   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1180   if (!rank) {
1181     MPI_Status status;
1182     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1183     for (i=1; i<size; i++) {
1184       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1185       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1186       ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1187       ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1188     }
1189     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1190   } else {
1191     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1192     ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1193     ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1194     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1195   }
1196   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1197 
1198   /* load up the numerical values */
1199   ierr = PetscMalloc(nzmax*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1200   cnt = 0;
1201   for (i=0; i<a->mbs; i++) {
1202     rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]);
1203     for (j=B->i[i]; j<B->i[i+1]; j++) {
1204       if ( garray[B->j[j]] > cstart) break;
1205       for (l=0; l<bs; l++) {
1206         for (ll=0; ll<bs; ll++) {
1207 	  column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1208         }
1209       }
1210       cnt += bs;
1211     }
1212     for (k=A->i[i]; k<A->i[i+1]; k++) {
1213       for (l=0; l<bs; l++) {
1214         for (ll=0; ll<bs; ll++) {
1215           column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll];
1216         }
1217       }
1218       cnt += bs;
1219     }
1220     for (; j<B->i[i+1]; j++) {
1221       for (l=0; l<bs; l++) {
1222         for (ll=0; ll<bs; ll++) {
1223 	  column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1224         }
1225       }
1226       cnt += bs;
1227     }
1228     cnt += (bs-1)*rlen;
1229   }
1230   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1231 
1232   /* store the column values to the file */
1233   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1234   if (!rank) {
1235     MPI_Status status;
1236     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1237     for (i=1; i<size; i++) {
1238       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1239       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1240       ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1241       ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1242     }
1243     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1244   } else {
1245     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1246     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1247     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1248     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1249   }
1250   ierr = PetscFree(column_values);CHKERRQ(ierr);
1251 
1252   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1253   if (file) {
1254     fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
1255   }
1256   PetscFunctionReturn(0);
1257 }
1258 
1259 #undef __FUNCT__
1260 #define __FUNCT__ "MatView_MPIBAIJ"
1261 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
1262 {
1263   PetscErrorCode ierr;
1264   PetscBool      iascii,isdraw,issocket,isbinary;
1265 
1266   PetscFunctionBegin;
1267   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1268   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1269   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1270   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1271   if (iascii || isdraw || issocket) {
1272     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1273   } else if (isbinary) {
1274     ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1275   } else {
1276     SETERRQ1(((PetscObject)mat)->comm,PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
1277   }
1278   PetscFunctionReturn(0);
1279 }
1280 
1281 #undef __FUNCT__
1282 #define __FUNCT__ "MatDestroy_MPIBAIJ"
1283 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
1284 {
1285   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1286   PetscErrorCode ierr;
1287 
1288   PetscFunctionBegin;
1289 #if defined(PETSC_USE_LOG)
1290   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
1291 #endif
1292   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1293   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
1294   ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
1295   ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
1296 #if defined (PETSC_USE_CTABLE)
1297   ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
1298 #else
1299   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
1300 #endif
1301   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
1302   ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
1303   ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
1304   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
1305   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
1306   ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);
1307   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
1308   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1309 
1310   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1311   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1312   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1313   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
1314   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1315   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1316   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
1317   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr);
1318   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1319   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C","",PETSC_NULL);CHKERRQ(ierr);
1320   PetscFunctionReturn(0);
1321 }
1322 
1323 #undef __FUNCT__
1324 #define __FUNCT__ "MatMult_MPIBAIJ"
1325 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1326 {
1327   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1328   PetscErrorCode ierr;
1329   PetscInt       nt;
1330 
1331   PetscFunctionBegin;
1332   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1333   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1334   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1335   if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
1336   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1337   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1338   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1339   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1340   PetscFunctionReturn(0);
1341 }
1342 
1343 #undef __FUNCT__
1344 #define __FUNCT__ "MatMultAdd_MPIBAIJ"
1345 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1346 {
1347   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1348   PetscErrorCode ierr;
1349 
1350   PetscFunctionBegin;
1351   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1352   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1353   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1354   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1355   PetscFunctionReturn(0);
1356 }
1357 
1358 #undef __FUNCT__
1359 #define __FUNCT__ "MatMultTranspose_MPIBAIJ"
1360 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1361 {
1362   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1363   PetscErrorCode ierr;
1364   PetscBool      merged;
1365 
1366   PetscFunctionBegin;
1367   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1368   /* do nondiagonal part */
1369   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1370   if (!merged) {
1371     /* send it on its way */
1372     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1373     /* do local part */
1374     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1375     /* receive remote parts: note this assumes the values are not actually */
1376     /* inserted in yy until the next line */
1377     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1378   } else {
1379     /* do local part */
1380     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1381     /* send it on its way */
1382     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1383     /* values actually were received in the Begin() but we need to call this nop */
1384     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1385   }
1386   PetscFunctionReturn(0);
1387 }
1388 
1389 #undef __FUNCT__
1390 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
1391 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1392 {
1393   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1394   PetscErrorCode ierr;
1395 
1396   PetscFunctionBegin;
1397   /* do nondiagonal part */
1398   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1399   /* send it on its way */
1400   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1401   /* do local part */
1402   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1403   /* receive remote parts: note this assumes the values are not actually */
1404   /* inserted in yy until the next line, which is true for my implementation*/
1405   /* but is not perhaps always true. */
1406   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1407   PetscFunctionReturn(0);
1408 }
1409 
1410 /*
1411   This only works correctly for square matrices where the subblock A->A is the
1412    diagonal block
1413 */
1414 #undef __FUNCT__
1415 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
1416 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1417 {
1418   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1419   PetscErrorCode ierr;
1420 
1421   PetscFunctionBegin;
1422   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1423   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1424   PetscFunctionReturn(0);
1425 }
1426 
1427 #undef __FUNCT__
1428 #define __FUNCT__ "MatScale_MPIBAIJ"
1429 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
1430 {
1431   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1432   PetscErrorCode ierr;
1433 
1434   PetscFunctionBegin;
1435   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1436   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1437   PetscFunctionReturn(0);
1438 }
1439 
1440 #undef __FUNCT__
1441 #define __FUNCT__ "MatGetRow_MPIBAIJ"
1442 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1443 {
1444   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
1445   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1446   PetscErrorCode ierr;
1447   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1448   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
1449   PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;
1450 
1451   PetscFunctionBegin;
1452   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1453   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1454   mat->getrowactive = PETSC_TRUE;
1455 
1456   if (!mat->rowvalues && (idx || v)) {
1457     /*
1458         allocate enough space to hold information from the longest row.
1459     */
1460     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1461     PetscInt     max = 1,mbs = mat->mbs,tmp;
1462     for (i=0; i<mbs; i++) {
1463       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1464       if (max < tmp) { max = tmp; }
1465     }
1466     ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1467   }
1468   lrow = row - brstart;
1469 
1470   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1471   if (!v)   {pvA = 0; pvB = 0;}
1472   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1473   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1474   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1475   nztot = nzA + nzB;
1476 
1477   cmap  = mat->garray;
1478   if (v  || idx) {
1479     if (nztot) {
1480       /* Sort by increasing column numbers, assuming A and B already sorted */
1481       PetscInt imark = -1;
1482       if (v) {
1483         *v = v_p = mat->rowvalues;
1484         for (i=0; i<nzB; i++) {
1485           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1486           else break;
1487         }
1488         imark = i;
1489         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1490         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1491       }
1492       if (idx) {
1493         *idx = idx_p = mat->rowindices;
1494         if (imark > -1) {
1495           for (i=0; i<imark; i++) {
1496             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1497           }
1498         } else {
1499           for (i=0; i<nzB; i++) {
1500             if (cmap[cworkB[i]/bs] < cstart)
1501               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1502             else break;
1503           }
1504           imark = i;
1505         }
1506         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1507         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1508       }
1509     } else {
1510       if (idx) *idx = 0;
1511       if (v)   *v   = 0;
1512     }
1513   }
1514   *nz = nztot;
1515   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1516   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1517   PetscFunctionReturn(0);
1518 }
1519 
1520 #undef __FUNCT__
1521 #define __FUNCT__ "MatRestoreRow_MPIBAIJ"
1522 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1523 {
1524   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1525 
1526   PetscFunctionBegin;
1527   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1528   baij->getrowactive = PETSC_FALSE;
1529   PetscFunctionReturn(0);
1530 }
1531 
1532 #undef __FUNCT__
1533 #define __FUNCT__ "MatZeroEntries_MPIBAIJ"
1534 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1535 {
1536   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1537   PetscErrorCode ierr;
1538 
1539   PetscFunctionBegin;
1540   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1541   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1542   PetscFunctionReturn(0);
1543 }
1544 
1545 #undef __FUNCT__
1546 #define __FUNCT__ "MatGetInfo_MPIBAIJ"
1547 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1548 {
1549   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
1550   Mat            A = a->A,B = a->B;
1551   PetscErrorCode ierr;
1552   PetscReal      isend[5],irecv[5];
1553 
1554   PetscFunctionBegin;
1555   info->block_size     = (PetscReal)matin->rmap->bs;
1556   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1557   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1558   isend[3] = info->memory;  isend[4] = info->mallocs;
1559   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1560   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1561   isend[3] += info->memory;  isend[4] += info->mallocs;
1562   if (flag == MAT_LOCAL) {
1563     info->nz_used      = isend[0];
1564     info->nz_allocated = isend[1];
1565     info->nz_unneeded  = isend[2];
1566     info->memory       = isend[3];
1567     info->mallocs      = isend[4];
1568   } else if (flag == MAT_GLOBAL_MAX) {
1569     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1570     info->nz_used      = irecv[0];
1571     info->nz_allocated = irecv[1];
1572     info->nz_unneeded  = irecv[2];
1573     info->memory       = irecv[3];
1574     info->mallocs      = irecv[4];
1575   } else if (flag == MAT_GLOBAL_SUM) {
1576     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1577     info->nz_used      = irecv[0];
1578     info->nz_allocated = irecv[1];
1579     info->nz_unneeded  = irecv[2];
1580     info->memory       = irecv[3];
1581     info->mallocs      = irecv[4];
1582   } else {
1583     SETERRQ1(((PetscObject)matin)->comm,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1584   }
1585   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1586   info->fill_ratio_needed = 0;
1587   info->factor_mallocs    = 0;
1588   PetscFunctionReturn(0);
1589 }
1590 
1591 #undef __FUNCT__
1592 #define __FUNCT__ "MatSetOption_MPIBAIJ"
1593 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool  flg)
1594 {
1595   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1596   PetscErrorCode ierr;
1597 
1598   PetscFunctionBegin;
1599   switch (op) {
1600   case MAT_NEW_NONZERO_LOCATIONS:
1601   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1602   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1603   case MAT_KEEP_NONZERO_PATTERN:
1604   case MAT_NEW_NONZERO_LOCATION_ERR:
1605     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1606     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1607     break;
1608   case MAT_ROW_ORIENTED:
1609     a->roworiented = flg;
1610     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1611     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1612     break;
1613   case MAT_NEW_DIAGONALS:
1614     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1615     break;
1616   case MAT_IGNORE_OFF_PROC_ENTRIES:
1617     a->donotstash = flg;
1618     break;
1619   case MAT_USE_HASH_TABLE:
1620     a->ht_flag = flg;
1621     break;
1622   case MAT_SYMMETRIC:
1623   case MAT_STRUCTURALLY_SYMMETRIC:
1624   case MAT_HERMITIAN:
1625   case MAT_SYMMETRY_ETERNAL:
1626     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1627     break;
1628   default:
1629     SETERRQ1(((PetscObject)A)->comm,PETSC_ERR_SUP,"unknown option %d",op);
1630   }
1631   PetscFunctionReturn(0);
1632 }
1633 
1634 #undef __FUNCT__
1635 #define __FUNCT__ "MatTranspose_MPIBAIJ"
1636 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
1637 {
1638   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
1639   Mat_SeqBAIJ    *Aloc;
1640   Mat            B;
1641   PetscErrorCode ierr;
1642   PetscInt       M=A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
1643   PetscInt       bs=A->rmap->bs,mbs=baij->mbs;
1644   MatScalar      *a;
1645 
1646   PetscFunctionBegin;
1647   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1648   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1649     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
1650     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1651     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1652     /* Do not know preallocation information, but must set block size */
1653     ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,PETSC_NULL,PETSC_DECIDE,PETSC_NULL);CHKERRQ(ierr);
1654   } else {
1655     B = *matout;
1656   }
1657 
1658   /* copy over the A part */
1659   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1660   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1661   ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
1662 
1663   for (i=0; i<mbs; i++) {
1664     rvals[0] = bs*(baij->rstartbs + i);
1665     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1666     for (j=ai[i]; j<ai[i+1]; j++) {
1667       col = (baij->cstartbs+aj[j])*bs;
1668       for (k=0; k<bs; k++) {
1669         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1670         col++; a += bs;
1671       }
1672     }
1673   }
1674   /* copy over the B part */
1675   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1676   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1677   for (i=0; i<mbs; i++) {
1678     rvals[0] = bs*(baij->rstartbs + i);
1679     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1680     for (j=ai[i]; j<ai[i+1]; j++) {
1681       col = baij->garray[aj[j]]*bs;
1682       for (k=0; k<bs; k++) {
1683         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1684         col++; a += bs;
1685       }
1686     }
1687   }
1688   ierr = PetscFree(rvals);CHKERRQ(ierr);
1689   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1690   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1691 
1692   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1693     *matout = B;
1694   } else {
1695     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
1696   }
1697   PetscFunctionReturn(0);
1698 }
1699 
1700 #undef __FUNCT__
1701 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
1702 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1703 {
1704   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1705   Mat            a = baij->A,b = baij->B;
1706   PetscErrorCode ierr;
1707   PetscInt       s1,s2,s3;
1708 
1709   PetscFunctionBegin;
1710   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1711   if (rr) {
1712     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1713     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1714     /* Overlap communication with computation. */
1715     ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1716   }
1717   if (ll) {
1718     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1719     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1720     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1721   }
1722   /* scale  the diagonal block */
1723   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1724 
1725   if (rr) {
1726     /* Do a scatter end and then right scale the off-diagonal block */
1727     ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1728     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1729   }
1730 
1731   PetscFunctionReturn(0);
1732 }
1733 
1734 #undef __FUNCT__
1735 #define __FUNCT__ "MatZeroRows_MPIBAIJ"
1736 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
1737 {
1738   Mat_MPIBAIJ       *l = (Mat_MPIBAIJ*)A->data;
1739   PetscErrorCode    ierr;
1740   PetscMPIInt       imdex,size = l->size,n,rank = l->rank;
1741   PetscInt          i,*owners = A->rmap->range;
1742   PetscInt          *nprocs,j,idx,nsends,row;
1743   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
1744   PetscInt          *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source,lastidx = -1;
1745   PetscInt          *lens,*lrows,*values,rstart_bs=A->rmap->rstart;
1746   MPI_Comm          comm = ((PetscObject)A)->comm;
1747   MPI_Request       *send_waits,*recv_waits;
1748   MPI_Status        recv_status,*send_status;
1749   const PetscScalar *xx;
1750   PetscScalar       *bb;
1751 #if defined(PETSC_DEBUG)
1752   PetscBool         found = PETSC_FALSE;
1753 #endif
1754 
1755   PetscFunctionBegin;
1756   /*  first count number of contributors to each processor */
1757   ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
1758   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
1759   ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
1760   j = 0;
1761   for (i=0; i<N; i++) {
1762     if (lastidx > (idx = rows[i])) j = 0;
1763     lastidx = idx;
1764     for (; j<size; j++) {
1765       if (idx >= owners[j] && idx < owners[j+1]) {
1766         nprocs[2*j]++;
1767         nprocs[2*j+1] = 1;
1768         owner[i] = j;
1769 #if defined(PETSC_DEBUG)
1770         found = PETSC_TRUE;
1771 #endif
1772         break;
1773       }
1774     }
1775 #if defined(PETSC_DEBUG)
1776     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
1777     found = PETSC_FALSE;
1778 #endif
1779   }
1780   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
1781 
1782   if (A->nooffproczerorows) {
1783     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
1784     nrecvs = nsends;
1785     nmax   = N;
1786   } else {
1787     /* inform other processors of number of messages and max length*/
1788     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
1789   }
1790 
1791   /* post receives:   */
1792   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
1793   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
1794   for (i=0; i<nrecvs; i++) {
1795     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
1796   }
1797 
1798   /* do sends:
1799      1) starts[i] gives the starting index in svalues for stuff going to
1800      the ith processor
1801   */
1802   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
1803   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
1804   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
1805   starts[0]  = 0;
1806   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1807   for (i=0; i<N; i++) {
1808     svalues[starts[owner[i]]++] = rows[i];
1809   }
1810 
1811   starts[0] = 0;
1812   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1813   count = 0;
1814   for (i=0; i<size; i++) {
1815     if (nprocs[2*i+1]) {
1816       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
1817     }
1818   }
1819   ierr = PetscFree(starts);CHKERRQ(ierr);
1820 
1821   base = owners[rank];
1822 
1823   /*  wait on receives */
1824   ierr   = PetscMalloc2(nrecvs+1,PetscInt,&lens,nrecvs+1,PetscInt,&source);CHKERRQ(ierr);
1825   count  = nrecvs;
1826   slen = 0;
1827   while (count) {
1828     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
1829     /* unpack receives into our local space */
1830     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
1831     source[imdex]  = recv_status.MPI_SOURCE;
1832     lens[imdex]    = n;
1833     slen          += n;
1834     count--;
1835   }
1836   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1837 
1838   /* move the data into the send scatter */
1839   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
1840   count = 0;
1841   for (i=0; i<nrecvs; i++) {
1842     values = rvalues + i*nmax;
1843     for (j=0; j<lens[i]; j++) {
1844       lrows[count++] = values[j] - base;
1845     }
1846   }
1847   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1848   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
1849   ierr = PetscFree(owner);CHKERRQ(ierr);
1850   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1851 
1852   /* fix right hand side if needed */
1853   if (x && b) {
1854     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
1855     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1856     for (i=0; i<slen; i++) {
1857       bb[lrows[i]] = diag*xx[lrows[i]];
1858     }
1859     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
1860     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1861   }
1862 
1863   /* actually zap the local rows */
1864   /*
1865         Zero the required rows. If the "diagonal block" of the matrix
1866      is square and the user wishes to set the diagonal we use separate
1867      code so that MatSetValues() is not called for each diagonal allocating
1868      new memory, thus calling lots of mallocs and slowing things down.
1869 
1870   */
1871   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1872   ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
1873   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
1874     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
1875   } else if (diag != 0.0) {
1876     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
1877     if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1878        MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1879     for (i=0; i<slen; i++) {
1880       row  = lrows[i] + rstart_bs;
1881       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
1882     }
1883     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1884     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1885   } else {
1886     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
1887   }
1888 
1889   ierr = PetscFree(lrows);CHKERRQ(ierr);
1890 
1891   /* wait on sends */
1892   if (nsends) {
1893     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1894     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1895     ierr = PetscFree(send_status);CHKERRQ(ierr);
1896   }
1897   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1898   ierr = PetscFree(svalues);CHKERRQ(ierr);
1899 
1900   PetscFunctionReturn(0);
1901 }
1902 
1903 #undef __FUNCT__
1904 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
1905 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1906 {
1907   Mat_MPIBAIJ    *a   = (Mat_MPIBAIJ*)A->data;
1908   PetscErrorCode ierr;
1909 
1910   PetscFunctionBegin;
1911   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1912   PetscFunctionReturn(0);
1913 }
1914 
1915 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
1916 
1917 #undef __FUNCT__
1918 #define __FUNCT__ "MatEqual_MPIBAIJ"
1919 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool  *flag)
1920 {
1921   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1922   Mat            a,b,c,d;
1923   PetscBool      flg;
1924   PetscErrorCode ierr;
1925 
1926   PetscFunctionBegin;
1927   a = matA->A; b = matA->B;
1928   c = matB->A; d = matB->B;
1929 
1930   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1931   if (flg) {
1932     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1933   }
1934   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1935   PetscFunctionReturn(0);
1936 }
1937 
1938 #undef __FUNCT__
1939 #define __FUNCT__ "MatCopy_MPIBAIJ"
1940 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
1941 {
1942   PetscErrorCode ierr;
1943   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ *)A->data;
1944   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ *)B->data;
1945 
1946   PetscFunctionBegin;
1947   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1948   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1949     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1950   } else {
1951     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1952     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1953   }
1954   PetscFunctionReturn(0);
1955 }
1956 
1957 #undef __FUNCT__
1958 #define __FUNCT__ "MatSetUp_MPIBAIJ"
1959 PetscErrorCode MatSetUp_MPIBAIJ(Mat A)
1960 {
1961   PetscErrorCode ierr;
1962 
1963   PetscFunctionBegin;
1964   ierr =  MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1965   PetscFunctionReturn(0);
1966 }
1967 
1968 #undef __FUNCT__
1969 #define __FUNCT__ "MatAXPY_MPIBAIJ"
1970 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1971 {
1972   PetscErrorCode ierr;
1973   Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data;
1974   PetscBLASInt   bnz,one=1;
1975   Mat_SeqBAIJ    *x,*y;
1976 
1977   PetscFunctionBegin;
1978   if (str == SAME_NONZERO_PATTERN) {
1979     PetscScalar alpha = a;
1980     x = (Mat_SeqBAIJ *)xx->A->data;
1981     y = (Mat_SeqBAIJ *)yy->A->data;
1982     bnz = PetscBLASIntCast(x->nz);
1983     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1984     x = (Mat_SeqBAIJ *)xx->B->data;
1985     y = (Mat_SeqBAIJ *)yy->B->data;
1986     bnz = PetscBLASIntCast(x->nz);
1987     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1988   } else {
1989     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1990   }
1991   PetscFunctionReturn(0);
1992 }
1993 
1994 #undef __FUNCT__
1995 #define __FUNCT__ "MatRealPart_MPIBAIJ"
1996 PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
1997 {
1998   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ*)A->data;
1999   PetscErrorCode ierr;
2000 
2001   PetscFunctionBegin;
2002   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2003   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2004   PetscFunctionReturn(0);
2005 }
2006 
2007 #undef __FUNCT__
2008 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ"
2009 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
2010 {
2011   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ*)A->data;
2012   PetscErrorCode ierr;
2013 
2014   PetscFunctionBegin;
2015   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2016   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2017   PetscFunctionReturn(0);
2018 }
2019 
2020 #undef __FUNCT__
2021 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ"
2022 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
2023 {
2024   PetscErrorCode ierr;
2025   IS             iscol_local;
2026   PetscInt       csize;
2027 
2028   PetscFunctionBegin;
2029   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
2030   if (call == MAT_REUSE_MATRIX) {
2031     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
2032     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2033   } else {
2034     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
2035   }
2036   ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
2037   if (call == MAT_INITIAL_MATRIX) {
2038     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
2039     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
2040   }
2041   PetscFunctionReturn(0);
2042 }
2043 
2044 #undef __FUNCT__
2045 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ_Private"
2046 /*
2047     Not great since it makes two copies of the submatrix, first an SeqBAIJ
2048   in local and then by concatenating the local matrices the end result.
2049   Writing it directly would be much like MatGetSubMatrices_MPIBAIJ()
2050 */
2051 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2052 {
2053   PetscErrorCode ierr;
2054   PetscMPIInt    rank,size;
2055   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs;
2056   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2057   Mat            *local,M,Mreuse;
2058   MatScalar      *vwork,*aa;
2059   MPI_Comm       comm = ((PetscObject)mat)->comm;
2060   Mat_SeqBAIJ    *aij;
2061 
2062 
2063   PetscFunctionBegin;
2064   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2065   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2066 
2067   if (call ==  MAT_REUSE_MATRIX) {
2068     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2069     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2070     local = &Mreuse;
2071     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2072   } else {
2073     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2074     Mreuse = *local;
2075     ierr   = PetscFree(local);CHKERRQ(ierr);
2076   }
2077 
2078   /*
2079       m - number of local rows
2080       n - number of columns (same on all processors)
2081       rstart - first row in new global matrix generated
2082   */
2083   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2084   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2085   m    = m/bs;
2086   n    = n/bs;
2087 
2088   if (call == MAT_INITIAL_MATRIX) {
2089     aij = (Mat_SeqBAIJ*)(Mreuse)->data;
2090     ii  = aij->i;
2091     jj  = aij->j;
2092 
2093     /*
2094         Determine the number of non-zeros in the diagonal and off-diagonal
2095         portions of the matrix in order to do correct preallocation
2096     */
2097 
2098     /* first get start and end of "diagonal" columns */
2099     if (csize == PETSC_DECIDE) {
2100       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2101       if (mglobal == n*bs) { /* square matrix */
2102 	nlocal = m;
2103       } else {
2104         nlocal = n/size + ((n % size) > rank);
2105       }
2106     } else {
2107       nlocal = csize/bs;
2108     }
2109     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2110     rstart = rend - nlocal;
2111     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2112 
2113     /* next, compute all the lengths */
2114     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2115     olens = dlens + m;
2116     for (i=0; i<m; i++) {
2117       jend = ii[i+1] - ii[i];
2118       olen = 0;
2119       dlen = 0;
2120       for (j=0; j<jend; j++) {
2121         if (*jj < rstart || *jj >= rend) olen++;
2122         else dlen++;
2123         jj++;
2124       }
2125       olens[i] = olen;
2126       dlens[i] = dlen;
2127     }
2128     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
2129     ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr);
2130     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
2131     ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
2132     ierr = PetscFree(dlens);CHKERRQ(ierr);
2133   } else {
2134     PetscInt ml,nl;
2135 
2136     M = *newmat;
2137     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2138     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2139     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2140     /*
2141          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2142        rather than the slower MatSetValues().
2143     */
2144     M->was_assembled = PETSC_TRUE;
2145     M->assembled     = PETSC_FALSE;
2146   }
2147   ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2148   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2149   aij = (Mat_SeqBAIJ*)(Mreuse)->data;
2150   ii  = aij->i;
2151   jj  = aij->j;
2152   aa  = aij->a;
2153   for (i=0; i<m; i++) {
2154     row   = rstart/bs + i;
2155     nz    = ii[i+1] - ii[i];
2156     cwork = jj;     jj += nz;
2157     vwork = aa;     aa += nz;
2158     ierr = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2159   }
2160 
2161   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2162   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2163   *newmat = M;
2164 
2165   /* save submatrix used in processor for next request */
2166   if (call ==  MAT_INITIAL_MATRIX) {
2167     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2168     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2169   }
2170 
2171   PetscFunctionReturn(0);
2172 }
2173 
2174 #undef __FUNCT__
2175 #define __FUNCT__ "MatPermute_MPIBAIJ"
2176 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B)
2177 {
2178   MPI_Comm       comm,pcomm;
2179   PetscInt       first,local_size,nrows;
2180   const PetscInt *rows;
2181   PetscMPIInt    size;
2182   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
2183   PetscErrorCode ierr;
2184 
2185   PetscFunctionBegin;
2186   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2187   /* make a collective version of 'rowp' */
2188   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
2189   if (pcomm==comm) {
2190     crowp = rowp;
2191   } else {
2192     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
2193     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
2194     ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr);
2195     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
2196   }
2197   /* collect the global row permutation and invert it */
2198   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
2199   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
2200   if (pcomm!=comm) {
2201     ierr = ISDestroy(&crowp);CHKERRQ(ierr);
2202   }
2203   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
2204   /* get the local target indices */
2205   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
2206   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
2207   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
2208   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr);
2209   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
2210   ierr = ISDestroy(&irowp);CHKERRQ(ierr);
2211   /* the column permutation is so much easier;
2212      make a local version of 'colp' and invert it */
2213   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
2214   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
2215   if (size==1) {
2216     lcolp = colp;
2217   } else {
2218     ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
2219     ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
2220     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,PETSC_COPY_VALUES,&lcolp);CHKERRQ(ierr);
2221   }
2222   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
2223   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
2224   ierr = ISSetPermutation(icolp);CHKERRQ(ierr);
2225   if (size>1) {
2226     ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
2227     ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
2228   }
2229   /* now we just get the submatrix */
2230   ierr = MatGetSubMatrix_MPIBAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
2231   /* clean up */
2232   ierr = ISDestroy(&lrowp);CHKERRQ(ierr);
2233   ierr = ISDestroy(&icolp);CHKERRQ(ierr);
2234   PetscFunctionReturn(0);
2235 }
2236 
2237 #undef __FUNCT__
2238 #define __FUNCT__ "MatGetGhosts_MPIBAIJ"
2239 PetscErrorCode  MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
2240 {
2241   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*) mat->data;
2242   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
2243 
2244   PetscFunctionBegin;
2245   if (nghosts) { *nghosts = B->nbs;}
2246   if (ghosts) {*ghosts = baij->garray;}
2247   PetscFunctionReturn(0);
2248 }
2249 
2250 extern PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat);
2251 
2252 #undef __FUNCT__
2253 #define __FUNCT__ "MatFDColoringCreate_MPIBAIJ"
2254 /*
2255     This routine is almost identical to MatFDColoringCreate_MPIBAIJ()!
2256 */
2257 PetscErrorCode MatFDColoringCreate_MPIBAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c)
2258 {
2259   Mat_MPIBAIJ            *baij = (Mat_MPIBAIJ*)mat->data;
2260   PetscErrorCode        ierr;
2261   PetscMPIInt           size,*ncolsonproc,*disp,nn;
2262   PetscInt              bs,i,n,nrows,j,k,m,*rows = 0,*A_ci,*A_cj,ncols,col;
2263   const PetscInt        *is;
2264   PetscInt              nis = iscoloring->n,nctot,*cols,*B_ci,*B_cj;
2265   PetscInt              *rowhit,M,cstart,cend,colb;
2266   PetscInt              *columnsforrow,l;
2267   IS                    *isa;
2268   PetscBool              done,flg;
2269   ISLocalToGlobalMapping map = mat->cmap->bmapping;
2270   PetscInt               *ltog = (map ? map->indices : (PetscInt*) PETSC_NULL) ,ctype=c->ctype;
2271 
2272   PetscFunctionBegin;
2273   if (!mat->assembled) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled first; MatAssemblyBegin/End();");
2274   if (ctype == IS_COLORING_GHOSTED && !map) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_INCOMP,"When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMappingBlock");
2275 
2276   ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr);
2277   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2278   M                = mat->rmap->n/bs;
2279   cstart           = mat->cmap->rstart/bs;
2280   cend             = mat->cmap->rend/bs;
2281   c->M             = mat->rmap->N/bs;  /* set the global rows and columns and local rows */
2282   c->N             = mat->cmap->N/bs;
2283   c->m             = mat->rmap->n/bs;
2284   c->rstart        = mat->rmap->rstart/bs;
2285 
2286   c->ncolors       = nis;
2287   ierr             = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr);
2288   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr);
2289   ierr             = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr);
2290   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr);
2291   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr);
2292   ierr = PetscLogObjectMemory(c,5*nis*sizeof(PetscInt));CHKERRQ(ierr);
2293 
2294   /* Allow access to data structures of local part of matrix */
2295   if (!baij->colmap) {
2296     ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
2297   }
2298   ierr = MatGetColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr);
2299   ierr = MatGetColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr);
2300 
2301   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr);
2302   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr);
2303 
2304   for (i=0; i<nis; i++) {
2305     ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr);
2306     ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr);
2307     c->ncolumns[i] = n;
2308     if (n) {
2309       ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr);
2310       ierr = PetscLogObjectMemory(c,n*sizeof(PetscInt));CHKERRQ(ierr);
2311       ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr);
2312     } else {
2313       c->columns[i]  = 0;
2314     }
2315 
2316     if (ctype == IS_COLORING_GLOBAL){
2317       /* Determine the total (parallel) number of columns of this color */
2318       ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
2319       ierr = PetscMalloc2(size,PetscMPIInt,&ncolsonproc,size,PetscMPIInt,&disp);CHKERRQ(ierr);
2320 
2321       nn   = PetscMPIIntCast(n);
2322       ierr = MPI_Allgather(&nn,1,MPI_INT,ncolsonproc,1,MPI_INT,((PetscObject)mat)->comm);CHKERRQ(ierr);
2323       nctot = 0; for (j=0; j<size; j++) {nctot += ncolsonproc[j];}
2324       if (!nctot) {
2325         ierr = PetscInfo(mat,"Coloring of matrix has some unneeded colors with no corresponding rows\n");CHKERRQ(ierr);
2326       }
2327 
2328       disp[0] = 0;
2329       for (j=1; j<size; j++) {
2330         disp[j] = disp[j-1] + ncolsonproc[j-1];
2331       }
2332 
2333       /* Get complete list of columns for color on each processor */
2334       ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2335       ierr = MPI_Allgatherv((void*)is,n,MPIU_INT,cols,ncolsonproc,disp,MPIU_INT,((PetscObject)mat)->comm);CHKERRQ(ierr);
2336       ierr = PetscFree2(ncolsonproc,disp);CHKERRQ(ierr);
2337     } else if (ctype == IS_COLORING_GHOSTED){
2338       /* Determine local number of columns of this color on this process, including ghost points */
2339       nctot = n;
2340       ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2341       ierr = PetscMemcpy(cols,is,n*sizeof(PetscInt));CHKERRQ(ierr);
2342     } else {
2343       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not provided for this MatFDColoring type");
2344     }
2345 
2346     /*
2347        Mark all rows affect by these columns
2348     */
2349     /* Temporary option to allow for debugging/testing */
2350     flg  = PETSC_FALSE;
2351     ierr = PetscOptionsGetBool(PETSC_NULL,"-matfdcoloring_slow",&flg,PETSC_NULL);CHKERRQ(ierr);
2352     if (!flg) {/*-----------------------------------------------------------------------------*/
2353       /* crude, fast version */
2354       ierr = PetscMemzero(rowhit,M*sizeof(PetscInt));CHKERRQ(ierr);
2355       /* loop over columns*/
2356       for (j=0; j<nctot; j++) {
2357         if (ctype == IS_COLORING_GHOSTED) {
2358           col = ltog[cols[j]];
2359         } else {
2360           col  = cols[j];
2361         }
2362         if (col >= cstart && col < cend) {
2363           /* column is in diagonal block of matrix */
2364           rows = A_cj + A_ci[col-cstart];
2365           m    = A_ci[col-cstart+1] - A_ci[col-cstart];
2366         } else {
2367 #if defined (PETSC_USE_CTABLE)
2368           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2369 	  colb --;
2370 #else
2371           colb = baij->colmap[col] - 1;
2372 #endif
2373           if (colb == -1) {
2374             m = 0;
2375           } else {
2376             colb = colb/bs;
2377             rows = B_cj + B_ci[colb];
2378             m    = B_ci[colb+1] - B_ci[colb];
2379           }
2380         }
2381         /* loop over columns marking them in rowhit */
2382         for (k=0; k<m; k++) {
2383           rowhit[*rows++] = col + 1;
2384         }
2385       }
2386 
2387       /* count the number of hits */
2388       nrows = 0;
2389       for (j=0; j<M; j++) {
2390         if (rowhit[j]) nrows++;
2391       }
2392       c->nrows[i]         = nrows;
2393       ierr                = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
2394       ierr                = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
2395       ierr = PetscLogObjectMemory(c,2*(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr);
2396       nrows = 0;
2397       for (j=0; j<M; j++) {
2398         if (rowhit[j]) {
2399           c->rows[i][nrows]           = j;
2400           c->columnsforrow[i][nrows] = rowhit[j] - 1;
2401           nrows++;
2402         }
2403       }
2404     } else {/*-------------------------------------------------------------------------------*/
2405       /* slow version, using rowhit as a linked list */
2406       PetscInt currentcol,fm,mfm;
2407       rowhit[M] = M;
2408       nrows     = 0;
2409       /* loop over columns*/
2410       for (j=0; j<nctot; j++) {
2411         if (ctype == IS_COLORING_GHOSTED) {
2412           col = ltog[cols[j]];
2413         } else {
2414           col  = cols[j];
2415         }
2416         if (col >= cstart && col < cend) {
2417           /* column is in diagonal block of matrix */
2418           rows = A_cj + A_ci[col-cstart];
2419           m    = A_ci[col-cstart+1] - A_ci[col-cstart];
2420         } else {
2421 #if defined (PETSC_USE_CTABLE)
2422           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2423           colb --;
2424 #else
2425           colb = baij->colmap[col] - 1;
2426 #endif
2427           if (colb == -1) {
2428             m = 0;
2429           } else {
2430             colb = colb/bs;
2431             rows = B_cj + B_ci[colb];
2432             m    = B_ci[colb+1] - B_ci[colb];
2433           }
2434         }
2435 
2436         /* loop over columns marking them in rowhit */
2437         fm    = M; /* fm points to first entry in linked list */
2438         for (k=0; k<m; k++) {
2439           currentcol = *rows++;
2440 	  /* is it already in the list? */
2441           do {
2442             mfm  = fm;
2443             fm   = rowhit[fm];
2444           } while (fm < currentcol);
2445           /* not in list so add it */
2446           if (fm != currentcol) {
2447             nrows++;
2448             columnsforrow[currentcol] = col;
2449             /* next three lines insert new entry into linked list */
2450             rowhit[mfm]               = currentcol;
2451             rowhit[currentcol]        = fm;
2452             fm                        = currentcol;
2453             /* fm points to present position in list since we know the columns are sorted */
2454           } else {
2455             SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid coloring of matrix detected");
2456           }
2457         }
2458       }
2459       c->nrows[i]         = nrows;
2460       ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
2461       ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
2462       ierr = PetscLogObjectMemory(c,(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr);
2463       /* now store the linked list of rows into c->rows[i] */
2464       nrows = 0;
2465       fm    = rowhit[M];
2466       do {
2467         c->rows[i][nrows]            = fm;
2468         c->columnsforrow[i][nrows++] = columnsforrow[fm];
2469         fm                           = rowhit[fm];
2470       } while (fm < M);
2471     } /* ---------------------------------------------------------------------------------------*/
2472     ierr = PetscFree(cols);CHKERRQ(ierr);
2473   }
2474 
2475   /* Optimize by adding the vscale, and scaleforrow[][] fields */
2476   /*
2477        vscale will contain the "diagonal" on processor scalings followed by the off processor
2478   */
2479   if (ctype == IS_COLORING_GLOBAL) {
2480     PetscInt *garray;
2481     ierr = PetscMalloc(baij->B->cmap->n*sizeof(PetscInt),&garray);CHKERRQ(ierr);
2482     for (i=0; i<baij->B->cmap->n/bs; i++) {
2483       for (j=0; j<bs; j++) {
2484         garray[i*bs+j] = bs*baij->garray[i]+j;
2485       }
2486     }
2487     ierr = VecCreateGhost(((PetscObject)mat)->comm,baij->A->rmap->n,PETSC_DETERMINE,baij->B->cmap->n,garray,&c->vscale);CHKERRQ(ierr);
2488     ierr = PetscFree(garray);CHKERRQ(ierr);
2489     CHKMEMQ;
2490     ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
2491     for (k=0; k<c->ncolors; k++) {
2492       ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
2493       for (l=0; l<c->nrows[k]; l++) {
2494         col = c->columnsforrow[k][l];
2495         if (col >= cstart && col < cend) {
2496           /* column is in diagonal block of matrix */
2497           colb = col - cstart;
2498         } else {
2499           /* column  is in "off-processor" part */
2500 #if defined (PETSC_USE_CTABLE)
2501           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2502           colb --;
2503 #else
2504           colb = baij->colmap[col] - 1;
2505 #endif
2506           colb = colb/bs;
2507           colb += cend - cstart;
2508         }
2509         c->vscaleforrow[k][l] = colb;
2510       }
2511     }
2512   } else if (ctype == IS_COLORING_GHOSTED) {
2513     /* Get gtol mapping */
2514     PetscInt N = mat->cmap->N, *gtol;
2515     ierr = PetscMalloc((N+1)*sizeof(PetscInt),&gtol);CHKERRQ(ierr);
2516     for (i=0; i<N; i++) gtol[i] = -1;
2517     for (i=0; i<map->n; i++) gtol[ltog[i]] = i;
2518 
2519     c->vscale = 0; /* will be created in MatFDColoringApply() */
2520     ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
2521     for (k=0; k<c->ncolors; k++) {
2522       ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
2523       for (l=0; l<c->nrows[k]; l++) {
2524         col = c->columnsforrow[k][l];      /* global column index */
2525         c->vscaleforrow[k][l] = gtol[col]; /* local column index */
2526       }
2527     }
2528     ierr = PetscFree(gtol);CHKERRQ(ierr);
2529   }
2530   ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr);
2531 
2532   ierr = PetscFree(rowhit);CHKERRQ(ierr);
2533   ierr = PetscFree(columnsforrow);CHKERRQ(ierr);
2534   ierr = MatRestoreColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr);
2535   ierr = MatRestoreColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr);
2536     CHKMEMQ;
2537   PetscFunctionReturn(0);
2538 }
2539 
2540 #undef __FUNCT__
2541 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIBAIJ"
2542 PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat)
2543 {
2544   Mat            B;
2545   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ *)A->data;
2546   Mat_SeqBAIJ    *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data;
2547   Mat_SeqAIJ     *b;
2548   PetscErrorCode ierr;
2549   PetscMPIInt    size,rank,*recvcounts = 0,*displs = 0;
2550   PetscInt       sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs;
2551   PetscInt       m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf;
2552 
2553   PetscFunctionBegin;
2554   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
2555   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
2556 
2557   /* ----------------------------------------------------------------
2558      Tell every processor the number of nonzeros per row
2559   */
2560   ierr = PetscMalloc((A->rmap->N/bs)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
2561   for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) {
2562     lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs];
2563   }
2564   sendcount = A->rmap->rend/bs - A->rmap->rstart/bs;
2565   ierr = PetscMalloc(2*size*sizeof(PetscMPIInt),&recvcounts);CHKERRQ(ierr);
2566   displs     = recvcounts + size;
2567   for (i=0; i<size; i++) {
2568     recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs;
2569     displs[i]     = A->rmap->range[i]/bs;
2570   }
2571 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2572   ierr  = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2573 #else
2574   ierr  = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2575 #endif
2576   /* ---------------------------------------------------------------
2577      Create the sequential matrix of the same type as the local block diagonal
2578   */
2579   ierr  = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
2580   ierr  = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2581   ierr  = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2582   ierr  = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr);
2583   b = (Mat_SeqAIJ *)B->data;
2584 
2585   /*--------------------------------------------------------------------
2586     Copy my part of matrix column indices over
2587   */
2588   sendcount  = ad->nz + bd->nz;
2589   jsendbuf   = b->j + b->i[rstarts[rank]/bs];
2590   a_jsendbuf = ad->j;
2591   b_jsendbuf = bd->j;
2592   n          = A->rmap->rend/bs - A->rmap->rstart/bs;
2593   cnt        = 0;
2594   for (i=0; i<n; i++) {
2595 
2596     /* put in lower diagonal portion */
2597     m = bd->i[i+1] - bd->i[i];
2598     while (m > 0) {
2599       /* is it above diagonal (in bd (compressed) numbering) */
2600       if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break;
2601       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2602       m--;
2603     }
2604 
2605     /* put in diagonal portion */
2606     for (j=ad->i[i]; j<ad->i[i+1]; j++) {
2607       jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++;
2608     }
2609 
2610     /* put in upper diagonal portion */
2611     while (m-- > 0) {
2612       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2613     }
2614   }
2615   if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt);
2616 
2617   /*--------------------------------------------------------------------
2618     Gather all column indices to all processors
2619   */
2620   for (i=0; i<size; i++) {
2621     recvcounts[i] = 0;
2622     for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) {
2623       recvcounts[i] += lens[j];
2624     }
2625   }
2626   displs[0]  = 0;
2627   for (i=1; i<size; i++) {
2628     displs[i] = displs[i-1] + recvcounts[i-1];
2629   }
2630 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2631   ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2632 #else
2633   ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2634 #endif
2635   /*--------------------------------------------------------------------
2636     Assemble the matrix into useable form (note numerical values not yet set)
2637   */
2638   /* set the b->ilen (length of each row) values */
2639   ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr);
2640   /* set the b->i indices */
2641   b->i[0] = 0;
2642   for (i=1; i<=A->rmap->N/bs; i++) {
2643     b->i[i] = b->i[i-1] + lens[i-1];
2644   }
2645   ierr = PetscFree(lens);CHKERRQ(ierr);
2646   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2647   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2648   ierr = PetscFree(recvcounts);CHKERRQ(ierr);
2649 
2650   if (A->symmetric){
2651     ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2652   } else if (A->hermitian) {
2653     ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr);
2654   } else if (A->structurally_symmetric) {
2655     ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2656   }
2657   *newmat = B;
2658   PetscFunctionReturn(0);
2659 }
2660 
2661 #undef __FUNCT__
2662 #define __FUNCT__ "MatSOR_MPIBAIJ"
2663 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2664 {
2665   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
2666   PetscErrorCode ierr;
2667   Vec            bb1 = 0;
2668 
2669   PetscFunctionBegin;
2670   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) {
2671     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2672   }
2673 
2674   if (flag == SOR_APPLY_UPPER) {
2675     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2676     PetscFunctionReturn(0);
2677   }
2678 
2679   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2680     if (flag & SOR_ZERO_INITIAL_GUESS) {
2681       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2682       its--;
2683     }
2684 
2685     while (its--) {
2686       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2687       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2688 
2689       /* update rhs: bb1 = bb - B*x */
2690       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2691       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2692 
2693       /* local sweep */
2694       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2695     }
2696   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
2697     if (flag & SOR_ZERO_INITIAL_GUESS) {
2698       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2699       its--;
2700     }
2701     while (its--) {
2702       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2703       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2704 
2705       /* update rhs: bb1 = bb - B*x */
2706       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2707       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2708 
2709       /* local sweep */
2710       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2711     }
2712   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
2713     if (flag & SOR_ZERO_INITIAL_GUESS) {
2714       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2715       its--;
2716     }
2717     while (its--) {
2718       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2719       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2720 
2721       /* update rhs: bb1 = bb - B*x */
2722       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2723       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2724 
2725       /* local sweep */
2726       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2727     }
2728   } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel version of SOR requested not supported");
2729 
2730   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2731   PetscFunctionReturn(0);
2732 }
2733 
2734 extern PetscErrorCode  MatFDColoringApply_BAIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2735 
2736 #undef __FUNCT__
2737 #define __FUNCT__ "MatInvertBlockDiagonal_MPIBAIJ"
2738 PetscErrorCode  MatInvertBlockDiagonal_MPIBAIJ(Mat A,PetscScalar **values)
2739 {
2740   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*) A->data;
2741   PetscErrorCode ierr;
2742 
2743   PetscFunctionBegin;
2744   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2745   PetscFunctionReturn(0);
2746 }
2747 
2748 
2749 /* -------------------------------------------------------------------*/
2750 static struct _MatOps MatOps_Values = {
2751        MatSetValues_MPIBAIJ,
2752        MatGetRow_MPIBAIJ,
2753        MatRestoreRow_MPIBAIJ,
2754        MatMult_MPIBAIJ,
2755 /* 4*/ MatMultAdd_MPIBAIJ,
2756        MatMultTranspose_MPIBAIJ,
2757        MatMultTransposeAdd_MPIBAIJ,
2758        0,
2759        0,
2760        0,
2761 /*10*/ 0,
2762        0,
2763        0,
2764        MatSOR_MPIBAIJ,
2765        MatTranspose_MPIBAIJ,
2766 /*15*/ MatGetInfo_MPIBAIJ,
2767        MatEqual_MPIBAIJ,
2768        MatGetDiagonal_MPIBAIJ,
2769        MatDiagonalScale_MPIBAIJ,
2770        MatNorm_MPIBAIJ,
2771 /*20*/ MatAssemblyBegin_MPIBAIJ,
2772        MatAssemblyEnd_MPIBAIJ,
2773        MatSetOption_MPIBAIJ,
2774        MatZeroEntries_MPIBAIJ,
2775 /*24*/ MatZeroRows_MPIBAIJ,
2776        0,
2777        0,
2778        0,
2779        0,
2780 /*29*/ MatSetUp_MPIBAIJ,
2781        0,
2782        0,
2783        0,
2784        0,
2785 /*34*/ MatDuplicate_MPIBAIJ,
2786        0,
2787        0,
2788        0,
2789        0,
2790 /*39*/ MatAXPY_MPIBAIJ,
2791        MatGetSubMatrices_MPIBAIJ,
2792        MatIncreaseOverlap_MPIBAIJ,
2793        MatGetValues_MPIBAIJ,
2794        MatCopy_MPIBAIJ,
2795 /*44*/ 0,
2796        MatScale_MPIBAIJ,
2797        0,
2798        0,
2799        0,
2800 /*49*/ 0,
2801        0,
2802        0,
2803        0,
2804        0,
2805 /*54*/ MatFDColoringCreate_MPIBAIJ,
2806        0,
2807        MatSetUnfactored_MPIBAIJ,
2808        MatPermute_MPIBAIJ,
2809        MatSetValuesBlocked_MPIBAIJ,
2810 /*59*/ MatGetSubMatrix_MPIBAIJ,
2811        MatDestroy_MPIBAIJ,
2812        MatView_MPIBAIJ,
2813        0,
2814        0,
2815 /*64*/ 0,
2816        0,
2817        0,
2818        0,
2819        0,
2820 /*69*/ MatGetRowMaxAbs_MPIBAIJ,
2821        0,
2822        0,
2823        0,
2824        0,
2825 /*74*/ 0,
2826        MatFDColoringApply_BAIJ,
2827        0,
2828        0,
2829        0,
2830 /*79*/ 0,
2831        0,
2832        0,
2833        0,
2834        MatLoad_MPIBAIJ,
2835 /*84*/ 0,
2836        0,
2837        0,
2838        0,
2839        0,
2840 /*89*/ 0,
2841        0,
2842        0,
2843        0,
2844        0,
2845 /*94*/ 0,
2846        0,
2847        0,
2848        0,
2849        0,
2850 /*99*/ 0,
2851        0,
2852        0,
2853        0,
2854        0,
2855 /*104*/0,
2856        MatRealPart_MPIBAIJ,
2857        MatImaginaryPart_MPIBAIJ,
2858        0,
2859        0,
2860 /*109*/0,
2861        0,
2862        0,
2863        0,
2864        0,
2865 /*114*/MatGetSeqNonzeroStructure_MPIBAIJ,
2866        0,
2867        MatGetGhosts_MPIBAIJ,
2868        0,
2869        0,
2870 /*119*/0,
2871        0,
2872        0,
2873        0,
2874        0,
2875 /*124*/0,
2876        0,
2877        MatInvertBlockDiagonal_MPIBAIJ
2878 };
2879 
2880 EXTERN_C_BEGIN
2881 #undef __FUNCT__
2882 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
2883 PetscErrorCode  MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a)
2884 {
2885   PetscFunctionBegin;
2886   *a = ((Mat_MPIBAIJ *)A->data)->A;
2887   PetscFunctionReturn(0);
2888 }
2889 EXTERN_C_END
2890 
2891 EXTERN_C_BEGIN
2892 extern PetscErrorCode  MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*);
2893 EXTERN_C_END
2894 
2895 EXTERN_C_BEGIN
2896 #undef __FUNCT__
2897 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ"
2898 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2899 {
2900   PetscInt       m,rstart,cstart,cend;
2901   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
2902   const PetscInt *JJ=0;
2903   PetscScalar    *values=0;
2904   PetscErrorCode ierr;
2905 
2906   PetscFunctionBegin;
2907   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2908   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2909   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2910   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2911   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2912   m      = B->rmap->n/bs;
2913   rstart = B->rmap->rstart/bs;
2914   cstart = B->cmap->rstart/bs;
2915   cend   = B->cmap->rend/bs;
2916 
2917   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
2918   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
2919   for (i=0; i<m; i++) {
2920     nz = ii[i+1] - ii[i];
2921     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
2922     nz_max = PetscMax(nz_max,nz);
2923     JJ  = jj + ii[i];
2924     for (j=0; j<nz; j++) {
2925       if (*JJ >= cstart) break;
2926       JJ++;
2927     }
2928     d = 0;
2929     for (; j<nz; j++) {
2930       if (*JJ++ >= cend) break;
2931       d++;
2932     }
2933     d_nnz[i] = d;
2934     o_nnz[i] = nz - d;
2935   }
2936   ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2937   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
2938 
2939   values = (PetscScalar*)V;
2940   if (!values) {
2941     ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2942     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2943   }
2944   for (i=0; i<m; i++) {
2945     PetscInt          row    = i + rstart;
2946     PetscInt          ncols  = ii[i+1] - ii[i];
2947     const PetscInt    *icols = jj + ii[i];
2948     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2949     ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2950   }
2951 
2952   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2953   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2954   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2955   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2956   PetscFunctionReturn(0);
2957 }
2958 EXTERN_C_END
2959 
2960 #undef __FUNCT__
2961 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR"
2962 /*@C
2963    MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format
2964    (the default parallel PETSc format).
2965 
2966    Collective on MPI_Comm
2967 
2968    Input Parameters:
2969 +  A - the matrix
2970 .  bs - the block size
2971 .  i - the indices into j for the start of each local row (starts with zero)
2972 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2973 -  v - optional values in the matrix
2974 
2975    Level: developer
2976 
2977 .keywords: matrix, aij, compressed row, sparse, parallel
2978 
2979 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ
2980 @*/
2981 PetscErrorCode  MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2982 {
2983   PetscErrorCode ierr;
2984 
2985   PetscFunctionBegin;
2986   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2987   PetscValidType(B,1);
2988   PetscValidLogicalCollectiveInt(B,bs,2);
2989   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
2990   PetscFunctionReturn(0);
2991 }
2992 
2993 EXTERN_C_BEGIN
2994 #undef __FUNCT__
2995 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
2996 PetscErrorCode  MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
2997 {
2998   Mat_MPIBAIJ    *b;
2999   PetscErrorCode ierr;
3000   PetscInt       i;
3001   PetscBool      d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE;
3002 
3003   PetscFunctionBegin;
3004   if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE;
3005   if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE;
3006 
3007   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
3008   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
3009   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
3010   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
3011 
3012   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
3013   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
3014   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3015   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3016   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
3017 
3018   if (d_nnz) {
3019     for (i=0; i<B->rmap->n/bs; i++) {
3020       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]);
3021     }
3022   }
3023   if (o_nnz) {
3024     for (i=0; i<B->rmap->n/bs; i++) {
3025       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]);
3026     }
3027   }
3028 
3029   b = (Mat_MPIBAIJ*)B->data;
3030   b->bs2 = bs*bs;
3031   b->mbs = B->rmap->n/bs;
3032   b->nbs = B->cmap->n/bs;
3033   b->Mbs = B->rmap->N/bs;
3034   b->Nbs = B->cmap->N/bs;
3035 
3036   for (i=0; i<=b->size; i++) {
3037     b->rangebs[i] = B->rmap->range[i]/bs;
3038   }
3039   b->rstartbs = B->rmap->rstart/bs;
3040   b->rendbs   = B->rmap->rend/bs;
3041   b->cstartbs = B->cmap->rstart/bs;
3042   b->cendbs   = B->cmap->rend/bs;
3043 
3044   if (!B->preallocated) {
3045     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3046     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3047     ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
3048     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3049     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3050     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3051     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
3052     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3053     ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr);
3054   }
3055 
3056   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3057   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
3058   /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). */
3059   if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3060   if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3061   B->preallocated = PETSC_TRUE;
3062   PetscFunctionReturn(0);
3063 }
3064 EXTERN_C_END
3065 
3066 EXTERN_C_BEGIN
3067 extern PetscErrorCode  MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
3068 extern PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
3069 EXTERN_C_END
3070 
3071 
3072 EXTERN_C_BEGIN
3073 #undef __FUNCT__
3074 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj"
3075 PetscErrorCode  MatConvert_MPIBAIJ_MPIAdj(Mat B, const MatType newtype,MatReuse reuse,Mat *adj)
3076 {
3077   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
3078   PetscErrorCode ierr;
3079   Mat_SeqBAIJ    *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data;
3080   PetscInt       M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs;
3081   const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;
3082 
3083   PetscFunctionBegin;
3084   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&ii);CHKERRQ(ierr);
3085   ii[0] = 0;
3086   CHKMEMQ;
3087   for (i=0; i<M; i++) {
3088     if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]);
3089     if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]);
3090     ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i];
3091     /* remove one from count of matrix has diagonal */
3092     for (j=id[i]; j<id[i+1]; j++) {
3093       if (jd[j] == i) {ii[i+1]--;break;}
3094     }
3095   CHKMEMQ;
3096   }
3097   ierr = PetscMalloc(ii[M]*sizeof(PetscInt),&jj);CHKERRQ(ierr);
3098   cnt = 0;
3099   for (i=0; i<M; i++) {
3100     for (j=io[i]; j<io[i+1]; j++) {
3101       if (garray[jo[j]] > rstart) break;
3102       jj[cnt++] = garray[jo[j]];
3103   CHKMEMQ;
3104     }
3105     for (k=id[i]; k<id[i+1]; k++) {
3106       if (jd[k] != i) {
3107         jj[cnt++] = rstart + jd[k];
3108   CHKMEMQ;
3109       }
3110     }
3111     for (;j<io[i+1]; j++) {
3112       jj[cnt++] = garray[jo[j]];
3113   CHKMEMQ;
3114     }
3115   }
3116   ierr = MatCreateMPIAdj(((PetscObject)B)->comm,M,B->cmap->N/B->rmap->bs,ii,jj,PETSC_NULL,adj);CHKERRQ(ierr);
3117   PetscFunctionReturn(0);
3118 }
3119 EXTERN_C_END
3120 
3121 #include <../src/mat/impls/aij/mpi/mpiaij.h>
3122 EXTERN_C_BEGIN
3123 PetscErrorCode  MatConvert_SeqBAIJ_SeqAIJ(Mat,const MatType,MatReuse,Mat*);
3124 EXTERN_C_END
3125 
3126 EXTERN_C_BEGIN
3127 #undef __FUNCT__
3128 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAIJ"
3129 PetscErrorCode  MatConvert_MPIBAIJ_MPIAIJ(Mat A,const MatType newtype,MatReuse reuse,Mat *newmat)
3130 {
3131   PetscErrorCode ierr;
3132   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
3133   Mat            B;
3134   Mat_MPIAIJ     *b;
3135 
3136   PetscFunctionBegin;
3137   if (!A->assembled) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Matrix must be assembled");
3138 
3139   ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
3140   ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
3141   ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
3142   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
3143   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
3144   b = (Mat_MPIAIJ*) B->data;
3145 
3146   ierr = MatDestroy(&b->A);CHKERRQ(ierr);
3147   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
3148   ierr = MatDisAssemble_MPIBAIJ(A);CHKERRQ(ierr);
3149   ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr);
3150   ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr);
3151   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3152   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3153   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3154   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3155   if (reuse == MAT_REUSE_MATRIX) {
3156     ierr = MatHeaderReplace(A,B);CHKERRQ(ierr);
3157   } else {
3158    *newmat = B;
3159   }
3160   PetscFunctionReturn(0);
3161 }
3162 EXTERN_C_END
3163 
3164 EXTERN_C_BEGIN
3165 #if defined(PETSC_HAVE_MUMPS)
3166 extern PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*);
3167 #endif
3168 EXTERN_C_END
3169 
3170 /*MC
3171    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
3172 
3173    Options Database Keys:
3174 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
3175 . -mat_block_size <bs> - set the blocksize used to store the matrix
3176 - -mat_use_hash_table <fact>
3177 
3178   Level: beginner
3179 
3180 .seealso: MatCreateMPIBAIJ
3181 M*/
3182 
3183 EXTERN_C_BEGIN
3184 extern PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,const MatType,MatReuse,Mat*);
3185 EXTERN_C_END
3186 
3187 EXTERN_C_BEGIN
3188 #undef __FUNCT__
3189 #define __FUNCT__ "MatCreate_MPIBAIJ"
3190 PetscErrorCode  MatCreate_MPIBAIJ(Mat B)
3191 {
3192   Mat_MPIBAIJ    *b;
3193   PetscErrorCode ierr;
3194   PetscBool      flg;
3195 
3196   PetscFunctionBegin;
3197   ierr = PetscNewLog(B,Mat_MPIBAIJ,&b);CHKERRQ(ierr);
3198   B->data = (void*)b;
3199 
3200   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3201   B->assembled  = PETSC_FALSE;
3202 
3203   B->insertmode = NOT_SET_VALUES;
3204   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
3205   ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr);
3206 
3207   /* build local table of row and column ownerships */
3208   ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);
3209 
3210   /* build cache for off array entries formed */
3211   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
3212   b->donotstash  = PETSC_FALSE;
3213   b->colmap      = PETSC_NULL;
3214   b->garray      = PETSC_NULL;
3215   b->roworiented = PETSC_TRUE;
3216 
3217   /* stuff used in block assembly */
3218   b->barray       = 0;
3219 
3220   /* stuff used for matrix vector multiply */
3221   b->lvec         = 0;
3222   b->Mvctx        = 0;
3223 
3224   /* stuff for MatGetRow() */
3225   b->rowindices   = 0;
3226   b->rowvalues    = 0;
3227   b->getrowactive = PETSC_FALSE;
3228 
3229   /* hash table stuff */
3230   b->ht           = 0;
3231   b->hd           = 0;
3232   b->ht_size      = 0;
3233   b->ht_flag      = PETSC_FALSE;
3234   b->ht_fact      = 0;
3235   b->ht_total_ct  = 0;
3236   b->ht_insert_ct = 0;
3237 
3238   /* stuff for MatGetSubMatrices_MPIBAIJ_local() */
3239   b->ijonly       = PETSC_FALSE;
3240 
3241   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr);
3242     ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
3243     if (flg) {
3244       PetscReal fact = 1.39;
3245       ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
3246       ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr);
3247       if (fact <= 1.0) fact = 1.39;
3248       ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
3249       ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
3250     }
3251   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3252 
3253 #if defined(PETSC_HAVE_MUMPS)
3254   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps",MatGetFactor_baij_mumps);CHKERRQ(ierr);
3255 #endif
3256   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",
3257                                      "MatConvert_MPIBAIJ_MPIAdj",
3258                                       MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr);
3259   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",
3260                                      "MatConvert_MPIBAIJ_MPIAIJ",
3261                                       MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr);
3262   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C",
3263                                      "MatConvert_MPIBAIJ_MPISBAIJ",
3264                                       MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr);
3265   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3266                                      "MatStoreValues_MPIBAIJ",
3267                                      MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
3268   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3269                                      "MatRetrieveValues_MPIBAIJ",
3270                                      MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
3271   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
3272                                      "MatGetDiagonalBlock_MPIBAIJ",
3273                                      MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
3274   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C",
3275                                      "MatMPIBAIJSetPreallocation_MPIBAIJ",
3276                                      MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
3277   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",
3278 				     "MatMPIBAIJSetPreallocationCSR_MPIBAIJ",
3279 				     MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
3280   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
3281                                      "MatDiagonalScaleLocal_MPIBAIJ",
3282                                      MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
3283   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C",
3284                                      "MatSetHashTableFactor_MPIBAIJ",
3285                                      MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
3286   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpibstrm_C",
3287                                      "MatConvert_MPIBAIJ_MPIBSTRM",
3288                                       MatConvert_MPIBAIJ_MPIBSTRM);CHKERRQ(ierr);
3289   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);
3290   PetscFunctionReturn(0);
3291 }
3292 EXTERN_C_END
3293 
3294 /*MC
3295    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
3296 
3297    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
3298    and MATMPIBAIJ otherwise.
3299 
3300    Options Database Keys:
3301 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
3302 
3303   Level: beginner
3304 
3305 .seealso: MatCreateBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3306 M*/
3307 
3308 #undef __FUNCT__
3309 #define __FUNCT__ "MatMPIBAIJSetPreallocation"
3310 /*@C
3311    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
3312    (block compressed row).  For good matrix assembly performance
3313    the user should preallocate the matrix storage by setting the parameters
3314    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3315    performance can be increased by more than a factor of 50.
3316 
3317    Collective on Mat
3318 
3319    Input Parameters:
3320 +  A - the matrix
3321 .  bs   - size of blockk
3322 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
3323            submatrix  (same for all local rows)
3324 .  d_nnz - array containing the number of block nonzeros in the various block rows
3325            of the in diagonal portion of the local (possibly different for each block
3326            row) or PETSC_NULL.  If you plan to factor the matrix you must leave room for the diagonal entry and
3327            set it even if it is zero.
3328 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
3329            submatrix (same for all local rows).
3330 -  o_nnz - array containing the number of nonzeros in the various block rows of the
3331            off-diagonal portion of the local submatrix (possibly different for
3332            each block row) or PETSC_NULL.
3333 
3334    If the *_nnz parameter is given then the *_nz parameter is ignored
3335 
3336    Options Database Keys:
3337 +   -mat_block_size - size of the blocks to use
3338 -   -mat_use_hash_table <fact>
3339 
3340    Notes:
3341    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
3342    than it must be used on all processors that share the object for that argument.
3343 
3344    Storage Information:
3345    For a square global matrix we define each processor's diagonal portion
3346    to be its local rows and the corresponding columns (a square submatrix);
3347    each processor's off-diagonal portion encompasses the remainder of the
3348    local matrix (a rectangular submatrix).
3349 
3350    The user can specify preallocated storage for the diagonal part of
3351    the local submatrix with either d_nz or d_nnz (not both).  Set
3352    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
3353    memory allocation.  Likewise, specify preallocated storage for the
3354    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3355 
3356    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3357    the figure below we depict these three local rows and all columns (0-11).
3358 
3359 .vb
3360            0 1 2 3 4 5 6 7 8 9 10 11
3361           -------------------
3362    row 3  |  o o o d d d o o o o o o
3363    row 4  |  o o o d d d o o o o o o
3364    row 5  |  o o o d d d o o o o o o
3365           -------------------
3366 .ve
3367 
3368    Thus, any entries in the d locations are stored in the d (diagonal)
3369    submatrix, and any entries in the o locations are stored in the
3370    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3371    stored simply in the MATSEQBAIJ format for compressed row storage.
3372 
3373    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3374    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3375    In general, for PDE problems in which most nonzeros are near the diagonal,
3376    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3377    or you will get TERRIBLE performance; see the users' manual chapter on
3378    matrices.
3379 
3380    You can call MatGetInfo() to get information on how effective the preallocation was;
3381    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3382    You can also run with the option -info and look for messages with the string
3383    malloc in them to see if additional memory allocation was needed.
3384 
3385    Level: intermediate
3386 
3387 .keywords: matrix, block, aij, compressed row, sparse, parallel
3388 
3389 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocationCSR()
3390 @*/
3391 PetscErrorCode  MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3392 {
3393   PetscErrorCode ierr;
3394 
3395   PetscFunctionBegin;
3396   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3397   PetscValidType(B,1);
3398   PetscValidLogicalCollectiveInt(B,bs,2);
3399   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3400   PetscFunctionReturn(0);
3401 }
3402 
3403 #undef __FUNCT__
3404 #define __FUNCT__ "MatCreateBAIJ"
3405 /*@C
3406    MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format
3407    (block compressed row).  For good matrix assembly performance
3408    the user should preallocate the matrix storage by setting the parameters
3409    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3410    performance can be increased by more than a factor of 50.
3411 
3412    Collective on MPI_Comm
3413 
3414    Input Parameters:
3415 +  comm - MPI communicator
3416 .  bs   - size of blockk
3417 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3418            This value should be the same as the local size used in creating the
3419            y vector for the matrix-vector product y = Ax.
3420 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
3421            This value should be the same as the local size used in creating the
3422            x vector for the matrix-vector product y = Ax.
3423 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3424 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3425 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
3426            submatrix  (same for all local rows)
3427 .  d_nnz - array containing the number of nonzero blocks in the various block rows
3428            of the in diagonal portion of the local (possibly different for each block
3429            row) or PETSC_NULL.  If you plan to factor the matrix you must leave room for the diagonal entry
3430            and set it even if it is zero.
3431 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3432            submatrix (same for all local rows).
3433 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
3434            off-diagonal portion of the local submatrix (possibly different for
3435            each block row) or PETSC_NULL.
3436 
3437    Output Parameter:
3438 .  A - the matrix
3439 
3440    Options Database Keys:
3441 +   -mat_block_size - size of the blocks to use
3442 -   -mat_use_hash_table <fact>
3443 
3444    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3445    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3446    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3447 
3448    Notes:
3449    If the *_nnz parameter is given then the *_nz parameter is ignored
3450 
3451    A nonzero block is any block that as 1 or more nonzeros in it
3452 
3453    The user MUST specify either the local or global matrix dimensions
3454    (possibly both).
3455 
3456    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
3457    than it must be used on all processors that share the object for that argument.
3458 
3459    Storage Information:
3460    For a square global matrix we define each processor's diagonal portion
3461    to be its local rows and the corresponding columns (a square submatrix);
3462    each processor's off-diagonal portion encompasses the remainder of the
3463    local matrix (a rectangular submatrix).
3464 
3465    The user can specify preallocated storage for the diagonal part of
3466    the local submatrix with either d_nz or d_nnz (not both).  Set
3467    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
3468    memory allocation.  Likewise, specify preallocated storage for the
3469    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3470 
3471    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3472    the figure below we depict these three local rows and all columns (0-11).
3473 
3474 .vb
3475            0 1 2 3 4 5 6 7 8 9 10 11
3476           -------------------
3477    row 3  |  o o o d d d o o o o o o
3478    row 4  |  o o o d d d o o o o o o
3479    row 5  |  o o o d d d o o o o o o
3480           -------------------
3481 .ve
3482 
3483    Thus, any entries in the d locations are stored in the d (diagonal)
3484    submatrix, and any entries in the o locations are stored in the
3485    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3486    stored simply in the MATSEQBAIJ format for compressed row storage.
3487 
3488    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3489    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3490    In general, for PDE problems in which most nonzeros are near the diagonal,
3491    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3492    or you will get TERRIBLE performance; see the users' manual chapter on
3493    matrices.
3494 
3495    Level: intermediate
3496 
3497 .keywords: matrix, block, aij, compressed row, sparse, parallel
3498 
3499 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3500 @*/
3501 PetscErrorCode  MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3502 {
3503   PetscErrorCode ierr;
3504   PetscMPIInt    size;
3505 
3506   PetscFunctionBegin;
3507   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3508   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3509   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3510   if (size > 1) {
3511     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
3512     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3513   } else {
3514     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3515     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3516   }
3517   PetscFunctionReturn(0);
3518 }
3519 
3520 #undef __FUNCT__
3521 #define __FUNCT__ "MatDuplicate_MPIBAIJ"
3522 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3523 {
3524   Mat            mat;
3525   Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
3526   PetscErrorCode ierr;
3527   PetscInt       len=0;
3528 
3529   PetscFunctionBegin;
3530   *newmat       = 0;
3531   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
3532   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3533   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3534   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3535 
3536   mat->factortype   = matin->factortype;
3537   mat->preallocated = PETSC_TRUE;
3538   mat->assembled    = PETSC_TRUE;
3539   mat->insertmode   = NOT_SET_VALUES;
3540 
3541   a      = (Mat_MPIBAIJ*)mat->data;
3542   mat->rmap->bs  = matin->rmap->bs;
3543   a->bs2   = oldmat->bs2;
3544   a->mbs   = oldmat->mbs;
3545   a->nbs   = oldmat->nbs;
3546   a->Mbs   = oldmat->Mbs;
3547   a->Nbs   = oldmat->Nbs;
3548 
3549   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3550   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3551 
3552   a->size         = oldmat->size;
3553   a->rank         = oldmat->rank;
3554   a->donotstash   = oldmat->donotstash;
3555   a->roworiented  = oldmat->roworiented;
3556   a->rowindices   = 0;
3557   a->rowvalues    = 0;
3558   a->getrowactive = PETSC_FALSE;
3559   a->barray       = 0;
3560   a->rstartbs     = oldmat->rstartbs;
3561   a->rendbs       = oldmat->rendbs;
3562   a->cstartbs     = oldmat->cstartbs;
3563   a->cendbs       = oldmat->cendbs;
3564 
3565   /* hash table stuff */
3566   a->ht           = 0;
3567   a->hd           = 0;
3568   a->ht_size      = 0;
3569   a->ht_flag      = oldmat->ht_flag;
3570   a->ht_fact      = oldmat->ht_fact;
3571   a->ht_total_ct  = 0;
3572   a->ht_insert_ct = 0;
3573 
3574   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
3575   if (oldmat->colmap) {
3576 #if defined (PETSC_USE_CTABLE)
3577   ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3578 #else
3579   ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3580   ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3581   ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3582 #endif
3583   } else a->colmap = 0;
3584 
3585   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
3586     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3587     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3588     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
3589   } else a->garray = 0;
3590 
3591   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
3592   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3593   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3594   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3595   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3596 
3597   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3598   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3599   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3600   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3601   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3602   *newmat = mat;
3603 
3604   PetscFunctionReturn(0);
3605 }
3606 
3607 #undef __FUNCT__
3608 #define __FUNCT__ "MatLoad_MPIBAIJ"
3609 PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer)
3610 {
3611   PetscErrorCode ierr;
3612   int            fd;
3613   PetscInt       i,nz,j,rstart,rend;
3614   PetscScalar    *vals,*buf;
3615   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3616   MPI_Status     status;
3617   PetscMPIInt    rank,size,maxnz;
3618   PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
3619   PetscInt       *locrowlens = PETSC_NULL,*procsnz = PETSC_NULL,*browners = PETSC_NULL;
3620   PetscInt       jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax;
3621   PetscMPIInt    tag = ((PetscObject)viewer)->tag;
3622   PetscInt       *dlens = PETSC_NULL,*odlens = PETSC_NULL,*mask = PETSC_NULL,*masked1 = PETSC_NULL,*masked2 = PETSC_NULL,rowcount,odcount;
3623   PetscInt       dcount,kmax,k,nzcount,tmp,mend,sizesset=1,grows,gcols;
3624 
3625   PetscFunctionBegin;
3626   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr);
3627     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
3628   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3629 
3630   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3631   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3632   if (!rank) {
3633     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3634     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
3635     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3636   }
3637 
3638   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
3639 
3640   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3641   M = header[1]; N = header[2];
3642 
3643   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3644   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
3645   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
3646 
3647   /* If global sizes are set, check if they are consistent with that given in the file */
3648   if (sizesset) {
3649     ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
3650   }
3651   if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3652   if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3653 
3654   if (M != N) SETERRQ(((PetscObject)viewer)->comm,PETSC_ERR_SUP,"Can only do square matrices");
3655 
3656   /*
3657      This code adds extra rows to make sure the number of rows is
3658      divisible by the blocksize
3659   */
3660   Mbs        = M/bs;
3661   extra_rows = bs - M + bs*Mbs;
3662   if (extra_rows == bs) extra_rows = 0;
3663   else                  Mbs++;
3664   if (extra_rows && !rank) {
3665     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3666   }
3667 
3668   /* determine ownership of all rows */
3669   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
3670     mbs        = Mbs/size + ((Mbs % size) > rank);
3671     m          = mbs*bs;
3672   } else { /* User set */
3673     m          = newmat->rmap->n;
3674     mbs        = m/bs;
3675   }
3676   ierr       = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr);
3677   ierr       = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3678 
3679   /* process 0 needs enough room for process with most rows */
3680   if (!rank) {
3681     mmax = rowners[1];
3682     for (i=2; i<size; i++) {
3683       mmax = PetscMax(mmax,rowners[i]);
3684     }
3685     mmax*=bs;
3686   } else mmax = m;
3687 
3688   rowners[0] = 0;
3689   for (i=2; i<=size; i++)  rowners[i] += rowners[i-1];
3690   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
3691   rstart = rowners[rank];
3692   rend   = rowners[rank+1];
3693 
3694   /* distribute row lengths to all processors */
3695   ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr);
3696   if (!rank) {
3697     mend = m;
3698     if (size == 1) mend = mend - extra_rows;
3699     ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr);
3700     for (j=mend; j<m; j++) locrowlens[j] = 1;
3701     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3702     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3703     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3704     for (j=0; j<m; j++) {
3705       procsnz[0] += locrowlens[j];
3706     }
3707     for (i=1; i<size; i++) {
3708       mend = browners[i+1] - browners[i];
3709       if (i == size-1) mend = mend - extra_rows;
3710       ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr);
3711       for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
3712       /* calculate the number of nonzeros on each processor */
3713       for (j=0; j<browners[i+1]-browners[i]; j++) {
3714         procsnz[i] += rowlengths[j];
3715       }
3716       ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3717     }
3718     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3719   } else {
3720     ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3721   }
3722 
3723   if (!rank) {
3724     /* determine max buffer needed and allocate it */
3725     maxnz = procsnz[0];
3726     for (i=1; i<size; i++) {
3727       maxnz = PetscMax(maxnz,procsnz[i]);
3728     }
3729     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3730 
3731     /* read in my part of the matrix column indices  */
3732     nz     = procsnz[0];
3733     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
3734     mycols = ibuf;
3735     if (size == 1)  nz -= extra_rows;
3736     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3737     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
3738 
3739     /* read in every ones (except the last) and ship off */
3740     for (i=1; i<size-1; i++) {
3741       nz   = procsnz[i];
3742       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3743       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3744     }
3745     /* read in the stuff for the last proc */
3746     if (size != 1) {
3747       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
3748       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3749       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
3750       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
3751     }
3752     ierr = PetscFree(cols);CHKERRQ(ierr);
3753   } else {
3754     /* determine buffer space needed for message */
3755     nz = 0;
3756     for (i=0; i<m; i++) {
3757       nz += locrowlens[i];
3758     }
3759     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
3760     mycols = ibuf;
3761     /* receive message of column indices*/
3762     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3763     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
3764     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
3765   }
3766 
3767   /* loop over local rows, determining number of off diagonal entries */
3768   ierr     = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
3769   ierr     = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
3770   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3771   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3772   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3773   rowcount = 0; nzcount = 0;
3774   for (i=0; i<mbs; i++) {
3775     dcount  = 0;
3776     odcount = 0;
3777     for (j=0; j<bs; j++) {
3778       kmax = locrowlens[rowcount];
3779       for (k=0; k<kmax; k++) {
3780         tmp = mycols[nzcount++]/bs;
3781         if (!mask[tmp]) {
3782           mask[tmp] = 1;
3783           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
3784           else masked1[dcount++] = tmp;
3785         }
3786       }
3787       rowcount++;
3788     }
3789 
3790     dlens[i]  = dcount;
3791     odlens[i] = odcount;
3792 
3793     /* zero out the mask elements we set */
3794     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
3795     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
3796   }
3797 
3798 
3799   if (!sizesset) {
3800     ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3801   }
3802   ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
3803 
3804   if (!rank) {
3805     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
3806     /* read in my part of the matrix numerical values  */
3807     nz = procsnz[0];
3808     vals = buf;
3809     mycols = ibuf;
3810     if (size == 1)  nz -= extra_rows;
3811     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3812     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
3813 
3814     /* insert into matrix */
3815     jj      = rstart*bs;
3816     for (i=0; i<m; i++) {
3817       ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3818       mycols += locrowlens[i];
3819       vals   += locrowlens[i];
3820       jj++;
3821     }
3822     /* read in other processors (except the last one) and ship out */
3823     for (i=1; i<size-1; i++) {
3824       nz   = procsnz[i];
3825       vals = buf;
3826       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3827       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3828     }
3829     /* the last proc */
3830     if (size != 1){
3831       nz   = procsnz[i] - extra_rows;
3832       vals = buf;
3833       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3834       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
3835       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3836     }
3837     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3838   } else {
3839     /* receive numeric values */
3840     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
3841 
3842     /* receive message of values*/
3843     vals   = buf;
3844     mycols = ibuf;
3845     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
3846     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
3847     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
3848 
3849     /* insert into matrix */
3850     jj      = rstart*bs;
3851     for (i=0; i<m; i++) {
3852       ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3853       mycols += locrowlens[i];
3854       vals   += locrowlens[i];
3855       jj++;
3856     }
3857   }
3858   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
3859   ierr = PetscFree(buf);CHKERRQ(ierr);
3860   ierr = PetscFree(ibuf);CHKERRQ(ierr);
3861   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
3862   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
3863   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
3864   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3865   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3866 
3867   PetscFunctionReturn(0);
3868 }
3869 
3870 #undef __FUNCT__
3871 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
3872 /*@
3873    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
3874 
3875    Input Parameters:
3876 .  mat  - the matrix
3877 .  fact - factor
3878 
3879    Not Collective, each process can use a different factor
3880 
3881    Level: advanced
3882 
3883   Notes:
3884    This can also be set by the command line option: -mat_use_hash_table <fact>
3885 
3886 .keywords: matrix, hashtable, factor, HT
3887 
3888 .seealso: MatSetOption()
3889 @*/
3890 PetscErrorCode  MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
3891 {
3892   PetscErrorCode ierr;
3893 
3894   PetscFunctionBegin;
3895   ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr);
3896   PetscFunctionReturn(0);
3897 }
3898 
3899 EXTERN_C_BEGIN
3900 #undef __FUNCT__
3901 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
3902 PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
3903 {
3904   Mat_MPIBAIJ *baij;
3905 
3906   PetscFunctionBegin;
3907   baij = (Mat_MPIBAIJ*)mat->data;
3908   baij->ht_fact = fact;
3909   PetscFunctionReturn(0);
3910 }
3911 EXTERN_C_END
3912 
3913 #undef __FUNCT__
3914 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
3915 PetscErrorCode  MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3916 {
3917   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
3918   PetscFunctionBegin;
3919   *Ad     = a->A;
3920   *Ao     = a->B;
3921   *colmap = a->garray;
3922   PetscFunctionReturn(0);
3923 }
3924 
3925 /*
3926     Special version for direct calls from Fortran (to eliminate two function call overheads
3927 */
3928 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3929 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3930 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3931 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3932 #endif
3933 
3934 #undef __FUNCT__
3935 #define __FUNCT__ "matmpibiajsetvaluesblocked"
3936 /*@C
3937   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked()
3938 
3939   Collective on Mat
3940 
3941   Input Parameters:
3942 + mat - the matrix
3943 . min - number of input rows
3944 . im - input rows
3945 . nin - number of input columns
3946 . in - input columns
3947 . v - numerical values input
3948 - addvin - INSERT_VALUES or ADD_VALUES
3949 
3950   Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse.
3951 
3952   Level: advanced
3953 
3954 .seealso:   MatSetValuesBlocked()
3955 @*/
3956 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
3957 {
3958   /* convert input arguments to C version */
3959   Mat             mat = *matin;
3960   PetscInt        m = *min, n = *nin;
3961   InsertMode      addv = *addvin;
3962 
3963   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
3964   const MatScalar *value;
3965   MatScalar       *barray=baij->barray;
3966   PetscBool       roworiented = baij->roworiented;
3967   PetscErrorCode  ierr;
3968   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
3969   PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
3970   PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
3971 
3972   PetscFunctionBegin;
3973   /* tasks normally handled by MatSetValuesBlocked() */
3974   if (mat->insertmode == NOT_SET_VALUES) {
3975     mat->insertmode = addv;
3976   }
3977 #if defined(PETSC_USE_DEBUG)
3978   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
3979   if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
3980 #endif
3981   if (mat->assembled) {
3982     mat->was_assembled = PETSC_TRUE;
3983     mat->assembled     = PETSC_FALSE;
3984   }
3985   ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
3986 
3987 
3988   if(!barray) {
3989     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
3990     baij->barray = barray;
3991   }
3992 
3993   if (roworiented) {
3994     stepval = (n-1)*bs;
3995   } else {
3996     stepval = (m-1)*bs;
3997   }
3998   for (i=0; i<m; i++) {
3999     if (im[i] < 0) continue;
4000 #if defined(PETSC_USE_DEBUG)
4001     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
4002 #endif
4003     if (im[i] >= rstart && im[i] < rend) {
4004       row = im[i] - rstart;
4005       for (j=0; j<n; j++) {
4006         /* If NumCol = 1 then a copy is not required */
4007         if ((roworiented) && (n == 1)) {
4008           barray = (MatScalar*)v + i*bs2;
4009         } else if((!roworiented) && (m == 1)) {
4010           barray = (MatScalar*)v + j*bs2;
4011         } else { /* Here a copy is required */
4012           if (roworiented) {
4013             value = v + i*(stepval+bs)*bs + j*bs;
4014           } else {
4015             value = v + j*(stepval+bs)*bs + i*bs;
4016           }
4017           for (ii=0; ii<bs; ii++,value+=stepval) {
4018             for (jj=0; jj<bs; jj++) {
4019               *barray++  = *value++;
4020             }
4021           }
4022           barray -=bs2;
4023         }
4024 
4025         if (in[j] >= cstart && in[j] < cend){
4026           col  = in[j] - cstart;
4027           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
4028         }
4029         else if (in[j] < 0) continue;
4030 #if defined(PETSC_USE_DEBUG)
4031         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
4032 #endif
4033         else {
4034           if (mat->was_assembled) {
4035             if (!baij->colmap) {
4036               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
4037             }
4038 
4039 #if defined(PETSC_USE_DEBUG)
4040 #if defined (PETSC_USE_CTABLE)
4041             { PetscInt data;
4042               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
4043               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
4044             }
4045 #else
4046             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
4047 #endif
4048 #endif
4049 #if defined (PETSC_USE_CTABLE)
4050 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
4051             col  = (col - 1)/bs;
4052 #else
4053             col = (baij->colmap[in[j]] - 1)/bs;
4054 #endif
4055             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
4056               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
4057               col =  in[j];
4058             }
4059           }
4060           else col = in[j];
4061           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
4062         }
4063       }
4064     } else {
4065       if (!baij->donotstash) {
4066         if (roworiented) {
4067           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
4068         } else {
4069           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
4070         }
4071       }
4072     }
4073   }
4074 
4075   /* task normally handled by MatSetValuesBlocked() */
4076   ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
4077   PetscFunctionReturn(0);
4078 }
4079 
4080 #undef __FUNCT__
4081 #define __FUNCT__ "MatCreateMPIBAIJWithArrays"
4082 /*@
4083      MatCreateMPIBAIJWithArrays - creates a MPI BAIJ matrix using arrays that contain in standard
4084          CSR format the local rows.
4085 
4086    Collective on MPI_Comm
4087 
4088    Input Parameters:
4089 +  comm - MPI communicator
4090 .  bs - the block size, only a block size of 1 is supported
4091 .  m - number of local rows (Cannot be PETSC_DECIDE)
4092 .  n - This value should be the same as the local size used in creating the
4093        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4094        calculated if N is given) For square matrices n is almost always m.
4095 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4096 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4097 .   i - row indices
4098 .   j - column indices
4099 -   a - matrix values
4100 
4101    Output Parameter:
4102 .   mat - the matrix
4103 
4104    Level: intermediate
4105 
4106    Notes:
4107        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4108      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4109      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4110 
4111        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4112 
4113 .keywords: matrix, aij, compressed row, sparse, parallel
4114 
4115 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4116           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4117 @*/
4118 PetscErrorCode  MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4119 {
4120   PetscErrorCode ierr;
4121 
4122 
4123  PetscFunctionBegin;
4124   if (i[0]) {
4125     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4126   }
4127   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4128   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4129   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4130   ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr);
4131   ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
4132   PetscFunctionReturn(0);
4133 }
4134