xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 58cd72c3d0f0c5a63e84afc03ac54bb1bb84cd8d)
1 #define PETSCMAT_DLL
2 
3 #include "src/mat/impls/baij/mpi/mpibaij.h"   /*I  "petscmat.h"  I*/
4 
5 EXTERN PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
6 EXTERN PetscErrorCode DisAssemble_MPIBAIJ(Mat);
7 EXTERN PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat,PetscInt,IS[],PetscInt);
8 EXTERN PetscErrorCode MatGetSubMatrices_MPIBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat *[]);
9 EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []);
10 EXTERN PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
11 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
12 EXTERN PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
13 EXTERN PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
14 EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar);
15 
16 /*  UGLY, ugly, ugly
17    When MatScalar == PetscScalar the ..._MatScalar() functions are not separate
18    functions; they are simply #defined to their plain counterparts below. Otherwise
19    ..._MatScalar() takes matrix elements already in single precision (MatScalar) and
20    inserts them directly into the single precision data structures, while the
21    PetscScalar versions convert the entries to single precision and call ..._MatScalar().
22 */
23 #if defined(PETSC_USE_MAT_SINGLE)
24 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
25 EXTERN PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
26 EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
27 EXTERN PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
28 EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
29 #else
30 #define MatSetValuesBlocked_SeqBAIJ_MatScalar      MatSetValuesBlocked_SeqBAIJ
31 #define MatSetValues_MPIBAIJ_MatScalar             MatSetValues_MPIBAIJ
32 #define MatSetValuesBlocked_MPIBAIJ_MatScalar      MatSetValuesBlocked_MPIBAIJ
33 #define MatSetValues_MPIBAIJ_HT_MatScalar          MatSetValues_MPIBAIJ_HT
34 #define MatSetValuesBlocked_MPIBAIJ_HT_MatScalar   MatSetValuesBlocked_MPIBAIJ_HT
35 #endif
36 
37 #undef __FUNCT__
38 #define __FUNCT__ "MatGetRowMax_MPIBAIJ"
39 PetscErrorCode MatGetRowMax_MPIBAIJ(Mat A,Vec v)
40 {
41   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
42   PetscErrorCode ierr;
43   PetscInt       i;
44   PetscScalar    *va,*vb;
45   Vec            vtmp;
46 
47   PetscFunctionBegin;
48 
49   ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr);
50   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
51 
52   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap.n,&vtmp);CHKERRQ(ierr);
53   ierr = MatGetRowMax(a->B,vtmp);CHKERRQ(ierr);
54   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
55 
56   for (i=0; i<A->rmap.n; i++){
57     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) va[i] = vb[i];
58   }
59 
60   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
61   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
62   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
63 
64   PetscFunctionReturn(0);
65 }
66 
67 EXTERN_C_BEGIN
68 #undef __FUNCT__
69 #define __FUNCT__ "MatStoreValues_MPIBAIJ"
70 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIBAIJ(Mat mat)
71 {
72   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
73   PetscErrorCode ierr;
74 
75   PetscFunctionBegin;
76   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
77   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
78   PetscFunctionReturn(0);
79 }
80 EXTERN_C_END
81 
82 EXTERN_C_BEGIN
83 #undef __FUNCT__
84 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
85 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIBAIJ(Mat mat)
86 {
87   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
88   PetscErrorCode ierr;
89 
90   PetscFunctionBegin;
91   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
92   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
93   PetscFunctionReturn(0);
94 }
95 EXTERN_C_END
96 
97 /*
98      Local utility routine that creates a mapping from the global column
99    number to the local number in the off-diagonal part of the local
100    storage of the matrix.  This is done in a non-scalable way since the
101    length of colmap equals the global matrix length.
102 */
103 #undef __FUNCT__
104 #define __FUNCT__ "CreateColmap_MPIBAIJ_Private"
105 PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat)
106 {
107   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
108   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
109   PetscErrorCode ierr;
110   PetscInt       nbs = B->nbs,i,bs=mat->rmap.bs;
111 
112   PetscFunctionBegin;
113 #if defined (PETSC_USE_CTABLE)
114   ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr);
115   for (i=0; i<nbs; i++){
116     ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
117   }
118 #else
119   ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr);
120   ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
121   ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
122   for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
123 #endif
124   PetscFunctionReturn(0);
125 }
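/*
   Illustration only (hypothetical snippet, not part of the source): how the map
   built above is consulted to translate a global point column gcol into a local
   column of B; compare the lookups inside MatSetValues_MPIBAIJ_MatScalar() below.
   The +1/-1 shifts exist because the value 0 is reserved to mean "block column
   not present in B".

       PetscInt col,gcol = ...;
   #if defined(PETSC_USE_CTABLE)
       ierr = PetscTableFind(baij->colmap,gcol/bs + 1,&col);CHKERRQ(ierr);
       col  = col - 1;
   #else
       col  = baij->colmap[gcol/bs] - 1;
   #endif
       if (col < 0) { the block column is not (yet) stored in B }
       else         col += gcol % bs;
*/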
126 
127 #define CHUNKSIZE  10
128 
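/*
   MatSetValues_SeqBAIJ_A_Private() and the matching ..._B_Private() below insert
   one point entry (row,col,value) into the local diagonal block A or the
   off-diagonal block B.  They locate the block column with a bisection that is
   narrowed to a short linear scan, add or overwrite the value if the block
   already exists, and otherwise (unless nonew forbids new nonzeros) grow the
   row via MatSeqXAIJReallocateAIJ() and shift the later blocks of the row up
   to make room.
*/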
129 #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
130 { \
131  \
132     brow = row/bs;  \
133     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
134     rmax = aimax[brow]; nrow = ailen[brow]; \
135       bcol = col/bs; \
136       ridx = row % bs; cidx = col % bs; \
137       low = 0; high = nrow; \
138       while (high-low > 3) { \
139         t = (low+high)/2; \
140         if (rp[t] > bcol) high = t; \
141         else              low  = t; \
142       } \
143       for (_i=low; _i<high; _i++) { \
144         if (rp[_i] > bcol) break; \
145         if (rp[_i] == bcol) { \
146           bap  = ap +  bs2*_i + bs*cidx + ridx; \
147           if (addv == ADD_VALUES) *bap += value;  \
148           else                    *bap  = value;  \
149           goto a_noinsert; \
150         } \
151       } \
152       if (a->nonew == 1) goto a_noinsert; \
153       if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
154       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
155       N = nrow++ - 1;  \
156       /* shift up all the later entries in this row */ \
157       for (ii=N; ii>=_i; ii--) { \
158         rp[ii+1] = rp[ii]; \
159         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
160       } \
161       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
162       rp[_i]                      = bcol;  \
163       ap[bs2*_i + bs*cidx + ridx] = value;  \
164       a_noinsert:; \
165     ailen[brow] = nrow; \
166 }
167 
168 #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
169 { \
170     brow = row/bs;  \
171     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
172     rmax = bimax[brow]; nrow = bilen[brow]; \
173       bcol = col/bs; \
174       ridx = row % bs; cidx = col % bs; \
175       low = 0; high = nrow; \
176       while (high-low > 3) { \
177         t = (low+high)/2; \
178         if (rp[t] > bcol) high = t; \
179         else              low  = t; \
180       } \
181       for (_i=low; _i<high; _i++) { \
182         if (rp[_i] > bcol) break; \
183         if (rp[_i] == bcol) { \
184           bap  = ap +  bs2*_i + bs*cidx + ridx; \
185           if (addv == ADD_VALUES) *bap += value;  \
186           else                    *bap  = value;  \
187           goto b_noinsert; \
188         } \
189       } \
190       if (b->nonew == 1) goto b_noinsert; \
191       if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
192       MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
193       CHKMEMQ;\
194       N = nrow++ - 1;  \
195       /* shift up all the later entries in this row */ \
196       for (ii=N; ii>=_i; ii--) { \
197         rp[ii+1] = rp[ii]; \
198         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
199       } \
200       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
201       rp[_i]                      = bcol;  \
202       ap[bs2*_i + bs*cidx + ridx] = value;  \
203       b_noinsert:; \
204     bilen[brow] = nrow; \
205 }
206 
207 #if defined(PETSC_USE_MAT_SINGLE)
208 #undef __FUNCT__
209 #define __FUNCT__ "MatSetValues_MPIBAIJ"
210 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
211 {
212   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
213   PetscErrorCode ierr;
214   PetscInt       i,N = m*n;
215   MatScalar      *vsingle;
216 
217   PetscFunctionBegin;
218   if (N > b->setvalueslen) {
219     ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);
220     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
221     b->setvalueslen  = N;
222   }
223   vsingle = b->setvaluescopy;
224 
225   for (i=0; i<N; i++) {
226     vsingle[i] = v[i];
227   }
228   ierr = MatSetValues_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
229   PetscFunctionReturn(0);
230 }
231 
232 #undef __FUNCT__
233 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
234 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
235 {
236   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
237   PetscErrorCode ierr;
238   PetscInt       i,N = m*n*b->bs2;
239   MatScalar      *vsingle;
240 
241   PetscFunctionBegin;
242   if (N > b->setvalueslen) {
243     ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);
244     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
245     b->setvalueslen  = N;
246   }
247   vsingle = b->setvaluescopy;
248   for (i=0; i<N; i++) {
249     vsingle[i] = v[i];
250   }
251   ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
252   PetscFunctionReturn(0);
253 }
254 
255 #undef __FUNCT__
256 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
257 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
258 {
259   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
260   PetscErrorCode ierr;
261   PetscInt       i,N = m*n;
262   MatScalar      *vsingle;
263 
264   PetscFunctionBegin;
265   if (N > b->setvalueslen) {
266     ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);
267     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
268     b->setvalueslen  = N;
269   }
270   vsingle = b->setvaluescopy;
271   for (i=0; i<N; i++) {
272     vsingle[i] = v[i];
273   }
274   ierr = MatSetValues_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
275   PetscFunctionReturn(0);
276 }
277 
278 #undef __FUNCT__
279 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
280 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
281 {
282   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
283   PetscErrorCode ierr;
284   PetscInt       i,N = m*n*b->bs2;
285   MatScalar      *vsingle;
286 
287   PetscFunctionBegin;
288   if (N > b->setvalueslen) {
289     ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);
290     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
291     b->setvalueslen  = N;
292   }
293   vsingle = b->setvaluescopy;
294   for (i=0; i<N; i++) {
295     vsingle[i] = v[i];
296   }
297   ierr = MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
298   PetscFunctionReturn(0);
299 }
300 #endif
301 
302 #undef __FUNCT__
303 #define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar"
304 PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
305 {
306   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
307   MatScalar      value;
308   PetscTruth     roworiented = baij->roworiented;
309   PetscErrorCode ierr;
310   PetscInt       i,j,row,col;
311   PetscInt       rstart_orig=mat->rmap.rstart;
312   PetscInt       rend_orig=mat->rmap.rend,cstart_orig=mat->cmap.rstart;
313   PetscInt       cend_orig=mat->cmap.rend,bs=mat->rmap.bs;
314 
315   /* Some Variables required in the macro */
316   Mat            A = baij->A;
317   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)(A)->data;
318   PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
319   MatScalar      *aa=a->a;
320 
321   Mat            B = baij->B;
322   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(B)->data;
323   PetscInt       *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
324   MatScalar      *ba=b->a;
325 
326   PetscInt       *rp,ii,nrow,_i,rmax,N,brow,bcol;
327   PetscInt       low,high,t,ridx,cidx,bs2=a->bs2;
328   MatScalar      *ap,*bap;
329 
330   PetscFunctionBegin;
331   for (i=0; i<m; i++) {
332     if (im[i] < 0) continue;
333 #if defined(PETSC_USE_DEBUG)
334     if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
335 #endif
336     if (im[i] >= rstart_orig && im[i] < rend_orig) {
337       row = im[i] - rstart_orig;
338       for (j=0; j<n; j++) {
339         if (in[j] >= cstart_orig && in[j] < cend_orig){
340           col = in[j] - cstart_orig;
341           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
342           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
343           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
344         } else if (in[j] < 0) continue;
345 #if defined(PETSC_USE_DEBUG)
346         else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
347 #endif
348         else {
349           if (mat->was_assembled) {
350             if (!baij->colmap) {
351               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
352             }
353 #if defined (PETSC_USE_CTABLE)
354             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
355             col  = col - 1;
356 #else
357             col = baij->colmap[in[j]/bs] - 1;
358 #endif
359             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
360               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
361               col =  in[j];
362               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
363               B = baij->B;
364               b = (Mat_SeqBAIJ*)(B)->data;
365               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
366               ba=b->a;
367             } else col += in[j]%bs;
368           } else col = in[j];
369           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
370           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
371           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
372         }
373       }
374     } else {
375       if (!baij->donotstash) {
376         if (roworiented) {
377           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
378         } else {
379           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
380         }
381       }
382     }
383   }
384   PetscFunctionReturn(0);
385 }
386 
387 #undef __FUNCT__
388 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_MatScalar"
389 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
390 {
391   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
392   const MatScalar *value;
393   MatScalar       *barray=baij->barray;
394   PetscTruth      roworiented = baij->roworiented;
395   PetscErrorCode  ierr;
396   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
397   PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
398   PetscInt        cend=baij->cendbs,bs=mat->rmap.bs,bs2=baij->bs2;
399 
400   PetscFunctionBegin;
401   if(!barray) {
402     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
403     baij->barray = barray;
404   }
405 
406   if (roworiented) {
407     stepval = (n-1)*bs;
408   } else {
409     stepval = (m-1)*bs;
410   }
411   for (i=0; i<m; i++) {
412     if (im[i] < 0) continue;
413 #if defined(PETSC_USE_DEBUG)
414     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
415 #endif
416     if (im[i] >= rstart && im[i] < rend) {
417       row = im[i] - rstart;
418       for (j=0; j<n; j++) {
419         /* If there is only one block column (row-oriented) or one block row (column-oriented), no copy is required */
420         if ((roworiented) && (n == 1)) {
421           barray = (MatScalar*)v + i*bs2;
422         } else if((!roworiented) && (m == 1)) {
423           barray = (MatScalar*)v + j*bs2;
424         } else { /* Here a copy is required */
425           if (roworiented) {
426             value = v + i*(stepval+bs)*bs + j*bs;
427           } else {
428             value = v + j*(stepval+bs)*bs + i*bs;
429           }
430           for (ii=0; ii<bs; ii++,value+=stepval) {
431             for (jj=0; jj<bs; jj++) {
432               *barray++  = *value++;
433             }
434           }
435           barray -=bs2;
436         }
437 
438         if (in[j] >= cstart && in[j] < cend){
439           col  = in[j] - cstart;
440           ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
441         }
442         else if (in[j] < 0) continue;
443 #if defined(PETSC_USE_DEBUG)
444         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
445 #endif
446         else {
447           if (mat->was_assembled) {
448             if (!baij->colmap) {
449               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
450             }
451 
452 #if defined(PETSC_USE_DEBUG)
453 #if defined (PETSC_USE_CTABLE)
454             { PetscInt data;
455               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
456               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
457             }
458 #else
459             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
460 #endif
461 #endif
462 #if defined (PETSC_USE_CTABLE)
463             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
464             col  = (col - 1)/bs;
465 #else
466             col = (baij->colmap[in[j]] - 1)/bs;
467 #endif
468             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
469               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
470               col =  in[j];
471             }
472           }
473           else col = in[j];
474           ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
475         }
476       }
477     } else {
478       if (!baij->donotstash) {
479         if (roworiented) {
480           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
481         } else {
482           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
483         }
484       }
485     }
486   }
487   PetscFunctionReturn(0);
488 }
489 
490 #define HASH_KEY 0.6180339887
491 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
492 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
493 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
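/*
   HASH() is the classical multiplicative (Fibonacci) hash: the key is
   multiplied by the golden-ratio conjugate 0.6180339887 and the fractional
   part of the product is scaled to the table size.  For example, with
   size = 100 and key = 7:

       tmp          = 7*0.6180339887 = 4.3262379209
       tmp-(int)tmp = 0.3262379209
       HASH(...)    = (PetscInt)(100*0.3262379209) = 32

   Collisions are resolved by linear probing (with wrap-around) in the
   routines that use the table.
*/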
494 #undef __FUNCT__
495 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT_MatScalar"
496 PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
497 {
498   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
499   PetscTruth     roworiented = baij->roworiented;
500   PetscErrorCode ierr;
501   PetscInt       i,j,row,col;
502   PetscInt       rstart_orig=mat->rmap.rstart;
503   PetscInt       rend_orig=mat->rmap.rend,Nbs=baij->Nbs;
504   PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap.bs,*HT=baij->ht,idx;
505   PetscReal      tmp;
506   MatScalar      **HD = baij->hd,value;
507 #if defined(PETSC_USE_DEBUG)
508   PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
509 #endif
510 
511   PetscFunctionBegin;
512 
513   for (i=0; i<m; i++) {
514 #if defined(PETSC_USE_DEBUG)
515     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
516     if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
517 #endif
518     row = im[i];
519     if (row >= rstart_orig && row < rend_orig) {
520       for (j=0; j<n; j++) {
521         col = in[j];
522         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
523         /* Look up into the Hash Table */
524         key = (row/bs)*Nbs+(col/bs)+1;
525         h1  = HASH(size,key,tmp);
526 
527 
528         idx = h1;
529 #if defined(PETSC_USE_DEBUG)
530         insert_ct++;
531         total_ct++;
532         if (HT[idx] != key) {
533           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
534           if (idx == size) {
535             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
536             if (idx == h1) {
537               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
538             }
539           }
540         }
541 #else
542         if (HT[idx] != key) {
543           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
544           if (idx == size) {
545             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
546             if (idx == h1) {
547               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
548             }
549           }
550         }
551 #endif
552         /* A HASH table entry is found, so insert the values at the correct address */
553         if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
554         else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
555       }
556     } else {
557       if (!baij->donotstash) {
558         if (roworiented) {
559           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
560         } else {
561           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
562         }
563       }
564     }
565   }
566 #if defined(PETSC_USE_DEBUG)
567   baij->ht_total_ct = total_ct;
568   baij->ht_insert_ct = insert_ct;
569 #endif
570   PetscFunctionReturn(0);
571 }
572 
573 #undef __FUNCT__
574 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT_MatScalar"
575 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
576 {
577   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
578   PetscTruth      roworiented = baij->roworiented;
579   PetscErrorCode  ierr;
580   PetscInt        i,j,ii,jj,row,col;
581   PetscInt        rstart=baij->rstartbs;
582   PetscInt        rend=baij->rendbs,stepval,bs=mat->rmap.bs,bs2=baij->bs2;
583   PetscInt        h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
584   PetscReal       tmp;
585   MatScalar       **HD = baij->hd,*baij_a;
586   const MatScalar *v_t,*value;
587 #if defined(PETSC_USE_DEBUG)
588   PetscInt        total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
589 #endif
590 
591   PetscFunctionBegin;
592 
593   if (roworiented) {
594     stepval = (n-1)*bs;
595   } else {
596     stepval = (m-1)*bs;
597   }
598   for (i=0; i<m; i++) {
599 #if defined(PETSC_USE_DEBUG)
600     if (im[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
601     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
602 #endif
603     row   = im[i];
604     v_t   = v + i*bs2;
605     if (row >= rstart && row < rend) {
606       for (j=0; j<n; j++) {
607         col = in[j];
608 
609         /* Look up into the Hash Table */
610         key = row*Nbs+col+1;
611         h1  = HASH(size,key,tmp);
612 
613         idx = h1;
614 #if defined(PETSC_USE_DEBUG)
615         total_ct++;
616         insert_ct++;
617         if (HT[idx] != key) {
618           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
619           if (idx == size) {
620             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
621             if (idx == h1) {
622               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
623             }
624           }
625         }
626 #else
627         if (HT[idx] != key) {
628           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
629           if (idx == size) {
630             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
631             if (idx == h1) {
632               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
633             }
634           }
635         }
636 #endif
637         baij_a = HD[idx];
638         if (roworiented) {
639           /*value = v + i*(stepval+bs)*bs + j*bs;*/
640           /* value = v + (i*(stepval+bs)+j)*bs; */
641           value = v_t;
642           v_t  += bs;
643           if (addv == ADD_VALUES) {
644             for (ii=0; ii<bs; ii++,value+=stepval) {
645               for (jj=ii; jj<bs2; jj+=bs) {
646                 baij_a[jj]  += *value++;
647               }
648             }
649           } else {
650             for (ii=0; ii<bs; ii++,value+=stepval) {
651               for (jj=ii; jj<bs2; jj+=bs) {
652                 baij_a[jj]  = *value++;
653               }
654             }
655           }
656         } else {
657           value = v + j*(stepval+bs)*bs + i*bs;
658           if (addv == ADD_VALUES) {
659             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
660               for (jj=0; jj<bs; jj++) {
661                 baij_a[jj]  += *value++;
662               }
663             }
664           } else {
665             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
666               for (jj=0; jj<bs; jj++) {
667                 baij_a[jj]  = *value++;
668               }
669             }
670           }
671         }
672       }
673     } else {
674       if (!baij->donotstash) {
675         if (roworiented) {
676           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
677         } else {
678           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
679         }
680       }
681     }
682   }
683 #if defined(PETSC_USE_DEBUG)
684   baij->ht_total_ct = total_ct;
685   baij->ht_insert_ct = insert_ct;
686 #endif
687   PetscFunctionReturn(0);
688 }
689 
690 #undef __FUNCT__
691 #define __FUNCT__ "MatGetValues_MPIBAIJ"
692 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
693 {
694   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
695   PetscErrorCode ierr;
696   PetscInt       bs=mat->rmap.bs,i,j,bsrstart = mat->rmap.rstart,bsrend = mat->rmap.rend;
697   PetscInt       bscstart = mat->cmap.rstart,bscend = mat->cmap.rend,row,col,data;
698 
699   PetscFunctionBegin;
700   for (i=0; i<m; i++) {
701     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
702     if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap.N-1);
703     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
704       row = idxm[i] - bsrstart;
705       for (j=0; j<n; j++) {
706         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]);
707         if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1);
708         if (idxn[j] >= bscstart && idxn[j] < bscend){
709           col = idxn[j] - bscstart;
710           ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
711         } else {
712           if (!baij->colmap) {
713             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
714           }
715 #if defined (PETSC_USE_CTABLE)
716           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
717           data --;
718 #else
719           data = baij->colmap[idxn[j]/bs]-1;
720 #endif
721           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
722           else {
723             col  = data + idxn[j]%bs;
724             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
725           }
726         }
727       }
728     } else {
729       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
730     }
731   }
732   PetscFunctionReturn(0);
733 }
734 
735 #undef __FUNCT__
736 #define __FUNCT__ "MatNorm_MPIBAIJ"
737 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
738 {
739   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
740   Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
741   PetscErrorCode ierr;
742   PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap.bs,nz,row,col;
743   PetscReal      sum = 0.0;
744   MatScalar      *v;
745 
746   PetscFunctionBegin;
747   if (baij->size == 1) {
748     ierr =  MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
749   } else {
750     if (type == NORM_FROBENIUS) {
751       v = amat->a;
752       nz = amat->nz*bs2;
753       for (i=0; i<nz; i++) {
754 #if defined(PETSC_USE_COMPLEX)
755         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
756 #else
757         sum += (*v)*(*v); v++;
758 #endif
759       }
760       v = bmat->a;
761       nz = bmat->nz*bs2;
762       for (i=0; i<nz; i++) {
763 #if defined(PETSC_USE_COMPLEX)
764         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
765 #else
766         sum += (*v)*(*v); v++;
767 #endif
768       }
769       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
770       *nrm = sqrt(*nrm);
771     } else if (type == NORM_1) { /* max column sum */
772       PetscReal *tmp,*tmp2;
773       PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
774       ierr = PetscMalloc((2*mat->cmap.N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
775       tmp2 = tmp + mat->cmap.N;
776       ierr = PetscMemzero(tmp,mat->cmap.N*sizeof(PetscReal));CHKERRQ(ierr);
777       v = amat->a; jj = amat->j;
778       for (i=0; i<amat->nz; i++) {
779         for (j=0; j<bs; j++){
780           col = bs*(cstart + *jj) + j; /* column index */
781           for (row=0; row<bs; row++){
782             tmp[col] += PetscAbsScalar(*v);  v++;
783           }
784         }
785         jj++;
786       }
787       v = bmat->a; jj = bmat->j;
788       for (i=0; i<bmat->nz; i++) {
789         for (j=0; j<bs; j++){
790           col = bs*garray[*jj] + j;
791           for (row=0; row<bs; row++){
792             tmp[col] += PetscAbsScalar(*v); v++;
793           }
794         }
795         jj++;
796       }
797       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap.N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
798       *nrm = 0.0;
799       for (j=0; j<mat->cmap.N; j++) {
800         if (tmp2[j] > *nrm) *nrm = tmp2[j];
801       }
802       ierr = PetscFree(tmp);CHKERRQ(ierr);
803     } else if (type == NORM_INFINITY) { /* max row sum */
804       PetscReal *sums;
805       ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr);
806       sum = 0.0;
807       for (j=0; j<amat->mbs; j++) {
808         for (row=0; row<bs; row++) sums[row] = 0.0;
809         v = amat->a + bs2*amat->i[j];
810         nz = amat->i[j+1]-amat->i[j];
811         for (i=0; i<nz; i++) {
812           for (col=0; col<bs; col++){
813             for (row=0; row<bs; row++){
814               sums[row] += PetscAbsScalar(*v); v++;
815             }
816           }
817         }
818         v = bmat->a + bs2*bmat->i[j];
819         nz = bmat->i[j+1]-bmat->i[j];
820         for (i=0; i<nz; i++) {
821           for (col=0; col<bs; col++){
822             for (row=0; row<bs; row++){
823               sums[row] += PetscAbsScalar(*v); v++;
824             }
825           }
826         }
827         for (row=0; row<bs; row++){
828           if (sums[row] > sum) sum = sums[row];
829         }
830       }
831       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
832       ierr = PetscFree(sums);CHKERRQ(ierr);
833     } else {
834       SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
835     }
836   }
837   PetscFunctionReturn(0);
838 }
839 
840 /*
841   Creates the hash table, and sets the table entries.
842   This table is created only once.
843   If new entries need to be added to the matrix
844   then the hash table has to be destroyed and
845   recreated.
846 */
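/*
   Illustration only: the insertion used below is plain open addressing with
   linear probing and wrap-around ('blockdata' is a hypothetical stand-in for
   the a->a + j*bs2 / b->a + j*bs2 pointers actually stored):

       h1 = HASH(size,key,tmp);
       for (k=0; k<size; k++) {
         if (!HT[(h1+k)%size]) {
           HT[(h1+k)%size] = key;
           HD[(h1+k)%size] = blockdata;
           break;
         }
       }

   The table has factor*nz slots for nz nonzero blocks, so 'factor' is the
   inverse of the load factor: larger values shorten the probe sequences at
   the cost of extra memory.
*/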
847 #undef __FUNCT__
848 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
849 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
850 {
851   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
852   Mat            A = baij->A,B=baij->B;
853   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
854   PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
855   PetscErrorCode ierr;
856   PetscInt       size,bs2=baij->bs2,rstart=baij->rstartbs;
857   PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
858   PetscInt       *HT,key;
859   MatScalar      **HD;
860   PetscReal      tmp;
861 #if defined(PETSC_USE_INFO)
862   PetscInt       ct=0,max=0;
863 #endif
864 
865   PetscFunctionBegin;
866   baij->ht_size=(PetscInt)(factor*nz);
867   size = baij->ht_size;
868 
869   if (baij->ht) {
870     PetscFunctionReturn(0);
871   }
872 
873   /* Allocate Memory for Hash Table */
874   ierr     = PetscMalloc((size)*(sizeof(PetscInt)+sizeof(MatScalar*))+1,&baij->hd);CHKERRQ(ierr);
875   baij->ht = (PetscInt*)(baij->hd + size);
876   HD       = baij->hd;
877   HT       = baij->ht;
878 
879 
880   ierr = PetscMemzero(HD,size*(sizeof(PetscInt)+sizeof(MatScalar*)));CHKERRQ(ierr);
881 
882 
883   /* Loop Over A */
884   for (i=0; i<a->mbs; i++) {
885     for (j=ai[i]; j<ai[i+1]; j++) {
886       row = i+rstart;
887       col = aj[j]+cstart;
888 
889       key = row*Nbs + col + 1;
890       h1  = HASH(size,key,tmp);
891       for (k=0; k<size; k++){
892         if (!HT[(h1+k)%size]) {
893           HT[(h1+k)%size] = key;
894           HD[(h1+k)%size] = a->a + j*bs2;
895           break;
896 #if defined(PETSC_USE_INFO)
897         } else {
898           ct++;
899 #endif
900         }
901       }
902 #if defined(PETSC_USE_INFO)
903       if (k> max) max = k;
904 #endif
905     }
906   }
907   /* Loop Over B */
908   for (i=0; i<b->mbs; i++) {
909     for (j=bi[i]; j<bi[i+1]; j++) {
910       row = i+rstart;
911       col = garray[bj[j]];
912       key = row*Nbs + col + 1;
913       h1  = HASH(size,key,tmp);
914       for (k=0; k<size; k++){
915         if (!HT[(h1+k)%size]) {
916           HT[(h1+k)%size] = key;
917           HD[(h1+k)%size] = b->a + j*bs2;
918           break;
919 #if defined(PETSC_USE_INFO)
920         } else {
921           ct++;
922 #endif
923         }
924       }
925 #if defined(PETSC_USE_INFO)
926       if (k> max) max = k;
927 #endif
928     }
929   }
930 
931   /* Print Summary */
932 #if defined(PETSC_USE_INFO)
933   for (i=0,j=0; i<size; i++) {
934     if (HT[i]) {j++;}
935   }
936   ierr = PetscInfo2(0,"Average Search = %5.2f, max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
937 #endif
938   PetscFunctionReturn(0);
939 }
940 
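/*
   Assembly is a two-phase operation.  Values destined for rows owned by other
   processes were accumulated in mat->stash (point entries) and mat->bstash
   (blocked entries) by the MatSetValues() variants above; MatAssemblyBegin()
   starts scattering them to their owning processes, and MatAssemblyEnd()
   receives them and inserts them through the usual local MatSetValues paths
   before assembling baij->A and baij->B themselves.
*/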
941 #undef __FUNCT__
942 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
943 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
944 {
945   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
946   PetscErrorCode ierr;
947   PetscInt       nstash,reallocs;
948   InsertMode     addv;
949 
950   PetscFunctionBegin;
951   if (baij->donotstash) {
952     PetscFunctionReturn(0);
953   }
954 
955   /* make sure all processors are in either INSERT_VALUES or ADD_VALUES mode */
956   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
957   if (addv == (ADD_VALUES|INSERT_VALUES)) {
958     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
959   }
960   mat->insertmode = addv; /* in case this processor had no cache */
961 
962   ierr = MatStashScatterBegin_Private(&mat->stash,mat->rmap.range);CHKERRQ(ierr);
963   ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rangebs);CHKERRQ(ierr);
964   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
965   ierr = PetscInfo2(0,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
966   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
967   ierr = PetscInfo2(0,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
968   PetscFunctionReturn(0);
969 }
970 
971 #undef __FUNCT__
972 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
973 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
974 {
975   Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
976   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)baij->A->data;
977   PetscErrorCode ierr;
978   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
979   PetscInt       *row,*col,other_disassembled;
980   PetscTruth     r1,r2,r3;
981   MatScalar      *val;
982   InsertMode     addv = mat->insertmode;
983   PetscMPIInt    n;
984 
985   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
986   PetscFunctionBegin;
987   if (!baij->donotstash) {
988     while (1) {
989       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
990       if (!flg) break;
991 
992       for (i=0; i<n;) {
993         /* Now identify the consecutive vals belonging to the same row */
994         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
995         if (j < n) ncols = j-i;
996         else       ncols = n-i;
997         /* Now assemble all these values with a single function call */
998         ierr = MatSetValues_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
999         i = j;
1000       }
1001     }
1002     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
1003     /* Now process the block-stash. Since the values are stashed column-oriented,
1004        clear the roworiented flags, and after MatSetValues()
1005        restore the original flags */
1006     r1 = baij->roworiented;
1007     r2 = a->roworiented;
1008     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
1009     baij->roworiented = PETSC_FALSE;
1010     a->roworiented    = PETSC_FALSE;
1011     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
1012     while (1) {
1013       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
1014       if (!flg) break;
1015 
1016       for (i=0; i<n;) {
1017         /* Now identify the consecutive vals belonging to the same row */
1018         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
1019         if (j < n) ncols = j-i;
1020         else       ncols = n-i;
1021         ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
1022         i = j;
1023       }
1024     }
1025     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
1026     baij->roworiented = r1;
1027     a->roworiented    = r2;
1028     ((Mat_SeqBAIJ*)baij->B->data)->roworiented    = r3; /* b->roworiented */
1029   }
1030 
1031   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
1032   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
1033 
1034   /* determine if any processor has disassembled, if so we must
1035      also disassemble ourselves, in order that we may reassemble. */
1036   /*
1037      if nonzero structure of submatrix B cannot change then we know that
1038      no processor disassembled thus we can skip this stuff
1039   */
1040   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
1041     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
1042     if (mat->was_assembled && !other_disassembled) {
1043       ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
1044     }
1045   }
1046 
1047   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
1048     ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
1049   }
1050   ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
1051   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
1052   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
1053 
1054 #if defined(PETSC_USE_INFO)
1055   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
1056     ierr = PetscInfo1(0,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);
1057     baij->ht_total_ct  = 0;
1058     baij->ht_insert_ct = 0;
1059   }
1060 #endif
1061   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
1062     ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
1063     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
1064     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
1065   }
1066 
1067   ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
1068   baij->rowvalues = 0;
1069   PetscFunctionReturn(0);
1070 }
1071 
1072 #undef __FUNCT__
1073 #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
1074 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1075 {
1076   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
1077   PetscErrorCode    ierr;
1078   PetscMPIInt       size = baij->size,rank = baij->rank;
1079   PetscInt          bs = mat->rmap.bs;
1080   PetscTruth        iascii,isdraw;
1081   PetscViewer       sviewer;
1082   PetscViewerFormat format;
1083 
1084   PetscFunctionBegin;
1085   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1086   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1087   if (iascii) {
1088     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1089     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1090       MatInfo info;
1091       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
1092       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1093       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
1094               rank,mat->rmap.N,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs,
1095               mat->rmap.bs,(PetscInt)info.memory);CHKERRQ(ierr);
1096       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1097       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
1098       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1099       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
1100       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1101       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
1102       PetscFunctionReturn(0);
1103     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1104       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1105       PetscFunctionReturn(0);
1106     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1107       PetscFunctionReturn(0);
1108     }
1109   }
1110 
1111   if (isdraw) {
1112     PetscDraw       draw;
1113     PetscTruth      isnull;
1114     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1115     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1116   }
1117 
1118   if (size == 1) {
1119     ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr);
1120     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
1121   } else {
1122     /* assemble the entire matrix onto first processor. */
1123     Mat         A;
1124     Mat_SeqBAIJ *Aloc;
1125     PetscInt    M = mat->rmap.N,N = mat->cmap.N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
1126     MatScalar   *a;
1127 
1128     /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
1129     /* Perhaps this should be the type of mat? */
1130     ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr);
1131     if (!rank) {
1132       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1133     } else {
1134       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1135     }
1136     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
1137     ierr = MatMPIBAIJSetPreallocation(A,mat->rmap.bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1138     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1139 
1140     /* copy over the A part */
1141     Aloc = (Mat_SeqBAIJ*)baij->A->data;
1142     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1143     ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
1144 
1145     for (i=0; i<mbs; i++) {
1146       rvals[0] = bs*(baij->rstartbs + i);
1147       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1148       for (j=ai[i]; j<ai[i+1]; j++) {
1149         col = (baij->cstartbs+aj[j])*bs;
1150         for (k=0; k<bs; k++) {
1151           ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1152           col++; a += bs;
1153         }
1154       }
1155     }
1156     /* copy over the B part */
1157     Aloc = (Mat_SeqBAIJ*)baij->B->data;
1158     ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1159     for (i=0; i<mbs; i++) {
1160       rvals[0] = bs*(baij->rstartbs + i);
1161       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1162       for (j=ai[i]; j<ai[i+1]; j++) {
1163         col = baij->garray[aj[j]]*bs;
1164         for (k=0; k<bs; k++) {
1165           ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1166           col++; a += bs;
1167         }
1168       }
1169     }
1170     ierr = PetscFree(rvals);CHKERRQ(ierr);
1171     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1172     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1173     /*
1174        Everyone has to call to draw the matrix since the graphics waits are
1175        synchronized across all processors that share the PetscDraw object
1176     */
1177     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1178     if (!rank) {
1179       ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
1180       ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1181     }
1182     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1183     ierr = MatDestroy(A);CHKERRQ(ierr);
1184   }
1185   PetscFunctionReturn(0);
1186 }
1187 
1188 #undef __FUNCT__
1189 #define __FUNCT__ "MatView_MPIBAIJ"
1190 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
1191 {
1192   PetscErrorCode ierr;
1193   PetscTruth     iascii,isdraw,issocket,isbinary;
1194 
1195   PetscFunctionBegin;
1196   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1197   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1198   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1199   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1200   if (iascii || isdraw || issocket || isbinary) {
1201     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1202   } else {
1203     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
1204   }
1205   PetscFunctionReturn(0);
1206 }
1207 
1208 #undef __FUNCT__
1209 #define __FUNCT__ "MatDestroy_MPIBAIJ"
1210 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
1211 {
1212   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1213   PetscErrorCode ierr;
1214 
1215   PetscFunctionBegin;
1216 #if defined(PETSC_USE_LOG)
1217   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap.N,mat->cmap.N);
1218 #endif
1219   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1220   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
1221   ierr = MatDestroy(baij->A);CHKERRQ(ierr);
1222   ierr = MatDestroy(baij->B);CHKERRQ(ierr);
1223 #if defined (PETSC_USE_CTABLE)
1224   if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
1225 #else
1226   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
1227 #endif
1228   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
1229   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
1230   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
1231   ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
1232   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
1233   ierr = PetscFree(baij->hd);CHKERRQ(ierr);
1234 #if defined(PETSC_USE_MAT_SINGLE)
1235   ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
1236 #endif
1237   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
1238   ierr = PetscFree(baij);CHKERRQ(ierr);
1239 
1240   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1241   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1242   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1243   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
1244   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1245   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1246   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
1247   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr);
1248   PetscFunctionReturn(0);
1249 }
1250 
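/*
   y = A x is computed as y = A_d x_local + B_o x_ghost: the scatter that
   gathers the ghost entries of x into a->lvec is overlapped with the multiply
   by the local diagonal block a->A, after which the off-diagonal block a->B
   accumulates the ghost contributions into y.
*/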
1251 #undef __FUNCT__
1252 #define __FUNCT__ "MatMult_MPIBAIJ"
1253 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1254 {
1255   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1256   PetscErrorCode ierr;
1257   PetscInt       nt;
1258 
1259   PetscFunctionBegin;
1260   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1261   if (nt != A->cmap.n) {
1262     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1263   }
1264   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1265   if (nt != A->rmap.n) {
1266     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
1267   }
1268   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1269   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1270   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1271   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1272   PetscFunctionReturn(0);
1273 }
1274 
1275 #undef __FUNCT__
1276 #define __FUNCT__ "MatMultAdd_MPIBAIJ"
1277 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1278 {
1279   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1280   PetscErrorCode ierr;
1281 
1282   PetscFunctionBegin;
1283   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1284   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1285   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1286   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1287   PetscFunctionReturn(0);
1288 }
1289 
1290 #undef __FUNCT__
1291 #define __FUNCT__ "MatMultTranspose_MPIBAIJ"
1292 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1293 {
1294   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1295   PetscErrorCode ierr;
1296   PetscTruth     merged;
1297 
1298   PetscFunctionBegin;
1299   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1300   /* do nondiagonal part */
1301   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1302   if (!merged) {
1303     /* send it on its way */
1304     ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1305     /* do local part */
1306     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1307     /* receive remote parts: note this assumes the values are not actually */
1308     /* inserted in yy until the next line */
1309     ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1310   } else {
1311     /* do local part */
1312     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1313     /* send it on its way */
1314     ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1315     /* values actually were received in the Begin() but we need to call this nop */
1316     ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1317   }
1318   PetscFunctionReturn(0);
1319 }
1320 
1321 #undef __FUNCT__
1322 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
1323 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1324 {
1325   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1326   PetscErrorCode ierr;
1327 
1328   PetscFunctionBegin;
1329   /* do nondiagonal part */
1330   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1331   /* send it on its way */
1332   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1333   /* do local part */
1334   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1335   /* receive remote parts: note this assumes the values are not actually */
1336   /* inserted in zz until the next line, which is true for my implementation */
1337   /* but is perhaps not always true. */
1338   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1339   PetscFunctionReturn(0);
1340 }
1341 
1342 /*
1343   This only works correctly for square matrices where the subblock A->A is the
1344    diagonal block
1345 */
1346 #undef __FUNCT__
1347 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
1348 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1349 {
1350   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1351   PetscErrorCode ierr;
1352 
1353   PetscFunctionBegin;
1354   if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1355   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1356   PetscFunctionReturn(0);
1357 }
1358 
1359 #undef __FUNCT__
1360 #define __FUNCT__ "MatScale_MPIBAIJ"
1361 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
1362 {
1363   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1364   PetscErrorCode ierr;
1365 
1366   PetscFunctionBegin;
1367   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1368   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1369   PetscFunctionReturn(0);
1370 }
1371 
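/*
   MatGetRow() returns one global row by fetching the corresponding local rows
   of the diagonal (A) and off-diagonal (B) blocks and merging them in
   increasing global column order: B columns that map below the diagonal block
   come first, then all A columns (shifted by cstart*bs), then the remaining
   B columns.
*/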
1372 #undef __FUNCT__
1373 #define __FUNCT__ "MatGetRow_MPIBAIJ"
1374 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1375 {
1376   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
1377   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1378   PetscErrorCode ierr;
1379   PetscInt       bs = matin->rmap.bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1380   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap.rstart,brend = matin->rmap.rend;
1381   PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;
1382 
1383   PetscFunctionBegin;
1384   if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1385   mat->getrowactive = PETSC_TRUE;
1386 
1387   if (!mat->rowvalues && (idx || v)) {
1388     /*
1389         allocate enough space to hold information from the longest row.
1390     */
1391     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1392     PetscInt     max = 1,mbs = mat->mbs,tmp;
1393     for (i=0; i<mbs; i++) {
1394       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1395       if (max < tmp) { max = tmp; }
1396     }
1397     ierr = PetscMalloc(max*bs2*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1398     mat->rowindices = (PetscInt*)(mat->rowvalues + max*bs2);
1399   }
1400 
1401   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows");
1402   lrow = row - brstart;
1403 
1404   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1405   if (!v)   {pvA = 0; pvB = 0;}
1406   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1407   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1408   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1409   nztot = nzA + nzB;
1410 
1411   cmap  = mat->garray;
1412   if (v  || idx) {
1413     if (nztot) {
1414       /* Sort by increasing column numbers, assuming A and B already sorted */
1415       PetscInt imark = -1;
1416       if (v) {
1417         *v = v_p = mat->rowvalues;
1418         for (i=0; i<nzB; i++) {
1419           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1420           else break;
1421         }
1422         imark = i;
1423         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1424         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1425       }
1426       if (idx) {
1427         *idx = idx_p = mat->rowindices;
1428         if (imark > -1) {
1429           for (i=0; i<imark; i++) {
1430             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1431           }
1432         } else {
1433           for (i=0; i<nzB; i++) {
1434             if (cmap[cworkB[i]/bs] < cstart)
1435               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1436             else break;
1437           }
1438           imark = i;
1439         }
1440         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1441         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1442       }
1443     } else {
1444       if (idx) *idx = 0;
1445       if (v)   *v   = 0;
1446     }
1447   }
1448   *nz = nztot;
1449   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1450   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1451   PetscFunctionReturn(0);
1452 }
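
/*
   Illustrative usage sketch (not part of the library) for the row access
   implemented above: rows may only be queried on the process that owns
   them, one row at a time, and every MatGetRow() must be paired with a
   MatRestoreRow(). The helper name is hypothetical.
*/
static PetscErrorCode ExampleTraverseLocalRows(Mat A)
{
  PetscErrorCode    ierr;
  PetscInt          row,rstart,rend,ncols;
  const PetscInt    *cols;
  const PetscScalar *vals;

  PetscFunctionBegin;
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  for (row=rstart; row<rend; row++) {
    ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
    /* ... inspect ncols, cols[], vals[] (global column numbering) ... */
    ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}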
1453 
1454 #undef __FUNCT__
1455 #define __FUNCT__ "MatRestoreRow_MPIBAIJ"
1456 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1457 {
1458   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1459 
1460   PetscFunctionBegin;
1461   if (!baij->getrowactive) {
1462     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1463   }
1464   baij->getrowactive = PETSC_FALSE;
1465   PetscFunctionReturn(0);
1466 }
1467 
1468 #undef __FUNCT__
1469 #define __FUNCT__ "MatZeroEntries_MPIBAIJ"
1470 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1471 {
1472   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1473   PetscErrorCode ierr;
1474 
1475   PetscFunctionBegin;
1476   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1477   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1478   PetscFunctionReturn(0);
1479 }
1480 
1481 #undef __FUNCT__
1482 #define __FUNCT__ "MatGetInfo_MPIBAIJ"
1483 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1484 {
1485   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
1486   Mat            A = a->A,B = a->B;
1487   PetscErrorCode ierr;
1488   PetscReal      isend[5],irecv[5];
1489 
1490   PetscFunctionBegin;
1491   info->block_size     = (PetscReal)matin->rmap.bs;
1492   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1493   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1494   isend[3] = info->memory;  isend[4] = info->mallocs;
1495   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1496   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1497   isend[3] += info->memory;  isend[4] += info->mallocs;
1498   if (flag == MAT_LOCAL) {
1499     info->nz_used      = isend[0];
1500     info->nz_allocated = isend[1];
1501     info->nz_unneeded  = isend[2];
1502     info->memory       = isend[3];
1503     info->mallocs      = isend[4];
1504   } else if (flag == MAT_GLOBAL_MAX) {
1505     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1506     info->nz_used      = irecv[0];
1507     info->nz_allocated = irecv[1];
1508     info->nz_unneeded  = irecv[2];
1509     info->memory       = irecv[3];
1510     info->mallocs      = irecv[4];
1511   } else if (flag == MAT_GLOBAL_SUM) {
1512     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1513     info->nz_used      = irecv[0];
1514     info->nz_allocated = irecv[1];
1515     info->nz_unneeded  = irecv[2];
1516     info->memory       = irecv[3];
1517     info->mallocs      = irecv[4];
1518   } else {
1519     SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1520   }
1521   info->rows_global       = (PetscReal)matin->rmap.N;
1522   info->columns_global    = (PetscReal)matin->cmap.N;
1523   info->rows_local        = (PetscReal)matin->rmap.n;
1524   info->columns_local     = (PetscReal)matin->cmap.n;
1525   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1526   info->fill_ratio_needed = 0;
1527   info->factor_mallocs    = 0;
1528   PetscFunctionReturn(0);
1529 }
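
/*
   Illustrative usage sketch (not part of the library): querying the
   statistics assembled above. MAT_LOCAL returns this process's counts;
   MAT_GLOBAL_SUM and MAT_GLOBAL_MAX reduce over the communicator. The
   helper name is hypothetical.
*/
static PetscErrorCode ExampleReportMatInfo(Mat A)
{
  PetscErrorCode ierr;
  MatInfo        info;

  PetscFunctionBegin;
  ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr); /* collective over A->comm */
  ierr = PetscPrintf(A->comm,"nonzeros used %g, allocated %g, mallocs during assembly %g\n",
                     info.nz_used,info.nz_allocated,info.mallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}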
1530 
1531 #undef __FUNCT__
1532 #define __FUNCT__ "MatSetOption_MPIBAIJ"
1533 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op)
1534 {
1535   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1536   PetscErrorCode ierr;
1537 
1538   PetscFunctionBegin;
1539   switch (op) {
1540   case MAT_NO_NEW_NONZERO_LOCATIONS:
1541   case MAT_YES_NEW_NONZERO_LOCATIONS:
1542   case MAT_COLUMNS_UNSORTED:
1543   case MAT_COLUMNS_SORTED:
1544   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1545   case MAT_KEEP_ZEROED_ROWS:
1546   case MAT_NEW_NONZERO_LOCATION_ERR:
1547     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1548     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1549     break;
1550   case MAT_ROW_ORIENTED:
1551     a->roworiented = PETSC_TRUE;
1552     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1553     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1554     break;
1555   case MAT_ROWS_SORTED:
1556   case MAT_ROWS_UNSORTED:
1557   case MAT_YES_NEW_DIAGONALS:
1558     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1559     break;
1560   case MAT_COLUMN_ORIENTED:
1561     a->roworiented = PETSC_FALSE;
1562     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1563     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1564     break;
1565   case MAT_IGNORE_OFF_PROC_ENTRIES:
1566     a->donotstash = PETSC_TRUE;
1567     break;
1568   case MAT_NO_NEW_DIAGONALS:
1569     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1570   case MAT_USE_HASH_TABLE:
1571     a->ht_flag = PETSC_TRUE;
1572     break;
1573   case MAT_SYMMETRIC:
1574   case MAT_STRUCTURALLY_SYMMETRIC:
1575   case MAT_HERMITIAN:
1576   case MAT_SYMMETRY_ETERNAL:
1577     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1578     break;
1579   case MAT_NOT_SYMMETRIC:
1580   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1581   case MAT_NOT_HERMITIAN:
1582   case MAT_NOT_SYMMETRY_ETERNAL:
1583     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1584     break;
1585   default:
1586     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1587   }
1588   PetscFunctionReturn(0);
1589 }
1590 
1591 #undef __FUNCT__
1592 #define __FUNCT__ "MatTranspose_MPIBAIJ"
1593 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,Mat *matout)
1594 {
1595   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
1596   Mat_SeqBAIJ    *Aloc;
1597   Mat            B;
1598   PetscErrorCode ierr;
1599   PetscInt       M=A->rmap.N,N=A->cmap.N,*ai,*aj,i,*rvals,j,k,col;
1600   PetscInt       bs=A->rmap.bs,mbs=baij->mbs;
1601   MatScalar      *a;
1602 
1603   PetscFunctionBegin;
1604   if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1605   ierr = MatCreate(A->comm,&B);CHKERRQ(ierr);
1606   ierr = MatSetSizes(B,A->cmap.n,A->rmap.n,N,M);CHKERRQ(ierr);
1607   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
1608   ierr = MatMPIBAIJSetPreallocation(B,A->rmap.bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1609 
1610   /* copy over the A part */
1611   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1612   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1613   ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
1614 
1615   for (i=0; i<mbs; i++) {
1616     rvals[0] = bs*(baij->rstartbs + i);
1617     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1618     for (j=ai[i]; j<ai[i+1]; j++) {
1619       col = (baij->cstartbs+aj[j])*bs;
1620       for (k=0; k<bs; k++) {
1621         ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1622         col++; a += bs;
1623       }
1624     }
1625   }
1626   /* copy over the B part */
1627   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1628   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1629   for (i=0; i<mbs; i++) {
1630     rvals[0] = bs*(baij->rstartbs + i);
1631     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1632     for (j=ai[i]; j<ai[i+1]; j++) {
1633       col = baij->garray[aj[j]]*bs;
1634       for (k=0; k<bs; k++) {
1635         ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1636         col++; a += bs;
1637       }
1638     }
1639   }
1640   ierr = PetscFree(rvals);CHKERRQ(ierr);
1641   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1642   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1643 
1644   if (matout) {
1645     *matout = B;
1646   } else {
1647     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1648   }
1649   PetscFunctionReturn(0);
1650 }
1651 
1652 #undef __FUNCT__
1653 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
1654 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1655 {
1656   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1657   Mat            a = baij->A,b = baij->B;
1658   PetscErrorCode ierr;
1659   PetscInt       s1,s2,s3;
1660 
1661   PetscFunctionBegin;
1662   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1663   if (rr) {
1664     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1665     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1666     /* Overlap communication with computation. */
1667     ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1668   }
1669   if (ll) {
1670     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1671     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1672     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1673   }
1674   /* scale  the diagonal block */
1675   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1676 
1677   if (rr) {
1678     /* Do a scatter end and then right scale the off-diagonal block */
1679     ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1680     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1681   }
1682 
1683   PetscFunctionReturn(0);
1684 }
1685 
1686 #undef __FUNCT__
1687 #define __FUNCT__ "MatZeroRows_MPIBAIJ"
1688 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
1689 {
1690   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1691   PetscErrorCode ierr;
1692   PetscMPIInt    imdex,size = l->size,n,rank = l->rank;
1693   PetscInt       i,*owners = A->rmap.range;
1694   PetscInt       *nprocs,j,idx,nsends,row;
1695   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
1696   PetscInt       *rvalues,tag = A->tag,count,base,slen,*source,lastidx = -1;
1697   PetscInt       *lens,*lrows,*values,rstart_bs=A->rmap.rstart;
1698   MPI_Comm       comm = A->comm;
1699   MPI_Request    *send_waits,*recv_waits;
1700   MPI_Status     recv_status,*send_status;
1701 #if defined(PETSC_DEBUG)
1702   PetscTruth     found = PETSC_FALSE;
1703 #endif
1704 
1705   PetscFunctionBegin;
1706   /*  first count number of contributors to each processor */
1707   ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
1708   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
1709   ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
1710   j = 0;
1711   for (i=0; i<N; i++) {
1712     if (lastidx > (idx = rows[i])) j = 0;
1713     lastidx = idx;
1714     for (; j<size; j++) {
1715       if (idx >= owners[j] && idx < owners[j+1]) {
1716         nprocs[2*j]++;
1717         nprocs[2*j+1] = 1;
1718         owner[i] = j;
1719 #if defined(PETSC_DEBUG)
1720         found = PETSC_TRUE;
1721 #endif
1722         break;
1723       }
1724     }
1725 #if defined(PETSC_DEBUG)
1726     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
1727     found = PETSC_FALSE;
1728 #endif
1729   }
1730   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
1731 
1732   /* inform other processors of number of messages and max length*/
1733   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
1734 
1735   /* post receives:   */
1736   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
1737   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
1738   for (i=0; i<nrecvs; i++) {
1739     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
1740   }
1741 
1742   /* do sends:
1743      1) starts[i] gives the starting index in svalues for stuff going to
1744      the ith processor
1745   */
1746   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
1747   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
1748   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
1749   starts[0]  = 0;
1750   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1751   for (i=0; i<N; i++) {
1752     svalues[starts[owner[i]]++] = rows[i];
1753   }
1754 
1755   starts[0] = 0;
1756   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1757   count = 0;
1758   for (i=0; i<size; i++) {
1759     if (nprocs[2*i+1]) {
1760       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
1761     }
1762   }
1763   ierr = PetscFree(starts);CHKERRQ(ierr);
1764 
1765   base = owners[rank];
1766 
1767   /*  wait on receives */
1768   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
1769   source = lens + nrecvs;
1770   count  = nrecvs; slen = 0;
1771   while (count) {
1772     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
1773     /* unpack receives into our local space */
1774     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
1775     source[imdex]  = recv_status.MPI_SOURCE;
1776     lens[imdex]    = n;
1777     slen          += n;
1778     count--;
1779   }
1780   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1781 
1782   /* move the data into the send scatter */
1783   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
1784   count = 0;
1785   for (i=0; i<nrecvs; i++) {
1786     values = rvalues + i*nmax;
1787     for (j=0; j<lens[i]; j++) {
1788       lrows[count++] = values[j] - base;
1789     }
1790   }
1791   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1792   ierr = PetscFree(lens);CHKERRQ(ierr);
1793   ierr = PetscFree(owner);CHKERRQ(ierr);
1794   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1795 
1796   /* actually zap the local rows */
1797   /*
1798         Zero the required rows. If the "diagonal block" of the matrix
1799      is square and the user wishes to set the diagonal we use separate
1800      code so that MatSetValues() is not called for each diagonal allocating
1801      new memory, thus calling lots of mallocs and slowing things down.
1802 
1803        Contributed by: Matthew Knepley
1804   */
1805   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1806   ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0);CHKERRQ(ierr);
1807   if ((diag != 0.0) && (l->A->rmap.N == l->A->cmap.N)) {
1808     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag);CHKERRQ(ierr);
1809   } else if (diag != 0.0) {
1810     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr);
1811     if (((Mat_SeqBAIJ*)l->A->data)->nonew) {
1812       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1813 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1814     }
1815     for (i=0; i<slen; i++) {
1816       row  = lrows[i] + rstart_bs;
1817       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
1818     }
1819     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1820     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1821   } else {
1822     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr);
1823   }
1824 
1825   ierr = PetscFree(lrows);CHKERRQ(ierr);
1826 
1827   /* wait on sends */
1828   if (nsends) {
1829     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1830     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1831     ierr = PetscFree(send_status);CHKERRQ(ierr);
1832   }
1833   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1834   ierr = PetscFree(svalues);CHKERRQ(ierr);
1835 
1836   PetscFunctionReturn(0);
1837 }
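
/*
   Illustrative usage sketch (not part of the library): zeroing
   Dirichlet-boundary rows with the routine above. Each process may list
   any global rows; rows owned elsewhere are shipped to their owners by
   the send/receive phase implemented above. Names are hypothetical.
*/
static PetscErrorCode ExampleZeroDirichletRows(Mat A,PetscInt nbc,const PetscInt bcrows[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* zero the listed global rows and place 1.0 on each zeroed diagonal */
  ierr = MatZeroRows(A,nbc,bcrows,1.0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}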
1838 
1839 #undef __FUNCT__
1840 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
1841 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1842 {
1843   Mat_MPIBAIJ    *a   = (Mat_MPIBAIJ*)A->data;
1844   PetscErrorCode ierr;
1845 
1846   PetscFunctionBegin;
1847   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1848   PetscFunctionReturn(0);
1849 }
1850 
1851 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
1852 
1853 #undef __FUNCT__
1854 #define __FUNCT__ "MatEqual_MPIBAIJ"
1855 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag)
1856 {
1857   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1858   Mat            a,b,c,d;
1859   PetscTruth     flg;
1860   PetscErrorCode ierr;
1861 
1862   PetscFunctionBegin;
1863   a = matA->A; b = matA->B;
1864   c = matB->A; d = matB->B;
1865 
1866   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1867   if (flg) {
1868     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1869   }
1870   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1871   PetscFunctionReturn(0);
1872 }
1873 
1874 #undef __FUNCT__
1875 #define __FUNCT__ "MatCopy_MPIBAIJ"
1876 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
1877 {
1878   PetscErrorCode ierr;
1879   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ *)A->data;
1880   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ *)B->data;
1881 
1882   PetscFunctionBegin;
1883   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1884   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1885     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1886   } else {
1887     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1888     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1889   }
1890   PetscFunctionReturn(0);
1891 }
1892 
1893 #undef __FUNCT__
1894 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ"
1895 PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A)
1896 {
1897   PetscErrorCode ierr;
1898 
1899   PetscFunctionBegin;
1900   ierr =  MatMPIBAIJSetPreallocation(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1901   PetscFunctionReturn(0);
1902 }
1903 
1904 #include "petscblaslapack.h"
1905 #undef __FUNCT__
1906 #define __FUNCT__ "MatAXPY_MPIBAIJ"
1907 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1908 {
1909   PetscErrorCode ierr;
1910   Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data;
1911   PetscBLASInt   bnz,one=1;
1912   Mat_SeqBAIJ    *x,*y;
1913 
1914   PetscFunctionBegin;
1915   if (str == SAME_NONZERO_PATTERN) {
1916     PetscScalar alpha = a;
1917     x = (Mat_SeqBAIJ *)xx->A->data;
1918     y = (Mat_SeqBAIJ *)yy->A->data;
1919     bnz = (PetscBLASInt)x->nz;
1920     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1921     x = (Mat_SeqBAIJ *)xx->B->data;
1922     y = (Mat_SeqBAIJ *)yy->B->data;
1923     bnz = (PetscBLASInt)x->nz;
1924     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1925   } else {
1926     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1927   }
1928   PetscFunctionReturn(0);
1929 }
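
/*
   Illustrative usage sketch (not part of the library): Y <- a*X + Y. The
   BLAS fast path above is taken only when the caller promises
   SAME_NONZERO_PATTERN; with DIFFERENT_NONZERO_PATTERN the generic
   MatAXPY_Basic() fallback is used instead. Names are hypothetical.
*/
static PetscErrorCode ExampleShiftedSum(Mat Y,Mat X)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatAXPY(Y,2.0,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* Y = 2*X + Y */
  PetscFunctionReturn(0);
}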
1930 
1931 #undef __FUNCT__
1932 #define __FUNCT__ "MatRealPart_MPIBAIJ"
1933 PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
1934 {
1935   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1936   PetscErrorCode ierr;
1937 
1938   PetscFunctionBegin;
1939   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1940   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1941   PetscFunctionReturn(0);
1942 }
1943 
1944 #undef __FUNCT__
1945 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ"
1946 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
1947 {
1948   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1949   PetscErrorCode ierr;
1950 
1951   PetscFunctionBegin;
1952   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1953   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1954   PetscFunctionReturn(0);
1955 }
1956 
1957 /* -------------------------------------------------------------------*/
1958 static struct _MatOps MatOps_Values = {
1959        MatSetValues_MPIBAIJ,
1960        MatGetRow_MPIBAIJ,
1961        MatRestoreRow_MPIBAIJ,
1962        MatMult_MPIBAIJ,
1963 /* 4*/ MatMultAdd_MPIBAIJ,
1964        MatMultTranspose_MPIBAIJ,
1965        MatMultTransposeAdd_MPIBAIJ,
1966        0,
1967        0,
1968        0,
1969 /*10*/ 0,
1970        0,
1971        0,
1972        0,
1973        MatTranspose_MPIBAIJ,
1974 /*15*/ MatGetInfo_MPIBAIJ,
1975        MatEqual_MPIBAIJ,
1976        MatGetDiagonal_MPIBAIJ,
1977        MatDiagonalScale_MPIBAIJ,
1978        MatNorm_MPIBAIJ,
1979 /*20*/ MatAssemblyBegin_MPIBAIJ,
1980        MatAssemblyEnd_MPIBAIJ,
1981        0,
1982        MatSetOption_MPIBAIJ,
1983        MatZeroEntries_MPIBAIJ,
1984 /*25*/ MatZeroRows_MPIBAIJ,
1985        0,
1986        0,
1987        0,
1988        0,
1989 /*30*/ MatSetUpPreallocation_MPIBAIJ,
1990        0,
1991        0,
1992        0,
1993        0,
1994 /*35*/ MatDuplicate_MPIBAIJ,
1995        0,
1996        0,
1997        0,
1998        0,
1999 /*40*/ MatAXPY_MPIBAIJ,
2000        MatGetSubMatrices_MPIBAIJ,
2001        MatIncreaseOverlap_MPIBAIJ,
2002        MatGetValues_MPIBAIJ,
2003        MatCopy_MPIBAIJ,
2004 /*45*/ 0,
2005        MatScale_MPIBAIJ,
2006        0,
2007        0,
2008        0,
2009 /*50*/ 0,
2010        0,
2011        0,
2012        0,
2013        0,
2014 /*55*/ 0,
2015        0,
2016        MatSetUnfactored_MPIBAIJ,
2017        0,
2018        MatSetValuesBlocked_MPIBAIJ,
2019 /*60*/ 0,
2020        MatDestroy_MPIBAIJ,
2021        MatView_MPIBAIJ,
2022        0,
2023        0,
2024 /*65*/ 0,
2025        0,
2026        0,
2027        0,
2028        0,
2029 /*70*/ MatGetRowMax_MPIBAIJ,
2030        0,
2031        0,
2032        0,
2033        0,
2034 /*75*/ 0,
2035        0,
2036        0,
2037        0,
2038        0,
2039 /*80*/ 0,
2040        0,
2041        0,
2042        0,
2043        MatLoad_MPIBAIJ,
2044 /*85*/ 0,
2045        0,
2046        0,
2047        0,
2048        0,
2049 /*90*/ 0,
2050        0,
2051        0,
2052        0,
2053        0,
2054 /*95*/ 0,
2055        0,
2056        0,
2057        0,
2058        0,
2059 /*100*/0,
2060        0,
2061        0,
2062        0,
2063        0,
2064 /*105*/0,
2065        MatRealPart_MPIBAIJ,
2066        MatImaginaryPart_MPIBAIJ};
2067 
2068 
2069 EXTERN_C_BEGIN
2070 #undef __FUNCT__
2071 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
2072 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
2073 {
2074   PetscFunctionBegin;
2075   *a      = ((Mat_MPIBAIJ *)A->data)->A;
2076   *iscopy = PETSC_FALSE;
2077   PetscFunctionReturn(0);
2078 }
2079 EXTERN_C_END
2080 
2081 EXTERN_C_BEGIN
2082 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*);
2083 EXTERN_C_END
2084 
2085 #undef __FUNCT__
2086 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ"
2087 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
2088 {
2089   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)B->data;
2090   PetscInt       m = B->rmap.n/bs,cstart = baij->cstartbs, cend = baij->cendbs,j,nnz,i,d;
2091   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart = baij->rstartbs,ii;
2092   const PetscInt *JJ;
2093   PetscScalar    *values;
2094   PetscErrorCode ierr;
2095 
2096   PetscFunctionBegin;
2097 #if defined(PETSC_OPT_g)
2098   if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_RANGE,"Ii[0] must be 0 it is %D",Ii[0]);
2099 #endif
2100   ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2101   o_nnz = d_nnz + m;
2102 
2103   for (i=0; i<m; i++) {
2104     nnz     = Ii[i+1]- Ii[i];
2105     JJ      = J + Ii[i];
2106     nnz_max = PetscMax(nnz_max,nnz);
2107 #if defined(PETSC_OPT_g)
2108     if (nnz < 0) SETERRQ1(PETSC_ERR_ARG_RANGE,"Local row %D has a negative %D number of columns",i,nnz);
2109 #endif
2110     for (j=0; j<nnz; j++) {
2111       if (*JJ >= cstart) break;
2112       JJ++;
2113     }
2114     d = 0;
2115     for (; j<nnz; j++) {
2116       if (*JJ++ >= cend) break;
2117       d++;
2118     }
2119     d_nnz[i] = d;
2120     o_nnz[i] = nnz - d;
2121   }
2122   ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2123   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2124 
2125   if (v) values = (PetscScalar*)v;
2126   else {
2127     ierr = PetscMalloc(bs*bs*(nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2128     ierr = PetscMemzero(values,bs*bs*nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2129   }
2130 
2131   ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2132   for (i=0; i<m; i++) {
2133     ii   = i + rstart;
2134     nnz  = Ii[i+1]- Ii[i];
2135     ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? bs*bs*Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
2136   }
2137   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2138   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2139   ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr);
2140 
2141   if (!v) {
2142     ierr = PetscFree(values);CHKERRQ(ierr);
2143   }
2144   PetscFunctionReturn(0);
2145 }
2146 
2147 #undef __FUNCT__
2148 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR"
2149 /*@C
2150    MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in block AIJ
2151    (BAIJ) format, given a block-CSR description of the local rows.
2152 
2153    Collective on MPI_Comm
2154 
2155    Input Parameters:
2156 +  B - the matrix
2157 .  bs - the block size
2158 .  i - the indices into j for the start of each local block row (starts with zero)
2159 .  j - the column indices for each local block row (starts with zero); these must be sorted within each row
2160 -  v - optional values in the matrix (bs*bs entries per block)

2161    Level: developer
2162 
2163 .keywords: matrix, block, aij, compressed row, sparse, parallel
2164 
2165 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIBAIJ(), MATMPIBAIJ
2166 @*/
2167 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2168 {
2169   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]);
2170 
2171   PetscFunctionBegin;
2172   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
2173   if (f) {
2174     ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr);
2175   }
2176   PetscFunctionReturn(0);
2177 }
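
/*
   Illustrative usage sketch (not part of the library): describing the
   local block rows in CSR form for the routine above. ii/jj index BLOCK
   rows and columns; if a value array is supplied it must hold bs*bs
   entries per block. Note the routine both preallocates and assembles
   the matrix. All sizes and names here are hypothetical.
*/
static PetscErrorCode ExampleSetCSR(MPI_Comm comm,Mat *newmat)
{
  PetscErrorCode ierr;
  Mat            B;
  const PetscInt bs = 2;
  PetscInt       ii[] = {0,2,3};  /* two local block rows with 2 and 1 blocks */
  PetscInt       jj[] = {0,1,1};  /* block column indices, sorted within each row */

  PetscFunctionBegin;
  ierr = MatCreate(comm,&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,2*bs,2*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetType(B,MATMPIBAIJ);CHKERRQ(ierr);
  ierr = MatMPIBAIJSetPreallocationCSR(B,bs,ii,jj,PETSC_NULL);CHKERRQ(ierr); /* zero values inserted */
  *newmat = B;
  PetscFunctionReturn(0);
}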
2178 
2179 EXTERN_C_BEGIN
2180 #undef __FUNCT__
2181 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
2182 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
2183 {
2184   Mat_MPIBAIJ    *b;
2185   PetscErrorCode ierr;
2186   PetscInt       i;
2187 
2188   PetscFunctionBegin;
2189   B->preallocated = PETSC_TRUE;
2190   ierr = PetscOptionsBegin(B->comm,B->prefix,"Options for MPIBAIJ matrix","Mat");CHKERRQ(ierr);
2191     ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
2192   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2193 
2194   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
2195   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2196   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2197   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2198   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2199 
2200   B->rmap.bs  = bs;
2201   B->cmap.bs  = bs;
2202   ierr = PetscMapInitialize(B->comm,&B->rmap);CHKERRQ(ierr);
2203   ierr = PetscMapInitialize(B->comm,&B->cmap);CHKERRQ(ierr);
2204 
2205   if (d_nnz) {
2206     for (i=0; i<B->rmap.n/bs; i++) {
2207       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]);
2208     }
2209   }
2210   if (o_nnz) {
2211     for (i=0; i<B->rmap.n/bs; i++) {
2212       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]);
2213     }
2214   }
2215 
2216   b = (Mat_MPIBAIJ*)B->data;
2217   b->bs2 = bs*bs;
2218   b->mbs = B->rmap.n/bs;
2219   b->nbs = B->cmap.n/bs;
2220   b->Mbs = B->rmap.N/bs;
2221   b->Nbs = B->cmap.N/bs;
2222 
2223   for (i=0; i<=b->size; i++) {
2224     b->rangebs[i] = B->rmap.range[i]/bs;
2225   }
2226   b->rstartbs = B->rmap.rstart/bs;
2227   b->rendbs   = B->rmap.rend/bs;
2228   b->cstartbs = B->cmap.rstart/bs;
2229   b->cendbs   = B->cmap.rend/bs;
2230 
2231   ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2232   ierr = MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);CHKERRQ(ierr);
2233   ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
2234   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2235   ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
2236   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2237   ierr = MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);CHKERRQ(ierr);
2238   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
2239   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
2240   ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
2241 
2242   ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);
2243 
2244   PetscFunctionReturn(0);
2245 }
2246 EXTERN_C_END
2247 
2248 EXTERN_C_BEGIN
2249 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
2250 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
2251 EXTERN_C_END
2252 
2253 /*MC
2254    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
2255 
2256    Options Database Keys:
2257 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
2258 . -mat_block_size <bs> - set the blocksize used to store the matrix
2259 - -mat_use_hash_table <fact> - use a hash table for matrix assembly (fact is the table size factor)
2260 
2261   Level: beginner
2262 
2263 .seealso: MatCreateMPIBAIJ()
2264 M*/
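
/*
   Illustrative usage sketch (not part of the library): selecting this
   type at runtime, e.g. with -mat_type mpibaij -mat_block_size 3 on the
   command line. The helper name is hypothetical.
*/
static PetscErrorCode ExampleCreateFromOptions(MPI_Comm comm,PetscInt m,PetscInt n,Mat *A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetFromOptions(*A);CHKERRQ(ierr); /* picks up -mat_type mpibaij */
  PetscFunctionReturn(0);
}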
2265 
2266 EXTERN_C_BEGIN
2267 #undef __FUNCT__
2268 #define __FUNCT__ "MatCreate_MPIBAIJ"
2269 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIBAIJ(Mat B)
2270 {
2271   Mat_MPIBAIJ    *b;
2272   PetscErrorCode ierr;
2273   PetscTruth     flg;
2274 
2275   PetscFunctionBegin;
2276   ierr = PetscNew(Mat_MPIBAIJ,&b);CHKERRQ(ierr);
2277   B->data = (void*)b;
2278 
2280   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2281   B->mapping    = 0;
2282   B->factor     = 0;
2283   B->assembled  = PETSC_FALSE;
2284 
2285   B->insertmode = NOT_SET_VALUES;
2286   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
2287   ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr);
2288 
2289   /* build local table of row and column ownerships */
2290   ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);
2291 
2292   /* build cache for off array entries formed */
2293   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
2294   b->donotstash  = PETSC_FALSE;
2295   b->colmap      = PETSC_NULL;
2296   b->garray      = PETSC_NULL;
2297   b->roworiented = PETSC_TRUE;
2298 
2299 #if defined(PETSC_USE_MAT_SINGLE)
2300   /* stuff for MatSetValues_XXX in single precision */
2301   b->setvalueslen     = 0;
2302   b->setvaluescopy    = PETSC_NULL;
2303 #endif
2304 
2305   /* stuff used in block assembly */
2306   b->barray       = 0;
2307 
2308   /* stuff used for matrix vector multiply */
2309   b->lvec         = 0;
2310   b->Mvctx        = 0;
2311 
2312   /* stuff for MatGetRow() */
2313   b->rowindices   = 0;
2314   b->rowvalues    = 0;
2315   b->getrowactive = PETSC_FALSE;
2316 
2317   /* hash table stuff */
2318   b->ht           = 0;
2319   b->hd           = 0;
2320   b->ht_size      = 0;
2321   b->ht_flag      = PETSC_FALSE;
2322   b->ht_fact      = 0;
2323   b->ht_total_ct  = 0;
2324   b->ht_insert_ct = 0;
2325 
2326   ierr = PetscOptionsBegin(B->comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr);
2327     ierr = PetscOptionsTruth("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
2328     if (flg) {
2329       PetscReal fact = 1.39;
2330       ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
2331       ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr);
2332       if (fact <= 1.0) fact = 1.39;
2333       ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
2334       ierr = PetscInfo1(0,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
2335     }
2336   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2337 
2338   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2339                                      "MatStoreValues_MPIBAIJ",
2340                                      MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
2341   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2342                                      "MatRetrieveValues_MPIBAIJ",
2343                                      MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
2344   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
2345                                      "MatGetDiagonalBlock_MPIBAIJ",
2346                                      MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
2347   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C",
2348                                      "MatMPIBAIJSetPreallocation_MPIBAIJ",
2349                                      MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
2350   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",
2351                                      "MatMPIBAIJSetPreallocationCSR_MPIBAIJ",
2352                                      MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
2353   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
2354                                      "MatDiagonalScaleLocal_MPIBAIJ",
2355                                      MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
2356   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C",
2357                                      "MatSetHashTableFactor_MPIBAIJ",
2358                                      MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
2359   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);
2360   PetscFunctionReturn(0);
2361 }
2362 EXTERN_C_END
2363 
2364 /*MC
2365    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
2366 
2367    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
2368    and MATMPIBAIJ otherwise.
2369 
2370    Options Database Keys:
2371 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
2372 
2373   Level: beginner
2374 
2375 .seealso: MatCreateMPIBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
2376 M*/
2377 
2378 EXTERN_C_BEGIN
2379 #undef __FUNCT__
2380 #define __FUNCT__ "MatCreate_BAIJ"
2381 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_BAIJ(Mat A)
2382 {
2383   PetscErrorCode ierr;
2384   PetscMPIInt    size;
2385 
2386   PetscFunctionBegin;
2387   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
2388   if (size == 1) {
2389     ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr);
2390   } else {
2391     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
2392   }
2393   PetscFunctionReturn(0);
2394 }
2395 EXTERN_C_END
2396 
2397 #undef __FUNCT__
2398 #define __FUNCT__ "MatMPIBAIJSetPreallocation"
2399 /*@C
2400    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
2401    (block compressed row).  For good matrix assembly performance
2402    the user should preallocate the matrix storage by setting the parameters
2403    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2404    performance can be increased by more than a factor of 50.
2405 
2406    Collective on Mat
2407 
2408    Input Parameters:
2409 +  A - the matrix
2410 .  bs   - size of block
2411 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2412            submatrix  (same for all local rows)
2413 .  d_nnz - array containing the number of block nonzeros in the various block rows
2414            of the diagonal portion of the local submatrix (possibly different for each block
2415            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
2416 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2417            submatrix (same for all local rows).
2418 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2419            off-diagonal portion of the local submatrix (possibly different for
2420            each block row) or PETSC_NULL.
2421 
2422    If the *_nnz parameter is given then the *_nz parameter is ignored
2423 
2424    Options Database Keys:
2425 +   -mat_block_size - size of the blocks to use
2426 -   -mat_use_hash_table <fact> - use a hash table for matrix assembly (fact is the table size factor)
2427 
2428    Notes:
2429    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
2430    then it must be used on all processors that share the object for that argument.
2431 
2432    Storage Information:
2433    For a square global matrix we define each processor's diagonal portion
2434    to be its local rows and the corresponding columns (a square submatrix);
2435    each processor's off-diagonal portion encompasses the remainder of the
2436    local matrix (a rectangular submatrix).
2437 
2438    The user can specify preallocated storage for the diagonal part of
2439    the local submatrix with either d_nz or d_nnz (not both).  Set
2440    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2441    memory allocation.  Likewise, specify preallocated storage for the
2442    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2443 
2444    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2445    the figure below we depict these three local rows and all columns (0-11).
2446 
2447 .vb
2448            0 1 2 3 4 5 6 7 8 9 10 11
2449           -------------------
2450    row 3  |  o o o d d d o o o o o o
2451    row 4  |  o o o d d d o o o o o o
2452    row 5  |  o o o d d d o o o o o o
2453           -------------------
2454 .ve
2455 
2456    Thus, any entries in the d locations are stored in the d (diagonal)
2457    submatrix, and any entries in the o locations are stored in the
2458    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
2459    stored simply in the MATSEQBAIJ format for compressed row storage.
2460 
2461    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2462    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2463    In general, for PDE problems in which most nonzeros are near the diagonal,
2464    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2465    or you will get TERRIBLE performance; see the users' manual chapter on
2466    matrices.
2467 
2468    Level: intermediate
2469 
2470 .keywords: matrix, block, aij, compressed row, sparse, parallel
2471 
2472 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR()
2473 @*/
2474 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2475 {
2476   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
2477 
2478   PetscFunctionBegin;
2479   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2480   if (f) {
2481     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2482   }
2483   PetscFunctionReturn(0);
2484 }
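
/*
   Illustrative usage sketch (not part of the library): per-block-row
   preallocation with the routine above. d_nnz[i] and o_nnz[i] count
   nonzero BLOCKS in block row i of the diagonal and off-diagonal
   portions respectively. All sizes and names here are hypothetical.
*/
static PetscErrorCode ExamplePreallocateBAIJ(MPI_Comm comm,Mat *newmat)
{
  PetscErrorCode ierr;
  Mat            B;
  const PetscInt bs = 3,mbs = 4;                   /* four local block rows of size 3 */
  PetscInt       d_nnz[] = {3,3,3,2},o_nnz[] = {1,0,0,1};

  PetscFunctionBegin;
  ierr = MatCreate(comm,&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,mbs*bs,mbs*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetType(B,MATMPIBAIJ);CHKERRQ(ierr);
  ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr); /* *_nz ignored when *_nnz given */
  *newmat = B;
  PetscFunctionReturn(0);
}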
2485 
2486 #undef __FUNCT__
2487 #define __FUNCT__ "MatCreateMPIBAIJ"
2488 /*@C
2489    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
2490    (block compressed row).  For good matrix assembly performance
2491    the user should preallocate the matrix storage by setting the parameters
2492    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2493    performance can be increased by more than a factor of 50.
2494 
2495    Collective on MPI_Comm
2496 
2497    Input Parameters:
2498 +  comm - MPI communicator
2499 .  bs   - size of block
2500 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2501            This value should be the same as the local size used in creating the
2502            y vector for the matrix-vector product y = Ax.
2503 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2504            This value should be the same as the local size used in creating the
2505            x vector for the matrix-vector product y = Ax.
2506 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2507 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2508 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
2509            submatrix  (same for all local rows)
2510 .  d_nnz - array containing the number of nonzero blocks in the various block rows
2511            of the diagonal portion of the local submatrix (possibly different for each block
2512            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
2513 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
2514            submatrix (same for all local rows).
2515 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
2516            off-diagonal portion of the local submatrix (possibly different for
2517            each block row) or PETSC_NULL.
2518 
2519    Output Parameter:
2520 .  A - the matrix
2521 
2522    Options Database Keys:
2523 +   -mat_block_size - size of the blocks to use
2524 -   -mat_use_hash_table <fact> - use a hash table for matrix assembly (fact is the table size factor)
2525 
2526    Notes:
2527    If the *_nnz parameter is given then the *_nz parameter is ignored
2528 
2529    A nonzero block is any block that has 1 or more nonzeros in it
2530 
2531    The user MUST specify either the local or global matrix dimensions
2532    (possibly both).
2533 
2534    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
2535    then it must be used on all processors that share the object for that argument.
2536 
2537    Storage Information:
2538    For a square global matrix we define each processor's diagonal portion
2539    to be its local rows and the corresponding columns (a square submatrix);
2540    each processor's off-diagonal portion encompasses the remainder of the
2541    local matrix (a rectangular submatrix).
2542 
2543    The user can specify preallocated storage for the diagonal part of
2544    the local submatrix with either d_nz or d_nnz (not both).  Set
2545    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2546    memory allocation.  Likewise, specify preallocated storage for the
2547    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2548 
2549    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2550    the figure below we depict these three local rows and all columns (0-11).
2551 
2552 .vb
2553            0 1 2 3 4 5 6 7 8 9 10 11
2554           -------------------
2555    row 3  |  o o o d d d o o o o o o
2556    row 4  |  o o o d d d o o o o o o
2557    row 5  |  o o o d d d o o o o o o
2558           -------------------
2559 .ve
2560 
2561    Thus, any entries in the d locations are stored in the d (diagonal)
2562    submatrix, and any entries in the o locations are stored in the
2563    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
2564    stored simply in the MATSEQBAIJ format for compressed row storage.
2565 
2566    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2567    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2568    In general, for PDE problems in which most nonzeros are near the diagonal,
2569    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2570    or you will get TERRIBLE performance; see the users' manual chapter on
2571    matrices.
2572 
2573    Level: intermediate
2574 
2575 .keywords: matrix, block, aij, compressed row, sparse, parallel
2576 
2577 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
2578 @*/
2579 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2580 {
2581   PetscErrorCode ierr;
2582   PetscMPIInt    size;
2583 
2584   PetscFunctionBegin;
2585   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2586   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2587   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2588   if (size > 1) {
2589     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
2590     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2591   } else {
2592     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
2593     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2594   }
2595   PetscFunctionReturn(0);
2596 }
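
/*
   Illustrative usage sketch (not part of the library): the one-call
   convenience form above, equivalent to MatCreate()/MatSetSizes()/
   MatSetType() followed by the preallocation routines. Names and sizes
   are hypothetical.
*/
static PetscErrorCode ExampleCreateConvenience(MPI_Comm comm,Mat *A)
{
  PetscErrorCode ierr;
  const PetscInt bs = 2,mlocal = 10*bs;  /* ten block rows per process */

  PetscFunctionBegin;
  ierr = MatCreateMPIBAIJ(comm,bs,mlocal,mlocal,PETSC_DETERMINE,PETSC_DETERMINE,
                          5,PETSC_NULL,2,PETSC_NULL,A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}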
2597 
2598 #undef __FUNCT__
2599 #define __FUNCT__ "MatDuplicate_MPIBAIJ"
2600 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2601 {
2602   Mat            mat;
2603   Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
2604   PetscErrorCode ierr;
2605   PetscInt       len=0;
2606 
2607   PetscFunctionBegin;
2608   *newmat       = 0;
2609   ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr);
2610   ierr = MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);CHKERRQ(ierr);
2611   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
2612   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2613 
2614   mat->factor       = matin->factor;
2615   mat->preallocated = PETSC_TRUE;
2616   mat->assembled    = PETSC_TRUE;
2617   mat->insertmode   = NOT_SET_VALUES;
2618 
2619   a      = (Mat_MPIBAIJ*)mat->data;
2620   mat->rmap.bs  = matin->rmap.bs;
2621   a->bs2   = oldmat->bs2;
2622   a->mbs   = oldmat->mbs;
2623   a->nbs   = oldmat->nbs;
2624   a->Mbs   = oldmat->Mbs;
2625   a->Nbs   = oldmat->Nbs;
2626 
2627   ierr = PetscMapCopy(matin->comm,&matin->rmap,&mat->rmap);CHKERRQ(ierr);
2628   ierr = PetscMapCopy(matin->comm,&matin->cmap,&mat->cmap);CHKERRQ(ierr);
2629 
2630   a->size         = oldmat->size;
2631   a->rank         = oldmat->rank;
2632   a->donotstash   = oldmat->donotstash;
2633   a->roworiented  = oldmat->roworiented;
2634   a->rowindices   = 0;
2635   a->rowvalues    = 0;
2636   a->getrowactive = PETSC_FALSE;
2637   a->barray       = 0;
2638   a->rstartbs     = oldmat->rstartbs;
2639   a->rendbs       = oldmat->rendbs;
2640   a->cstartbs     = oldmat->cstartbs;
2641   a->cendbs       = oldmat->cendbs;
2642 
2643   /* hash table stuff */
2644   a->ht           = 0;
2645   a->hd           = 0;
2646   a->ht_size      = 0;
2647   a->ht_flag      = oldmat->ht_flag;
2648   a->ht_fact      = oldmat->ht_fact;
2649   a->ht_total_ct  = 0;
2650   a->ht_insert_ct = 0;
2651 
2652   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
2653   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
2654   ierr = MatStashCreate_Private(matin->comm,matin->rmap.bs,&mat->bstash);CHKERRQ(ierr);
2655   if (oldmat->colmap) {
2656 #if defined (PETSC_USE_CTABLE)
2657   ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2658 #else
2659   ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2660   ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2661   ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2662 #endif
2663   } else a->colmap = 0;
2664 
2665   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2666     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2667     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2668     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
2669   } else a->garray = 0;
2670 
2671   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2672   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2673   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2674   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2675 
2676   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2677   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2678   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2679   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2680   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
2681   *newmat = mat;
2682 
2683   PetscFunctionReturn(0);
2684 }
2685 
2686 #include "petscsys.h"
2687 
2688 #undef __FUNCT__
2689 #define __FUNCT__ "MatLoad_MPIBAIJ"
2690 PetscErrorCode MatLoad_MPIBAIJ(PetscViewer viewer, MatType type,Mat *newmat)
2691 {
2692   Mat            A;
2693   PetscErrorCode ierr;
2694   int            fd;
2695   PetscInt       i,nz,j,rstart,rend;
2696   PetscScalar    *vals,*buf;
2697   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2698   MPI_Status     status;
2699   PetscMPIInt    rank,size,maxnz;
2700   PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
2701   PetscInt       *locrowlens = PETSC_NULL,*procsnz = PETSC_NULL,*browners = PETSC_NULL;
2702   PetscInt       jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax;
2703   PetscMPIInt    tag = ((PetscObject)viewer)->tag;
2704   PetscInt       *dlens = PETSC_NULL,*odlens = PETSC_NULL,*mask = PETSC_NULL,*masked1 = PETSC_NULL,*masked2 = PETSC_NULL,rowcount,odcount;
2705   PetscInt       dcount,kmax,k,nzcount,tmp,mend;
2706 
2707   PetscFunctionBegin;
2708   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr);
2709     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
2710   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2711 
2712   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2713   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2714   if (!rank) {
2715     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2716     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2717     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2718   }
2719 
2720   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2721   M = header[1]; N = header[2];
2722 
2723   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2724 
2725   /*
2726      This code adds extra rows to make sure the number of rows is
2727      divisible by the blocksize
2728   */
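  /* worked example (illustrative): M = 10, bs = 4 gives Mbs = 10/4 = 2 and
     extra_rows = 4 - 10 + 8 = 2; since extra_rows != bs, Mbs becomes 3 and
     the matrix is padded to 12 rows, with 1.0 placed on the padded diagonal */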
2729   Mbs        = M/bs;
2730   extra_rows = bs - M + bs*Mbs;
2731   if (extra_rows == bs) extra_rows = 0;
2732   else                  Mbs++;
2733   if (extra_rows && !rank) {
2734     ierr = PetscInfo(0,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2735   }
2736 
2737   /* determine ownership of all rows */
2738   mbs        = Mbs/size + ((Mbs % size) > rank);
2739   m          = mbs*bs;
2740   ierr       = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr);
2741   ierr       = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2742 
2743   /* process 0 needs enough room for process with most rows */
2744   if (!rank) {
2745     mmax = rowners[1];
2746     for (i=2; i<size; i++) {
2747       mmax = PetscMax(mmax,rowners[i]);
2748     }
2749     mmax*=bs;
2750   } else mmax = m;
2751 
2752   rowners[0] = 0;
2753   for (i=2; i<=size; i++)  rowners[i] += rowners[i-1];
2754   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2755   rstart = rowners[rank];
2756   rend   = rowners[rank+1];
2757 
2758   /* distribute row lengths to all processors */
2759   ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr);
2760   if (!rank) {
2761     mend = m;
2762     if (size == 1) mend = mend - extra_rows;
2763     ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr);
2764     for (j=mend; j<m; j++) locrowlens[j] = 1;
2765     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2766     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2767     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2768     for (j=0; j<m; j++) {
2769       procsnz[0] += locrowlens[j];
2770     }
2771     for (i=1; i<size; i++) {
2772       mend = browners[i+1] - browners[i];
2773       if (i == size-1) mend = mend - extra_rows;
2774       ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr);
2775       for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
2776       /* calculate the number of nonzeros on each processor */
2777       for (j=0; j<browners[i+1]-browners[i]; j++) {
2778         procsnz[i] += rowlengths[j];
2779       }
2780       ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2781     }
2782     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2783   } else {
2784     ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2785   }
2786 
2787   if (!rank) {
2788     /* determine max buffer needed and allocate it */
2789     maxnz = procsnz[0];
2790     for (i=1; i<size; i++) {
2791       maxnz = PetscMax(maxnz,procsnz[i]);
2792     }
2793     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2794 
2795     /* read in my part of the matrix column indices  */
2796     nz     = procsnz[0];
2797     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2798     mycols = ibuf;
2799     if (size == 1)  nz -= extra_rows;
2800     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2801     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2802 
2803     /* read in every ones (except the last) and ship off */
2804     for (i=1; i<size-1; i++) {
2805       nz   = procsnz[i];
2806       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2807       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2808     }
2809     /* read in the column indices for the last process */
2810     if (size != 1) {
2811       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2812       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2813       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2814       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2815     }
2816     ierr = PetscFree(cols);CHKERRQ(ierr);
2817   } else {
2818     /* determine buffer space needed for message */
2819     nz = 0;
2820     for (i=0; i<m; i++) {
2821       nz += locrowlens[i];
2822     }
2823     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2824     mycols = ibuf;
2825     /* receive message of column indices */
2826     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2827     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2828     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Received message length does not match the expected number of nonzeros");
2829   }
2830 
2831   /* loop over local rows, counting the diagonal and off-diagonal block entries */
2832   ierr     = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
2833   ierr     = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
2834   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2835   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2836   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
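  /* mask[] flags which block columns have been seen in the current block row;
     masked1[]/masked2[] remember exactly which flags were set (diagonal and
     off-diagonal blocks respectively) so that only those entries need to be
     cleared afterwards, instead of re-zeroing all Mbs slots per block row. */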
2837   rowcount = 0; nzcount = 0;
2838   for (i=0; i<mbs; i++) {
2839     dcount  = 0;
2840     odcount = 0;
2841     for (j=0; j<bs; j++) {
2842       kmax = locrowlens[rowcount];
2843       for (k=0; k<kmax; k++) {
2844         tmp = mycols[nzcount++]/bs;
2845         if (!mask[tmp]) {
2846           mask[tmp] = 1;
2847           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
2848           else masked1[dcount++] = tmp;
2849         }
2850       }
2851       rowcount++;
2852     }
2853 
2854     dlens[i]  = dcount;
2855     odlens[i] = odcount;
2856 
2857     /* zero out the mask elements we set */
2858     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2859     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2860   }
2861 
2862   /* create our matrix */
2863   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
2864   ierr = MatSetSizes(A,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
2865   ierr = MatSetType(A,type);CHKERRQ(ierr);
2866   ierr = MatMPIBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2867 
2868   /* each row in the file is stored with its column indices already sorted */
2869   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2870 
2871   if (!rank) {
2872     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2873     /* read in my part of the matrix numerical values  */
2874     nz = procsnz[0];
2875     vals = buf;
2876     mycols = ibuf;
2877     if (size == 1)  nz -= extra_rows;
2878     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2879     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2880 
2881     /* insert into matrix */
2882     jj      = rstart*bs;
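    /* jj is the first global (unblocked) row owned by this process; rows are
       inserted one point row at a time since locrowlens[] is per point row */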
2883     for (i=0; i<m; i++) {
2884       ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2885       mycols += locrowlens[i];
2886       vals   += locrowlens[i];
2887       jj++;
2888     }
2889     /* read in the other processes' values (except the last one) and ship them out */
2890     for (i=1; i<size-1; i++) {
2891       nz   = procsnz[i];
2892       vals = buf;
2893       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2894       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2895     }
2896     /* the last process (the extra rows are not stored on disk) */
2897     if (size != 1) {
2898       nz   = procsnz[size-1] - extra_rows;
2899       vals = buf;
2900       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2901       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2902       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
2903     }
2904     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2905   } else {
2906     /* receive numeric values */
2907     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2908 
2909     /* receive message of values */
2910     vals   = buf;
2911     mycols = ibuf;
2912     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2913     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2914     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Received message length does not match the expected number of nonzeros");
2915 
2916     /* insert into matrix */
2917     jj      = rstart*bs;
2918     for (i=0; i<m; i++) {
2919       ierr    = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2920       mycols += locrowlens[i];
2921       vals   += locrowlens[i];
2922       jj++;
2923     }
2924   }
2925   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2926   ierr = PetscFree(buf);CHKERRQ(ierr);
2927   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2928   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
2929   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
2930   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
2931   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2932   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2933 
2934   *newmat = A;
2935   PetscFunctionReturn(0);
2936 }
2937 
2938 #undef __FUNCT__
2939 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
2940 /*@
2941    MatMPIBAIJSetHashTableFactor - Sets the factor used to compute the size of the hash table used during assembly.
2942 
2943    Collective on Mat
2944 
2945    Input Parameters:
2946 +  mat  - the matrix
2947 -  fact - sizing factor for the hash table; a larger factor gives fewer collisions at the cost of memory
2948 
2949    Level: advanced
2950 
2951    Notes:
2952    This can also be set by the command line option: -mat_use_hash_table <fact>
2953 
2954 .keywords: matrix, hashtable, factor, HT
2955 
2956 .seealso: MatSetOption()
2957 @*/
2958 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
2959 {
2960   PetscErrorCode ierr,(*f)(Mat,PetscReal);
2961 
2962   PetscFunctionBegin;
2963   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);CHKERRQ(ierr);
2964   if (f) {
2965     ierr = (*f)(mat,fact);CHKERRQ(ierr);
2966   }
2967   PetscFunctionReturn(0);
2968 }
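/* A minimal usage sketch (hypothetical factor value; assumes mat was created
   as a MATMPIBAIJ matrix and that the hash-table insertion path is enabled
   via the MAT_USE_HASH_TABLE option, as in this PETSc version):

     ierr = MatSetOption(mat,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
     ierr = MatMPIBAIJSetHashTableFactor(mat,1.6);CHKERRQ(ierr);

   The factor has no effect unless hash-table insertion is enabled, either as
   above or with -mat_use_hash_table <fact>. */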
2969 
2970 EXTERN_C_BEGIN
2971 #undef __FUNCT__
2972 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
2973 PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
2974 {
2975   Mat_MPIBAIJ *baij;
2976 
2977   PetscFunctionBegin;
2978   baij = (Mat_MPIBAIJ*)mat->data;
2979   baij->ht_fact = fact;
2980   PetscFunctionReturn(0);
2981 }
2982 EXTERN_C_END
2983 
2984 #undef __FUNCT__
2985 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
2986 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
2987 {
2988   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2989   PetscFunctionBegin;
2990   *Ad     = a->A;
2991   *Ao     = a->B;
2992   *colmap = a->garray;
2993   PetscFunctionReturn(0);
2994 }
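/* A minimal sketch of how the returned pieces relate (A is an assembled
   MATMPIBAIJ matrix; all three outputs are borrowed references and must not
   be destroyed by the caller):

     Mat      Ad,Ao;
     PetscInt *colmap;
     ierr = MatMPIBAIJGetSeqBAIJ(A,&Ad,&Ao,&colmap);CHKERRQ(ierr);

   Ad couples the local rows with the locally owned block columns, Ao holds
   the remaining (off-process) block columns, and colmap[j] gives the global
   block-column index of local block column j of Ao. */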
2995