/* /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 6024bd2c273c6b4ac03c2bd49d1968cdcd0f9df3) */
#define PETSCMAT_DLL

#include "src/mat/impls/baij/mpi/mpibaij.h"   /*I  "petscmat.h"  I*/

EXTERN PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
EXTERN PetscErrorCode DisAssemble_MPIBAIJ(Mat);
EXTERN PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat,PetscInt,IS[],PetscInt);
EXTERN PetscErrorCode MatGetSubMatrices_MPIBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat *[]);
EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []);
EXTERN PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
EXTERN PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
EXTERN PetscErrorCode MatPrintHelp_SeqBAIJ(Mat);
EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,const PetscScalar*);

/*  UGLY, ugly, ugly
   When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does
   not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and
   inserts them into the single-precision data structures. The function MatSetValuesBlocked_MPIBAIJ()
   converts the entries to single precision and then calls ..._MatScalar() to insert them.
*/
#if defined(PETSC_USE_MAT_SINGLE)
EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
EXTERN PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
EXTERN PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
#else
#define MatSetValuesBlocked_SeqBAIJ_MatScalar      MatSetValuesBlocked_SeqBAIJ
#define MatSetValues_MPIBAIJ_MatScalar             MatSetValues_MPIBAIJ
#define MatSetValuesBlocked_MPIBAIJ_MatScalar      MatSetValuesBlocked_MPIBAIJ
#define MatSetValues_MPIBAIJ_HT_MatScalar          MatSetValues_MPIBAIJ_HT
#define MatSetValuesBlocked_MPIBAIJ_HT_MatScalar   MatSetValuesBlocked_MPIBAIJ_HT
#endif

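/*
   Computes the entry of maximum absolute value in each local row: take the row max of
   the diagonal block A and of the off-diagonal block B, then keep, for each row,
   whichever entry has the larger absolute value.
*/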
#undef __FUNCT__
#define __FUNCT__ "MatGetRowMax_MPIBAIJ"
PetscErrorCode MatGetRowMax_MPIBAIJ(Mat A,Vec v)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;

  ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->m,&vtmp);CHKERRQ(ierr);
  ierr = MatGetRowMax(a->B,vtmp);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->m; i++){
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) va[i] = vb[i];
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = VecDestroy(vtmp);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPIBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*
     Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  This is done in a non-scalable way since the
   length of colmap equals the number of block columns in the global matrix.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIBAIJ_Private"
PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       nbs = B->nbs,i,bs=mat->bs;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr);
  for (i=0; i<nbs; i++){
    ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}
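/*
   Illustrative example (not from the original source): with bs = 2 and off-diagonal
   block columns garray = {3,7,12}, the array version stores colmap[3] = 1, colmap[7] = 3,
   colmap[12] = 5, i.e. 1 + bs*(local block index); all other entries stay 0, which marks
   "not present".  A global point column 15 (block column 15/2 = 7) then maps to local
   point column (colmap[7]-1) + 15%2 = 2 + 1 = 3.
*/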

#define CHUNKSIZE  10

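/*
   Inserts one scalar at point position (row,col) of the local diagonal block.  The block
   row is binary-searched for the block column; if the block is absent it is inserted,
   shifting later blocks up, and when the row has no free slot the whole (i,j,a) storage
   is reallocated with CHUNKSIZE extra blocks of room for this row.
*/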
#define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto a_noinsert; \
        } \
      } \
      if (a->nonew == 1) goto a_noinsert; \
      else if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
      if (nrow >= rmax) { \
        /* there is no extra room in row, therefore enlarge */ \
        PetscInt       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
        MatScalar *new_a; \
 \
        if (a->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
 \
        /* malloc new storage space */ \
        len     = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(PetscInt); \
        ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
        new_j   = (PetscInt*)(new_a + bs2*new_nz); \
        new_i   = new_j + new_nz; \
 \
        /* copy over old data into new slots */ \
        for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \
        for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
        ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); \
        len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
        ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
        ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
                    aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
        /* free up old matrix storage */ \
        ierr = PetscFree(a->a);CHKERRQ(ierr);  \
        if (!a->singlemalloc) { \
          ierr = PetscFree(a->i);CHKERRQ(ierr); \
          ierr = PetscFree(a->j);CHKERRQ(ierr); \
        } \
        aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
        a->singlemalloc = PETSC_TRUE; \
 \
        rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
        rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
        ierr = PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar)));CHKERRQ(ierr); \
        a->maxnz += bs2*CHUNKSIZE; \
        a->reallocs++; \
        a->nz++; \
      } \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      a_noinsert:; \
    ailen[brow] = nrow; \
}

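/* Same as MatSetValues_SeqBAIJ_A_Private() but operates on the off-diagonal block B. */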
#define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto b_noinsert; \
        } \
      } \
      if (b->nonew == 1) goto b_noinsert; \
      else if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
      if (nrow >= rmax) { \
        /* there is no extra room in row, therefore enlarge */ \
        PetscInt       new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
        MatScalar *new_a; \
 \
        if (b->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
 \
        /* malloc new storage space */ \
        len     = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(PetscInt); \
        ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
        new_j   = (PetscInt*)(new_a + bs2*new_nz); \
        new_i   = new_j + new_nz; \
 \
        /* copy over old data into new slots */ \
        for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \
        for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
        ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); \
        len  = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
        ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
        ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
                    ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
        /* free up old matrix storage */ \
        ierr = PetscFree(b->a);CHKERRQ(ierr);  \
        if (!b->singlemalloc) { \
          ierr = PetscFree(b->i);CHKERRQ(ierr); \
          ierr = PetscFree(b->j);CHKERRQ(ierr); \
        } \
        ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
        b->singlemalloc = PETSC_TRUE; \
 \
        rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
        rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
        ierr = PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar)));CHKERRQ(ierr); \
        b->maxnz += bs2*CHUNKSIZE; \
        b->reallocs++; \
        b->nz++; \
      } \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      b_noinsert:; \
    bilen[brow] = nrow; \
}

#if defined(PETSC_USE_MAT_SINGLE)
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ"
PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,N = m*n;
  MatScalar      *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen  = N;
  }
  vsingle = b->setvaluescopy;

  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValues_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,N = m*n*b->bs2;
  MatScalar      *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen  = N;
  }
  vsingle = b->setvaluescopy;
  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,N = m*n;
  MatScalar      *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen  = N;
  }
  vsingle = b->setvaluescopy;
  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValues_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,N = m*n*b->bs2;
  MatScalar      *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen  = N;
  }
  vsingle = b->setvaluescopy;
  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif

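/*
   Scalar (non-blocked) insertion: entries whose rows are owned locally go directly into
   the diagonal block A or, after translating the column through colmap, into the
   off-diagonal block B; entries for rows owned by other processes are stashed and
   communicated in MatAssemblyBegin()/MatAssemblyEnd().
*/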
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar"
PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  MatScalar      value;
  PetscTruth     roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=baij->rstart_bs;
  PetscInt       rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
  PetscInt       cend_orig=baij->cend_bs,bs=mat->bs;

  /* Some Variables required in the macro */
  Mat            A = baij->A;
  Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)(A)->data;
  PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar      *aa=a->a;

  Mat            B = baij->B;
  Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(B)->data;
  PetscInt       *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar      *ba=b->a;

  PetscInt       *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt       low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar      *ap,*bap;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      row = im[i] - rstart_orig;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig){
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->N-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B = baij->B;
              b = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba=b->a;
            } else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

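/*
   Blocked insertion: values arrive as bs-by-bs blocks.  baij->barray is scratch space used
   to gather one block into contiguous storage when it is strided inside the input array v
   (no copy is needed when the block is already contiguous, i.e. n == 1 row-oriented or
   m == 1 column-oriented).
*/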
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_MatScalar"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
  const MatScalar *value;
  MatScalar       *barray=baij->barray;
  PetscTruth      roworiented = baij->roworiented;
  PetscErrorCode  ierr;
  PetscInt        i,j,ii,jj,row,col,rstart=baij->rstart;
  PetscInt        rend=baij->rend,cstart=baij->cstart,stepval;
  PetscInt        cend=baij->cend,bs=mat->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If n == 1 (row-oriented) or m == 1 (column-oriented) the block is contiguous and a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++  = *value++;
            }
          }
          barray -=bs2;
        }

        if (in[j] >= cstart && in[j] < cend){
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
        else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined (PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
            }
          }
          else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#define HASH_KEY 0.6180339887
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
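/*
   HASH() is multiplicative hashing with the golden-ratio conjugate (sqrt(5)-1)/2 as the
   multiplier: the index is size times the fractional part of key*HASH_KEY.  Illustrative
   example (not from the original source): size = 1000, key = 7 gives tmp = 4.3262...,
   fractional part 0.3262..., so HASH = 326.  Collisions are resolved by the callers with
   linear probing, so the _HT routines below can only update entries whose (row,col) block
   already exists in the table built at assembly time.
*/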
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_HT_MatScalar"
PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscTruth     roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=baij->rstart_bs;
  PetscInt       rend_orig=baij->rend_bs,Nbs=baij->Nbs;
  PetscInt       h1,key,size=baij->ht_size,bs=mat->bs,*HT=baij->ht,idx;
  PetscReal      tmp;
  MatScalar      **HD = baij->hd,value;
#if defined(PETSC_USE_DEBUG)
  PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1);
#endif
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j=0; j<n; j++) {
        col = in[j];
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        /* Look up into the Hash Table */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#endif
        /* A HASH table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT_MatScalar"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
  PetscTruth      roworiented = baij->roworiented;
  PetscErrorCode  ierr;
  PetscInt        i,j,ii,jj,row,col;
  PetscInt        rstart=baij->rstart;
  PetscInt        rend=baij->rend,stepval,bs=mat->bs,bs2=baij->bs2;
  PetscInt        h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  PetscReal       tmp;
  MatScalar       **HD = baij->hd,*baij_a;
  const MatScalar *v_t,*value;
#if defined(PETSC_USE_DEBUG)
  PetscInt        total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
#endif
    row   = im[i];
    v_t   = v + i*bs2;
    if (row >= rstart && row < rend) {
      for (j=0; j<n; j++) {
        col = in[j];

        /* Look up into the Hash Table */
        key = row*Nbs+col+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        total_ct++;
        insert_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#endif
        baij_a = HD[idx];
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t  += bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj]  += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj]  = *value++;
              }
            }
          }
        } else {
          value = v + j*(stepval+bs)*bs + i*bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj]  += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj]  = *value++;
              }
            }
          }
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

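/* Retrieves values from the locally owned rows only; requesting a row owned by another process raises an error. */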
#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIBAIJ"
PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs=mat->bs,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
  PetscInt       bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
    if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]);
        if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend){
          col = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIBAIJ"
PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,bs2=baij->bs2,bs=baij->A->bs,nz,row,col;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr =  MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      nz = amat->nz*bs2;
      for (i=0; i<nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      nz = bmat->nz*bs2;
      for (i=0; i<nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *nrm = sqrt(*nrm);
    } else if (type == NORM_1) { /* max column sum */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray=baij->garray,cstart=baij->cstart;
      ierr = PetscMalloc((2*mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      tmp2 = tmp + mat->N;
      ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
      v = amat->a; jj = amat->j;
      for (i=0; i<amat->nz; i++) {
        for (j=0; j<bs; j++){
          col = bs*(cstart + *jj) + j; /* column index */
          for (row=0; row<bs; row++){
            tmp[col] += PetscAbsScalar(*v);  v++;
          }
        }
        jj++;
      }
      v = bmat->a; jj = bmat->j;
      for (i=0; i<bmat->nz; i++) {
        for (j=0; j<bs; j++){
          col = bs*garray[*jj] + j;
          for (row=0; row<bs; row++){
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *nrm = 0.0;
      for (j=0; j<mat->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row sum */
      PetscReal *sums;
      ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr);
      sum = 0.0;
      for (j=0; j<amat->mbs; j++) {
        for (row=0; row<bs; row++) sums[row] = 0.0;
        v = amat->a + bs2*amat->i[j];
        nz = amat->i[j+1]-amat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++){
            for (row=0; row<bs; row++){
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        v = bmat->a + bs2*bmat->i[j];
        nz = bmat->i[j+1]-bmat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++){
            for (row=0; row<bs; row++){
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        for (row=0; row<bs; row++){
          if (sums[row] > sum) sum = sums[row];
        }
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
      ierr = PetscFree(sums);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
    }
  }
  PetscFunctionReturn(0);
}

/*
  Creates and fills the hash table.
  The table is created only once.
  If new entries need to be added to the matrix
  then the hash table has to be destroyed and
  recreated.
*/
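/*
   The table maps key = (block row)*Nbs + (block column) + 1 to the address of the
   corresponding bs2 block of values in A or B; collisions are resolved by linear
   probing, wrapping around at the end of the table.
*/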
#undef __FUNCT__
#define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            A = baij->A,B=baij->B;
  Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
  PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
  PetscErrorCode ierr;
  PetscInt       size,bs2=baij->bs2,rstart=baij->rstart;
  PetscInt       cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs;
  PetscInt       *HT,key;
  MatScalar      **HD;
  PetscReal      tmp;
#if defined(PETSC_USE_DEBUG)
  PetscInt       ct=0,max=0;
#endif

  PetscFunctionBegin;
  baij->ht_size=(PetscInt)(factor*nz);
  size = baij->ht_size;

  if (baij->ht) {
    PetscFunctionReturn(0);
  }

  /* Allocate Memory for Hash Table */
  ierr     = PetscMalloc((size)*(sizeof(PetscInt)+sizeof(MatScalar*))+1,&baij->hd);CHKERRQ(ierr);
  baij->ht = (PetscInt*)(baij->hd + size);
  HD       = baij->hd;
  HT       = baij->ht;

  ierr = PetscMemzero(HD,size*(sizeof(PetscInt)+sizeof(MatScalar*)));CHKERRQ(ierr);

  /* Loop Over A */
  for (i=0; i<a->mbs; i++) {
    for (j=ai[i]; j<ai[i+1]; j++) {
      row = i+rstart;
      col = aj[j]+cstart;

      key = row*Nbs + col + 1;
      h1  = HASH(size,key,tmp);
      for (k=0; k<size; k++){
        if (!HT[(h1+k)%size]) {
          HT[(h1+k)%size] = key;
          HD[(h1+k)%size] = a->a + j*bs2;
          break;
#if defined(PETSC_USE_DEBUG)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_DEBUG)
      if (k> max) max = k;
#endif
    }
  }
  /* Loop Over B */
  for (i=0; i<b->mbs; i++) {
    for (j=bi[i]; j<bi[i+1]; j++) {
      row = i+rstart;
      col = garray[bj[j]];
      key = row*Nbs + col + 1;
      h1  = HASH(size,key,tmp);
      for (k=0; k<size; k++){
        if (!HT[(h1+k)%size]) {
          HT[(h1+k)%size] = key;
          HD[(h1+k)%size] = b->a + j*bs2;
          break;
#if defined(PETSC_USE_DEBUG)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_DEBUG)
      if (k> max) max = k;
#endif
    }
  }

  /* Print Summary */
#if defined(PETSC_USE_DEBUG)
  for (i=0,j=0; i<size; i++) {
    if (HT[i]) {j++;}
  }
  ierr = PetscLogInfo((0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max));CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (baij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscLogInfo((0,"MatAssemblyBegin_MPIBAIJ:Stash has %D entries,uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscLogInfo((0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

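/*
   Drains the scalar stash and then the block stash (the latter is stored column-oriented,
   so the row-oriented flags are temporarily flipped), assembles A and B, rebuilds the
   scatter context on first assembly, and optionally builds the hash table used by the
   _HT insertion routines.
*/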
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col,other_disassembled;
  PetscTruth     r1,r2,r3;
  MatScalar      *val;
  InsertMode     addv = mat->insertmode;
  PetscMPIInt    n;

  PetscFunctionBegin;
  if (!baij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = b->roworiented;
    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;
    b->roworiented    = PETSC_FALSE;
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    b->roworiented    = r3;
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  b->compressedrow.use = PETSC_TRUE;
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_DEBUG)
  if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
    ierr = PetscLogInfo((0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct));CHKERRQ(ierr);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  if (baij->rowvalues) {
    ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
    baij->rowvalues = 0;
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = baij->size,rank = baij->rank;
  PetscInt          bs = mat->bs;
  PetscTruth        iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
              rank,mat->m,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs,
              mat->bs,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr);
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    PetscInt    M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar   *a;

    /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    if (!rank) {
      ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(A,mat->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstart + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstart+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ*)baij->B->data;
    ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstart + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ"
PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  if (iascii || isdraw || issocket || isbinary) {
    ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIBAIJ"
PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->M,mat->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(baij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
#else
  if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
#endif
  if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
  if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
  if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
  if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
  if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
  if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
#if defined(PETSC_USE_MAT_SINGLE)
  if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);}
#endif
  ierr = PetscFree(baij);CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

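/*
   y = A*x: overlap communication with computation; start scattering the ghost values of x,
   multiply by the diagonal block while the messages are in flight, then finish the scatter
   and add the off-diagonal contribution.
*/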
#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIBAIJ"
PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  }
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->m) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
1343   }
1344   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1345   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1346   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1347   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1348   ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1349   PetscFunctionReturn(0);
1350 }
1351 
1352 #undef __FUNCT__
1353 #define __FUNCT__ "MatMultAdd_MPIBAIJ"
1354 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1355 {
1356   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1357   PetscErrorCode ierr;
1358 
1359   PetscFunctionBegin;
1360   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1361   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1362   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1363   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1364   PetscFunctionReturn(0);
1365 }
1366 
1367 #undef __FUNCT__
1368 #define __FUNCT__ "MatMultTranspose_MPIBAIJ"
1369 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1370 {
1371   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1372   PetscErrorCode ierr;
1373   PetscTruth     merged;
1374 
1375   PetscFunctionBegin;
1376   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
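  /* a "merged" scatter completes its receives inside VecScatterBegin(), so yy must
     already hold the local part before Begin(); otherwise Begin() only starts the
     communication and we can overlap it with the local product */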
1377   /* do nondiagonal part */
1378   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1379   if (!merged) {
1380     /* send it on its way */
1381     ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1382     /* do local part */
1383     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1384     /* receive remote parts: note this assumes the values are not actually */
1385     /* inserted in yy until the next line */
1386     ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1387   } else {
1388     /* do local part */
1389     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1390     /* send it on its way */
1391     ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
    /* the values were actually received in the Begin(), but we still must call the End() (a no-op here) */
1393     ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1394   }
1395   PetscFunctionReturn(0);
1396 }
1397 
1398 #undef __FUNCT__
1399 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
1400 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1401 {
1402   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1403   PetscErrorCode ierr;
1404 
1405   PetscFunctionBegin;
1406   /* do nondiagonal part */
1407   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1408   /* send it on its way */
1409   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1410   /* do local part */
1411   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in zz until the next line, which is true for this implementation */
  /* but is perhaps not always true. */
1415   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1416   PetscFunctionReturn(0);
1417 }
1418 
1419 /*
1420   This only works correctly for square matrices where the subblock A->A is the
1421    diagonal block
1422 */
1423 #undef __FUNCT__
1424 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
1425 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1426 {
1427   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1428   PetscErrorCode ierr;
1429 
1430   PetscFunctionBegin;
1431   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1432   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1433   PetscFunctionReturn(0);
1434 }
1435 
1436 #undef __FUNCT__
1437 #define __FUNCT__ "MatScale_MPIBAIJ"
1438 PetscErrorCode MatScale_MPIBAIJ(const PetscScalar *aa,Mat A)
1439 {
1440   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1441   PetscErrorCode ierr;
1442 
1443   PetscFunctionBegin;
1444   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
1445   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
1446   PetscFunctionReturn(0);
1447 }
1448 
1449 #undef __FUNCT__
1450 #define __FUNCT__ "MatGetRow_MPIBAIJ"
1451 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1452 {
1453   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
1454   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1455   PetscErrorCode ierr;
1456   PetscInt       bs = matin->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1457   PetscInt       nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs;
1458   PetscInt       *cmap,*idx_p,cstart = mat->cstart;
1459 
1460   PetscFunctionBegin;
1461   if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1462   mat->getrowactive = PETSC_TRUE;
1463 
1464   if (!mat->rowvalues && (idx || v)) {
1465     /*
1466         allocate enough space to hold information from the longest row.
1467     */
1468     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1469     PetscInt     max = 1,mbs = mat->mbs,tmp;
1470     for (i=0; i<mbs; i++) {
1471       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1472       if (max < tmp) { max = tmp; }
1473     }
1474     ierr = PetscMalloc(max*bs2*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1475     mat->rowindices = (PetscInt*)(mat->rowvalues + max*bs2);
1476   }
1477 
  if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows");
1479   lrow = row - brstart;
1480 
1481   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1482   if (!v)   {pvA = 0; pvB = 0;}
1483   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1484   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1485   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1486   nztot = nzA + nzB;
1487 
1488   cmap  = mat->garray;
1489   if (v  || idx) {
1490     if (nztot) {
1491       /* Sort by increasing column numbers, assuming A and B already sorted */
1492       PetscInt imark = -1;
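      /* imark = number of off-process (B part) entries whose global column precedes the
         diagonal block; the row is merged as B-before, then A, then B-after */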
1493       if (v) {
1494         *v = v_p = mat->rowvalues;
1495         for (i=0; i<nzB; i++) {
1496           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1497           else break;
1498         }
1499         imark = i;
1500         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1501         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1502       }
1503       if (idx) {
1504         *idx = idx_p = mat->rowindices;
1505         if (imark > -1) {
1506           for (i=0; i<imark; i++) {
1507             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1508           }
1509         } else {
1510           for (i=0; i<nzB; i++) {
1511             if (cmap[cworkB[i]/bs] < cstart)
1512               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1513             else break;
1514           }
1515           imark = i;
1516         }
1517         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1518         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1519       }
1520     } else {
1521       if (idx) *idx = 0;
1522       if (v)   *v   = 0;
1523     }
1524   }
1525   *nz = nztot;
1526   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1527   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1528   PetscFunctionReturn(0);
1529 }
1530 
1531 #undef __FUNCT__
1532 #define __FUNCT__ "MatRestoreRow_MPIBAIJ"
1533 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1534 {
1535   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1536 
1537   PetscFunctionBegin;
1538   if (!baij->getrowactive) {
1539     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1540   }
1541   baij->getrowactive = PETSC_FALSE;
1542   PetscFunctionReturn(0);
1543 }
1544 
1545 #undef __FUNCT__
1546 #define __FUNCT__ "MatZeroEntries_MPIBAIJ"
1547 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1548 {
1549   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1550   PetscErrorCode ierr;
1551 
1552   PetscFunctionBegin;
1553   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1554   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1555   PetscFunctionReturn(0);
1556 }
1557 
1558 #undef __FUNCT__
1559 #define __FUNCT__ "MatGetInfo_MPIBAIJ"
1560 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1561 {
1562   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
1563   Mat            A = a->A,B = a->B;
1564   PetscErrorCode ierr;
1565   PetscReal      isend[5],irecv[5];
1566 
1567   PetscFunctionBegin;
1568   info->block_size     = (PetscReal)matin->bs;
1569   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1570   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1571   isend[3] = info->memory;  isend[4] = info->mallocs;
1572   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1573   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1574   isend[3] += info->memory;  isend[4] += info->mallocs;
1575   if (flag == MAT_LOCAL) {
1576     info->nz_used      = isend[0];
1577     info->nz_allocated = isend[1];
1578     info->nz_unneeded  = isend[2];
1579     info->memory       = isend[3];
1580     info->mallocs      = isend[4];
1581   } else if (flag == MAT_GLOBAL_MAX) {
1582     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1583     info->nz_used      = irecv[0];
1584     info->nz_allocated = irecv[1];
1585     info->nz_unneeded  = irecv[2];
1586     info->memory       = irecv[3];
1587     info->mallocs      = irecv[4];
1588   } else if (flag == MAT_GLOBAL_SUM) {
1589     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1590     info->nz_used      = irecv[0];
1591     info->nz_allocated = irecv[1];
1592     info->nz_unneeded  = irecv[2];
1593     info->memory       = irecv[3];
1594     info->mallocs      = irecv[4];
1595   } else {
1596     SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1597   }
  info->rows_global       = (PetscReal)matin->M;
  info->columns_global    = (PetscReal)matin->N;
  info->rows_local        = (PetscReal)matin->m;
  info->columns_local     = (PetscReal)matin->n;
1602   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1603   info->fill_ratio_needed = 0;
1604   info->factor_mallocs    = 0;
1605   PetscFunctionReturn(0);
1606 }
1607 
1608 #undef __FUNCT__
1609 #define __FUNCT__ "MatSetOption_MPIBAIJ"
1610 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op)
1611 {
1612   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1613   PetscErrorCode ierr;
1614 
1615   PetscFunctionBegin;
1616   switch (op) {
1617   case MAT_NO_NEW_NONZERO_LOCATIONS:
1618   case MAT_YES_NEW_NONZERO_LOCATIONS:
1619   case MAT_COLUMNS_UNSORTED:
1620   case MAT_COLUMNS_SORTED:
1621   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1622   case MAT_KEEP_ZEROED_ROWS:
1623   case MAT_NEW_NONZERO_LOCATION_ERR:
1624     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1625     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1626     break;
1627   case MAT_ROW_ORIENTED:
1628     a->roworiented = PETSC_TRUE;
1629     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1630     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1631     break;
1632   case MAT_ROWS_SORTED:
1633   case MAT_ROWS_UNSORTED:
1634   case MAT_YES_NEW_DIAGONALS:
1635     ierr = PetscLogInfo((A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"));CHKERRQ(ierr);
1636     break;
1637   case MAT_COLUMN_ORIENTED:
1638     a->roworiented = PETSC_FALSE;
1639     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1640     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1641     break;
1642   case MAT_IGNORE_OFF_PROC_ENTRIES:
1643     a->donotstash = PETSC_TRUE;
1644     break;
1645   case MAT_NO_NEW_DIAGONALS:
1646     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1647   case MAT_USE_HASH_TABLE:
1648     a->ht_flag = PETSC_TRUE;
1649     break;
1650   case MAT_SYMMETRIC:
1651   case MAT_STRUCTURALLY_SYMMETRIC:
1652   case MAT_HERMITIAN:
1653   case MAT_SYMMETRY_ETERNAL:
1654     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1655     break;
1656   case MAT_NOT_SYMMETRIC:
1657   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1658   case MAT_NOT_HERMITIAN:
1659   case MAT_NOT_SYMMETRY_ETERNAL:
1660     break;
1661   default:
1662     SETERRQ(PETSC_ERR_SUP,"unknown option");
1663   }
1664   PetscFunctionReturn(0);
1665 }
1666 
1667 #undef __FUNCT__
1668 #define __FUNCT__ "MatTranspose_MPIBAIJ("
1669 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,Mat *matout)
1670 {
1671   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
1672   Mat_SeqBAIJ    *Aloc;
1673   Mat            B;
1674   PetscErrorCode ierr;
1675   PetscInt       M=A->M,N=A->N,*ai,*aj,i,*rvals,j,k,col;
1676   PetscInt       bs=A->bs,mbs=baij->mbs;
1677   MatScalar      *a;
1678 
1679   PetscFunctionBegin;
1680   if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1681   ierr = MatCreate(A->comm,A->n,A->m,N,M,&B);CHKERRQ(ierr);
1682   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
1683   ierr = MatMPIBAIJSetPreallocation(B,A->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1684 
1685   /* copy over the A part */
1686   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1687   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1688   ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
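  /* rvals holds the bs consecutive global point-row indices of block row i of A;
     they become the column indices of the transposed entries inserted into B */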
1689 
1690   for (i=0; i<mbs; i++) {
1691     rvals[0] = bs*(baij->rstart + i);
1692     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1693     for (j=ai[i]; j<ai[i+1]; j++) {
1694       col = (baij->cstart+aj[j])*bs;
1695       for (k=0; k<bs; k++) {
1696         ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1697         col++; a += bs;
1698       }
1699     }
1700   }
1701   /* copy over the B part */
1702   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1703   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1704   for (i=0; i<mbs; i++) {
1705     rvals[0] = bs*(baij->rstart + i);
1706     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1707     for (j=ai[i]; j<ai[i+1]; j++) {
1708       col = baij->garray[aj[j]]*bs;
1709       for (k=0; k<bs; k++) {
1710         ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1711         col++; a += bs;
1712       }
1713     }
1714   }
1715   ierr = PetscFree(rvals);CHKERRQ(ierr);
1716   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1717   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1718 
1719   if (matout) {
1720     *matout = B;
1721   } else {
1722     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1723   }
1724   PetscFunctionReturn(0);
1725 }
1726 
1727 #undef __FUNCT__
1728 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
1729 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1730 {
1731   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1732   Mat            a = baij->A,b = baij->B;
1733   PetscErrorCode ierr;
1734   PetscInt       s1,s2,s3;
1735 
1736   PetscFunctionBegin;
1737   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1738   if (rr) {
1739     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1740     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1741     /* Overlap communication with computation. */
1742     ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1743   }
1744   if (ll) {
1745     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1746     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1747     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1748   }
  /* scale the diagonal block */
1750   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1751 
1752   if (rr) {
1753     /* Do a scatter end and then right scale the off-diagonal block */
1754     ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1755     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1756   }
1757 
1758   PetscFunctionReturn(0);
1759 }
1760 
1761 #undef __FUNCT__
1762 #define __FUNCT__ "MatZeroRows_MPIBAIJ"
1763 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,IS is,const PetscScalar *diag)
1764 {
1765   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1766   PetscErrorCode ierr;
1767   PetscMPIInt    imdex,size = l->size,n,rank = l->rank;
1768   PetscInt       i,N,*rows,*owners = l->rowners;
1769   PetscInt       *nprocs,j,idx,nsends,row;
1770   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
1771   PetscInt       *rvalues,tag = A->tag,count,base,slen,*source,lastidx = -1;
1772   PetscInt       *lens,*lrows,*values,bs=A->bs,rstart_bs=l->rstart_bs;
1773   MPI_Comm       comm = A->comm;
1774   MPI_Request    *send_waits,*recv_waits;
1775   MPI_Status     recv_status,*send_status;
1776   IS             istmp;
1777 #if defined(PETSC_DEBUG)
1778   PetscTruth     found = PETSC_FALSE;
1779 #endif
1780 
1781   PetscFunctionBegin;
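  /*
     The rows in is may be owned by other processes: first route each global row index
     to its owner (count the per-process contributions, size the receives with
     PetscMaxSum(), then exchange the indices point-to-point), collect the locally
     owned rows in lrows, and finally zero them.
  */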
1782   ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
1783   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
1784 
1785   /*  first count number of contributors to each processor */
1786   ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
1787   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
1788   ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
1789   j = 0;
1790   for (i=0; i<N; i++) {
1791     if (lastidx > (idx = rows[i])) j = 0;
1792     lastidx = idx;
1793     for (; j<size; j++) {
1794       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
1795         nprocs[2*j]++;
1796         nprocs[2*j+1] = 1;
1797         owner[i] = j;
1798 #if defined(PETSC_DEBUG)
1799         found = PETSC_TRUE;
1800 #endif
1801         break;
1802       }
1803     }
1804 #if defined(PETSC_DEBUG)
1805     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
1806     found = PETSC_FALSE;
1807 #endif
1808   }
1809   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
1810 
1811   /* inform other processors of number of messages and max length*/
1812   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
1813 
1814   /* post receives:   */
1815   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
1816   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
1817   for (i=0; i<nrecvs; i++) {
1818     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
1819   }
1820 
  /* do sends:
     starts[i] gives the starting index in svalues for the values going to
     the ith processor
  */
1825   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
1826   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
1827   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
1828   starts[0]  = 0;
1829   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1830   for (i=0; i<N; i++) {
1831     svalues[starts[owner[i]]++] = rows[i];
1832   }
1833   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
1834 
1835   starts[0] = 0;
1836   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1837   count = 0;
1838   for (i=0; i<size; i++) {
1839     if (nprocs[2*i+1]) {
1840       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
1841     }
1842   }
1843   ierr = PetscFree(starts);CHKERRQ(ierr);
1844 
1845   base = owners[rank]*bs;
1846 
1847   /*  wait on receives */
1848   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
1849   source = lens + nrecvs;
1850   count  = nrecvs; slen = 0;
1851   while (count) {
1852     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
1853     /* unpack receives into our local space */
1854     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
1855     source[imdex]  = recv_status.MPI_SOURCE;
1856     lens[imdex]    = n;
1857     slen          += n;
1858     count--;
1859   }
1860   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1861 
1862   /* move the data into the send scatter */
1863   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
1864   count = 0;
1865   for (i=0; i<nrecvs; i++) {
1866     values = rvalues + i*nmax;
1867     for (j=0; j<lens[i]; j++) {
1868       lrows[count++] = values[j] - base;
1869     }
1870   }
1871   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1872   ierr = PetscFree(lens);CHKERRQ(ierr);
1873   ierr = PetscFree(owner);CHKERRQ(ierr);
1874   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1875 
1876   /* actually zap the local rows */
1877   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
1878   ierr = PetscLogObjectParent(A,istmp);CHKERRQ(ierr);
1879 
1880   /*
1881         Zero the required rows. If the "diagonal block" of the matrix
1882      is square and the user wishes to set the diagonal we use seperate
1883      code so that MatSetValues() is not called for each diagonal allocating
1884      new memory, thus calling lots of mallocs and slowing things down.
1885 
1886        Contributed by: Mathew Knepley
1887   */
1888   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1889   ierr = MatZeroRows_SeqBAIJ(l->B,istmp,0);CHKERRQ(ierr);
1890   if (diag && (l->A->M == l->A->N)) {
1891     ierr = MatZeroRows_SeqBAIJ(l->A,istmp,diag);CHKERRQ(ierr);
1892   } else if (diag) {
1893     ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr);
1894     if (((Mat_SeqBAIJ*)l->A->data)->nonew) {
1895       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1896 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1897     }
1898     for (i=0; i<slen; i++) {
1899       row  = lrows[i] + rstart_bs;
1900       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
1901     }
1902     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1903     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1904   } else {
1905     ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr);
1906   }
1907 
1908   ierr = ISDestroy(istmp);CHKERRQ(ierr);
1909   ierr = PetscFree(lrows);CHKERRQ(ierr);
1910 
1911   /* wait on sends */
1912   if (nsends) {
1913     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1914     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1915     ierr = PetscFree(send_status);CHKERRQ(ierr);
1916   }
1917   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1918   ierr = PetscFree(svalues);CHKERRQ(ierr);
1919 
1920   PetscFunctionReturn(0);
1921 }
1922 
1923 #undef __FUNCT__
1924 #define __FUNCT__ "MatPrintHelp_MPIBAIJ"
1925 PetscErrorCode MatPrintHelp_MPIBAIJ(Mat A)
1926 {
1927   Mat_MPIBAIJ       *a   = (Mat_MPIBAIJ*)A->data;
1928   MPI_Comm          comm = A->comm;
1929   static PetscTruth called = PETSC_FALSE;
1930   PetscErrorCode    ierr;
1931 
1932   PetscFunctionBegin;
1933   if (!a->rank) {
1934     ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr);
1935   }
1936   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1937   ierr = (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
1938   ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
1939   PetscFunctionReturn(0);
1940 }
1941 
1942 #undef __FUNCT__
1943 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
1944 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1945 {
1946   Mat_MPIBAIJ    *a   = (Mat_MPIBAIJ*)A->data;
1947   PetscErrorCode ierr;
1948 
1949   PetscFunctionBegin;
1950   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1951   PetscFunctionReturn(0);
1952 }
1953 
1954 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
1955 
1956 #undef __FUNCT__
1957 #define __FUNCT__ "MatEqual_MPIBAIJ"
1958 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag)
1959 {
1960   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1961   Mat            a,b,c,d;
1962   PetscTruth     flg;
1963   PetscErrorCode ierr;
1964 
1965   PetscFunctionBegin;
1966   a = matA->A; b = matA->B;
1967   c = matB->A; d = matB->B;
1968 
1969   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1970   if (flg) {
1971     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1972   }
1973   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1974   PetscFunctionReturn(0);
1975 }
1976 
1977 
1978 #undef __FUNCT__
1979 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ"
1980 PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A)
1981 {
1982   PetscErrorCode ierr;
1983 
1984   PetscFunctionBegin;
1985   ierr =  MatMPIBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1986   PetscFunctionReturn(0);
1987 }
1988 
1989 /* -------------------------------------------------------------------*/
1990 static struct _MatOps MatOps_Values = {
1991        MatSetValues_MPIBAIJ,
1992        MatGetRow_MPIBAIJ,
1993        MatRestoreRow_MPIBAIJ,
1994        MatMult_MPIBAIJ,
1995 /* 4*/ MatMultAdd_MPIBAIJ,
1996        MatMultTranspose_MPIBAIJ,
1997        MatMultTransposeAdd_MPIBAIJ,
1998        0,
1999        0,
2000        0,
2001 /*10*/ 0,
2002        0,
2003        0,
2004        0,
2005        MatTranspose_MPIBAIJ,
2006 /*15*/ MatGetInfo_MPIBAIJ,
2007        MatEqual_MPIBAIJ,
2008        MatGetDiagonal_MPIBAIJ,
2009        MatDiagonalScale_MPIBAIJ,
2010        MatNorm_MPIBAIJ,
2011 /*20*/ MatAssemblyBegin_MPIBAIJ,
2012        MatAssemblyEnd_MPIBAIJ,
2013        0,
2014        MatSetOption_MPIBAIJ,
2015        MatZeroEntries_MPIBAIJ,
2016 /*25*/ MatZeroRows_MPIBAIJ,
2017        0,
2018        0,
2019        0,
2020        0,
2021 /*30*/ MatSetUpPreallocation_MPIBAIJ,
2022        0,
2023        0,
2024        0,
2025        0,
2026 /*35*/ MatDuplicate_MPIBAIJ,
2027        0,
2028        0,
2029        0,
2030        0,
2031 /*40*/ 0,
2032        MatGetSubMatrices_MPIBAIJ,
2033        MatIncreaseOverlap_MPIBAIJ,
2034        MatGetValues_MPIBAIJ,
2035        0,
2036 /*45*/ MatPrintHelp_MPIBAIJ,
2037        MatScale_MPIBAIJ,
2038        0,
2039        0,
2040        0,
2041 /*50*/ 0,
2042        0,
2043        0,
2044        0,
2045        0,
2046 /*55*/ 0,
2047        0,
2048        MatSetUnfactored_MPIBAIJ,
2049        0,
2050        MatSetValuesBlocked_MPIBAIJ,
2051 /*60*/ 0,
2052        MatDestroy_MPIBAIJ,
2053        MatView_MPIBAIJ,
2054        MatGetPetscMaps_Petsc,
2055        0,
2056 /*65*/ 0,
2057        0,
2058        0,
2059        0,
2060        0,
2061 /*70*/ MatGetRowMax_MPIBAIJ,
2062        0,
2063        0,
2064        0,
2065        0,
2066 /*75*/ 0,
2067        0,
2068        0,
2069        0,
2070        0,
2071 /*80*/ 0,
2072        0,
2073        0,
2074        0,
2075        MatLoad_MPIBAIJ,
2076 /*85*/ 0,
2077        0,
2078        0,
2079        0,
2080        0,
2081 /*90*/ 0,
2082        0,
2083        0,
2084        0,
2085        0,
2086 /*95*/ 0,
2087        0,
2088        0,
2089        0};
2090 
2091 
2092 EXTERN_C_BEGIN
2093 #undef __FUNCT__
2094 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
2095 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
2096 {
2097   PetscFunctionBegin;
2098   *a      = ((Mat_MPIBAIJ *)A->data)->A;
2099   *iscopy = PETSC_FALSE;
2100   PetscFunctionReturn(0);
2101 }
2102 EXTERN_C_END
2103 
2104 EXTERN_C_BEGIN
2105 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*);
2106 EXTERN_C_END
2107 
2108 #undef __FUNCT__
2109 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ"
2110 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt I[],const PetscInt J[],const PetscScalar v[])
2111 {
2112   Mat_MPIBAIJ    *b  = (Mat_MPIBAIJ *)B->data;
2113   PetscInt       m = B->m/bs,cstart = b->cstart, cend = b->cend,j,nnz,i,d;
2114   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart = b->rstart,ii;
2115   const PetscInt *JJ;
2116   PetscScalar    *values;
2117   PetscErrorCode ierr;
2118 
2119   PetscFunctionBegin;
2120 #if defined(PETSC_OPT_g)
  if (I[0]) SETERRQ1(PETSC_ERR_ARG_RANGE,"I[0] must be 0, it is %D",I[0]);
2122 #endif
2123   ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2124   o_nnz = d_nnz + m;
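  /* the column indices of each block row are assumed sorted, so one forward scan splits
     them: global block columns in [cstart,cend) are diagonal-block (d_nnz) nonzeros,
     everything else is off-diagonal (o_nnz) */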
2125 
2126   for (i=0; i<m; i++) {
2127     nnz     = I[i+1]- I[i];
2128     JJ      = J + I[i];
2129     nnz_max = PetscMax(nnz_max,nnz);
2130 #if defined(PETSC_OPT_g)
    if (nnz < 0) SETERRQ1(PETSC_ERR_ARG_RANGE,"Local row %D has a negative number of columns %D",i,nnz);
2132 #endif
2133     for (j=0; j<nnz; j++) {
2134       if (*JJ >= cstart) break;
2135       JJ++;
2136     }
2137     d = 0;
2138     for (; j<nnz; j++) {
2139       if (*JJ++ >= cend) break;
2140       d++;
2141     }
2142     d_nnz[i] = d;
2143     o_nnz[i] = nnz - d;
2144   }
2145   ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2146   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2147 
2148   if (v) values = (PetscScalar*)v;
2149   else {
2150     ierr = PetscMalloc(bs*bs*(nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2151     ierr = PetscMemzero(values,bs*bs*nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2152   }
2153 
2154   ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2155   for (i=0; i<m; i++) {
2156     ii   = i + rstart;
2157     nnz  = I[i+1]- I[i];
2158     ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&ii,nnz,J+I[i],values,INSERT_VALUES);CHKERRQ(ierr);
2159   }
2160   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2161   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2162   ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr);
2163 
2164   if (!v) {
2165     ierr = PetscFree(values);CHKERRQ(ierr);
2166   }
2167   PetscFunctionReturn(0);
2168 }
2169 
2170 #undef __FUNCT__
2171 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR"
2172 /*@C
   MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in block AIJ format
   (block compressed row).
2175 
   Collective on Mat
2177 
2178    Input Parameters:
+  A - the matrix
.  bs - the block size
.  i - the indices into j for the start of each local row (starts with zero)
.  j - the column indices for each local row (starts with zero); these must be sorted for each row
-  v - optional values in the matrix
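
   Example usage, a minimal sketch (the names comm,m,n,M,N,bs,ii,jj,vv are illustrative;
   ii,jj,vv are assumed to hold the CSR description of the locally owned block rows,
   with the column indices of each row sorted):

.vb
   Mat A;
   ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr);
   ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
   ierr = MatMPIBAIJSetPreallocationCSR(A,bs,ii,jj,vv);CHKERRQ(ierr);
.ve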
2183 
2184    Level: developer
2185 
2186 .keywords: matrix, aij, compressed row, sparse, parallel
2187 
.seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIBAIJ(), MATMPIBAIJ
2189 @*/
2190 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2191 {
2192   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]);
2193 
2194   PetscFunctionBegin;
2195   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
2196   if (f) {
2197     ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr);
2198   }
2199   PetscFunctionReturn(0);
2200 }
2201 
2202 EXTERN_C_BEGIN
2203 #undef __FUNCT__
2204 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
2205 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
2206 {
2207   Mat_MPIBAIJ    *b;
2208   PetscErrorCode ierr;
2209   PetscInt       i;
2210 
2211   PetscFunctionBegin;
2212   B->preallocated = PETSC_TRUE;
2213   ierr = PetscOptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2214 
2215   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
2216   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2217   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2218   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2219   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
  if (d_nnz) {
    for (i=0; i<B->m/bs; i++) {
      if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
    }
  }
2225   if (o_nnz) {
2226     for (i=0; i<B->m/bs; i++) {
      if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
2228     }
2229   }
2230 
2231   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr);
2232   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr);
2233   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
2234   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
2235 
2236   b = (Mat_MPIBAIJ*)B->data;
2237   B->bs  = bs;
2238   b->bs2 = bs*bs;
2239   b->mbs = B->m/bs;
2240   b->nbs = B->n/bs;
2241   b->Mbs = B->M/bs;
2242   b->Nbs = B->N/bs;
2243 
2244   ierr = MPI_Allgather(&b->mbs,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
2245   b->rowners[0]    = 0;
2246   for (i=2; i<=b->size; i++) {
2247     b->rowners[i] += b->rowners[i-1];
2248   }
2249   b->rstart    = b->rowners[b->rank];
2250   b->rend      = b->rowners[b->rank+1];
2251 
2252   ierr = MPI_Allgather(&b->nbs,1,MPIU_INT,b->cowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
2253   b->cowners[0] = 0;
2254   for (i=2; i<=b->size; i++) {
2255     b->cowners[i] += b->cowners[i-1];
2256   }
2257   b->cstart    = b->cowners[b->rank];
2258   b->cend      = b->cowners[b->rank+1];
2259 
2260   for (i=0; i<=b->size; i++) {
2261     b->rowners_bs[i] = b->rowners[i]*bs;
2262   }
2263   b->rstart_bs = b->rstart*bs;
2264   b->rend_bs   = b->rend*bs;
2265   b->cstart_bs = b->cstart*bs;
2266   b->cend_bs   = b->cend*bs;
2267 
2268   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);CHKERRQ(ierr);
2269   ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
2270   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2271   ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
2272   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);CHKERRQ(ierr);
2273   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
2274   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
2275   ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
2276 
2277   ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);
2278 
2279   PetscFunctionReturn(0);
2280 }
2281 EXTERN_C_END
2282 
2283 EXTERN_C_BEGIN
2284 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
2285 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
2286 EXTERN_C_END
2287 
2288 /*MC
2289    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
2290 
2291    Options Database Keys:
2292 . -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
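
   For example, a hypothetical run that selects this format with 3x3 blocks:
.vb
   mpirun -np 4 ./myapp -mat_type mpibaij -mat_block_size 3
.ve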
2293 
2294   Level: beginner
2295 
.seealso: MatCreateMPIBAIJ()
2297 M*/
2298 
2299 EXTERN_C_BEGIN
2300 #undef __FUNCT__
2301 #define __FUNCT__ "MatCreate_MPIBAIJ"
2302 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIBAIJ(Mat B)
2303 {
2304   Mat_MPIBAIJ    *b;
2305   PetscErrorCode ierr;
2306   PetscTruth     flg;
2307 
2308   PetscFunctionBegin;
2309   ierr = PetscNew(Mat_MPIBAIJ,&b);CHKERRQ(ierr);
2310   B->data = (void*)b;
2311 
2312   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2313   B->mapping    = 0;
2314   B->factor     = 0;
2315   B->assembled  = PETSC_FALSE;
2316 
2317   B->insertmode = NOT_SET_VALUES;
2318   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
2319   ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr);
2320 
2321   /* build local table of row and column ownerships */
2322   ierr          = PetscMalloc(3*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr);
2323   ierr = PetscLogObjectMemory(B,3*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));CHKERRQ(ierr);
2324   b->cowners    = b->rowners + b->size + 2;
2325   b->rowners_bs = b->cowners + b->size + 2;
2326 
2327   /* build cache for off array entries formed */
2328   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
2329   b->donotstash  = PETSC_FALSE;
2330   b->colmap      = PETSC_NULL;
2331   b->garray      = PETSC_NULL;
2332   b->roworiented = PETSC_TRUE;
2333 
2334 #if defined(PETSC_USE_MAT_SINGLE)
2335   /* stuff for MatSetValues_XXX in single precision */
2336   b->setvalueslen     = 0;
2337   b->setvaluescopy    = PETSC_NULL;
2338 #endif
2339 
2340   /* stuff used in block assembly */
2341   b->barray       = 0;
2342 
2343   /* stuff used for matrix vector multiply */
2344   b->lvec         = 0;
2345   b->Mvctx        = 0;
2346 
2347   /* stuff for MatGetRow() */
2348   b->rowindices   = 0;
2349   b->rowvalues    = 0;
2350   b->getrowactive = PETSC_FALSE;
2351 
2352   /* hash table stuff */
2353   b->ht           = 0;
2354   b->hd           = 0;
2355   b->ht_size      = 0;
2356   b->ht_flag      = PETSC_FALSE;
2357   b->ht_fact      = 0;
2358   b->ht_total_ct  = 0;
2359   b->ht_insert_ct = 0;
2360 
2361   ierr = PetscOptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
2362   if (flg) {
2363     PetscReal fact = 1.39;
2364     ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
2365     ierr = PetscOptionsGetReal(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
2366     if (fact <= 1.0) fact = 1.39;
2367     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
2368     ierr = PetscLogInfo((0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact));CHKERRQ(ierr);
2369   }
2370   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2371                                      "MatStoreValues_MPIBAIJ",
2372                                      MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
2373   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2374                                      "MatRetrieveValues_MPIBAIJ",
2375                                      MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
2376   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
2377                                      "MatGetDiagonalBlock_MPIBAIJ",
2378                                      MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
2379   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C",
2380                                      "MatMPIBAIJSetPreallocation_MPIBAIJ",
2381                                      MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
2382   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",
2383 				     "MatMPIBAIJSetPreallocationCSR_MPIAIJ",
2384 				     MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
2385   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
2386                                      "MatDiagonalScaleLocal_MPIBAIJ",
2387                                      MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
2388   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C",
2389                                      "MatSetHashTableFactor_MPIBAIJ",
2390                                      MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
2391   PetscFunctionReturn(0);
2392 }
2393 EXTERN_C_END
2394 
2395 /*MC
2396    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
2397 
2398    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
2399    and MATMPIBAIJ otherwise.
2400 
2401    Options Database Keys:
2402 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
2403 
2404   Level: beginner
2405 
2406 .seealso: MatCreateMPIBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
2407 M*/
2408 
2409 EXTERN_C_BEGIN
2410 #undef __FUNCT__
2411 #define __FUNCT__ "MatCreate_BAIJ"
2412 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_BAIJ(Mat A)
2413 {
2414   PetscErrorCode ierr;
2415   PetscMPIInt    size;
2416 
2417   PetscFunctionBegin;
2418   ierr = PetscObjectChangeTypeName((PetscObject)A,MATBAIJ);CHKERRQ(ierr);
2419   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
2420   if (size == 1) {
2421     ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr);
2422   } else {
2423     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
2424   }
2425   PetscFunctionReturn(0);
2426 }
2427 EXTERN_C_END
2428 
2429 #undef __FUNCT__
2430 #define __FUNCT__ "MatMPIBAIJSetPreallocation"
2431 /*@C
2432    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
2433    (block compressed row).  For good matrix assembly performance
2434    the user should preallocate the matrix storage by setting the parameters
2435    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2436    performance can be increased by more than a factor of 50.
2437 
2438    Collective on Mat
2439 
2440    Input Parameters:
2441 +  A - the matrix
.  bs   - size of block
2443 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2444            submatrix  (same for all local rows)
2445 .  d_nnz - array containing the number of block nonzeros in the various block rows
           in the diagonal portion of the local submatrix (possibly different for each block
           row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
2448 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2449            submatrix (same for all local rows).
2450 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2451            off-diagonal portion of the local submatrix (possibly different for
2452            each block row) or PETSC_NULL.
2453 
2454    If the *_nnz parameter is given then the *_nz parameter is ignored
2455 
2456    Options Database Keys:
2457 .   -mat_no_unroll - uses code that does not unroll the loops in the
2458                      block calculations (much slower)
2459 .   -mat_block_size - size of the blocks to use
2460 
2461    Notes:
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
2464 
2465    Storage Information:
2466    For a square global matrix we define each processor's diagonal portion
2467    to be its local rows and the corresponding columns (a square submatrix);
2468    each processor's off-diagonal portion encompasses the remainder of the
2469    local matrix (a rectangular submatrix).
2470 
2471    The user can specify preallocated storage for the diagonal part of
2472    the local submatrix with either d_nz or d_nnz (not both).  Set
2473    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2474    memory allocation.  Likewise, specify preallocated storage for the
2475    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2476 
2477    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2478    the figure below we depict these three local rows and all columns (0-11).
2479 
2480 .vb
2481            0 1 2 3 4 5 6 7 8 9 10 11
2482           -------------------
2483    row 3  |  o o o d d d o o o o o o
2484    row 4  |  o o o d d d o o o o o o
2485    row 5  |  o o o d d d o o o o o o
2486           -------------------
2487 .ve
2488 
2489    Thus, any entries in the d locations are stored in the d (diagonal)
2490    submatrix, and any entries in the o locations are stored in the
2491    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
2492    stored simply in the MATSEQBAIJ format for compressed row storage.
2493 
2494    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2495    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2496    In general, for PDE problems in which most nonzeros are near the diagonal,
2497    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2498    or you will get TERRIBLE performance; see the users' manual chapter on
2499    matrices.
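
   Example usage, a minimal sketch with 3x3 blocks and constant per-block-row estimates
   (5 diagonal and 2 off-diagonal block nonzeros per block row; d_nnz/o_nnz arrays could
   be passed instead for tighter preallocation; comm,m,n,M,N are illustrative):

.vb
   Mat A;
   ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr);
   ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
   ierr = MatMPIBAIJSetPreallocation(A,3,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
.ve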
2500 
2501    Level: intermediate
2502 
2503 .keywords: matrix, block, aij, compressed row, sparse, parallel
2504 
2505 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR()
2506 @*/
2507 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2508 {
2509   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
2510 
2511   PetscFunctionBegin;
2512   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2513   if (f) {
2514     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2515   }
2516   PetscFunctionReturn(0);
2517 }
2518 
2519 #undef __FUNCT__
2520 #define __FUNCT__ "MatCreateMPIBAIJ"
2521 /*@C
2522    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
2523    (block compressed row).  For good matrix assembly performance
2524    the user should preallocate the matrix storage by setting the parameters
2525    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2526    performance can be increased by more than a factor of 50.
2527 
2528    Collective on MPI_Comm
2529 
2530    Input Parameters:
2531 +  comm - MPI communicator
.  bs   - size of block
2533 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2534            This value should be the same as the local size used in creating the
2535            y vector for the matrix-vector product y = Ax.
2536 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2537            This value should be the same as the local size used in creating the
2538            x vector for the matrix-vector product y = Ax.
2539 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2540 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2541 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
2542            submatrix  (same for all local rows)
2543 .  d_nnz - array containing the number of nonzero blocks in the various block rows
           in the diagonal portion of the local submatrix (possibly different for each block
           row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
2546 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
2547            submatrix (same for all local rows).
2548 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
2549            off-diagonal portion of the local submatrix (possibly different for
2550            each block row) or PETSC_NULL.
2551 
2552    Output Parameter:
2553 .  A - the matrix
2554 
2555    Options Database Keys:
2556 .   -mat_no_unroll - uses code that does not unroll the loops in the
2557                      block calculations (much slower)
2558 .   -mat_block_size - size of the blocks to use
2559 
2560    Notes:
2561    If the *_nnz parameter is given then the *_nz parameter is ignored
2562 
   A nonzero block is any block that has 1 or more nonzeros in it
2564 
2565    The user MUST specify either the local or global matrix dimensions
2566    (possibly both).
2567 
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
2570 
2571    Storage Information:
2572    For a square global matrix we define each processor's diagonal portion
2573    to be its local rows and the corresponding columns (a square submatrix);
2574    each processor's off-diagonal portion encompasses the remainder of the
2575    local matrix (a rectangular submatrix).
2576 
2577    The user can specify preallocated storage for the diagonal part of
2578    the local submatrix with either d_nz or d_nnz (not both).  Set
2579    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2580    memory allocation.  Likewise, specify preallocated storage for the
2581    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2582 
2583    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2584    the figure below we depict these three local rows and all columns (0-11).
2585 
2586 .vb
2587            0 1 2 3 4 5 6 7 8 9 10 11
2588           -------------------
2589    row 3  |  o o o d d d o o o o o o
2590    row 4  |  o o o d d d o o o o o o
2591    row 5  |  o o o d d d o o o o o o
2592           -------------------
2593 .ve
2594 
2595    Thus, any entries in the d locations are stored in the d (diagonal)
2596    submatrix, and any entries in the o locations are stored in the
2597    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
2598    stored simply in the MATSEQBAIJ format for compressed row storage.
2599 
2600    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2601    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2602    In general, for PDE problems in which most nonzeros are near the diagonal,
2603    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2604    or you will get TERRIBLE performance; see the users' manual chapter on
2605    matrices.
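
   Example usage, a minimal sketch that lets PETSc choose the local sizes; the constants
   bs=3, d_nz=5, o_nz=2 and the names comm,M,N are purely illustrative:

.vb
   Mat A;
   ierr = MatCreateMPIBAIJ(comm,3,PETSC_DECIDE,PETSC_DECIDE,M,N,5,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
.ve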
2606 
2607    Level: intermediate
2608 
2609 .keywords: matrix, block, aij, compressed row, sparse, parallel
2610 
2611 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
2612 @*/
2613 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2614 {
2615   PetscErrorCode ierr;
2616   PetscMPIInt    size;
2617 
2618   PetscFunctionBegin;
2619   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2620   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2621   if (size > 1) {
2622     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
2623     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2624   } else {
2625     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
2626     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2627   }
2628   PetscFunctionReturn(0);
2629 }
2630 
2631 #undef __FUNCT__
2632 #define __FUNCT__ "MatDuplicate_MPIBAIJ"
2633 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2634 {
2635   Mat            mat;
2636   Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
2637   PetscErrorCode ierr;
2638   PetscInt       len=0;
2639 
2640   PetscFunctionBegin;
2641   *newmat       = 0;
2642   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
2643   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
2644   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2645 
2646   mat->factor       = matin->factor;
2647   mat->preallocated = PETSC_TRUE;
2648   mat->assembled    = PETSC_TRUE;
2649   mat->insertmode   = NOT_SET_VALUES;
2650 
2651   a      = (Mat_MPIBAIJ*)mat->data;
2652   mat->bs  = matin->bs;
2653   a->bs2   = oldmat->bs2;
2654   a->mbs   = oldmat->mbs;
2655   a->nbs   = oldmat->nbs;
2656   a->Mbs   = oldmat->Mbs;
2657   a->Nbs   = oldmat->Nbs;
2658 
2659   a->rstart       = oldmat->rstart;
2660   a->rend         = oldmat->rend;
2661   a->cstart       = oldmat->cstart;
2662   a->cend         = oldmat->cend;
2663   a->size         = oldmat->size;
2664   a->rank         = oldmat->rank;
2665   a->donotstash   = oldmat->donotstash;
2666   a->roworiented  = oldmat->roworiented;
2667   a->rowindices   = 0;
2668   a->rowvalues    = 0;
2669   a->getrowactive = PETSC_FALSE;
2670   a->barray       = 0;
2671   a->rstart_bs    = oldmat->rstart_bs;
2672   a->rend_bs      = oldmat->rend_bs;
2673   a->cstart_bs    = oldmat->cstart_bs;
2674   a->cend_bs      = oldmat->cend_bs;
2675 
2676   /* hash table stuff */
2677   a->ht           = 0;
2678   a->hd           = 0;
2679   a->ht_size      = 0;
2680   a->ht_flag      = oldmat->ht_flag;
2681   a->ht_fact      = oldmat->ht_fact;
2682   a->ht_total_ct  = 0;
2683   a->ht_insert_ct = 0;
2684 
2685   ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
2686   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
2687   ierr = MatStashCreate_Private(matin->comm,matin->bs,&mat->bstash);CHKERRQ(ierr);
2688   if (oldmat->colmap) {
2689 #if defined (PETSC_USE_CTABLE)
2690   ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2691 #else
2692   ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2693   ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2694   ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2695 #endif
2696   } else a->colmap = 0;
2697 
2698   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2699     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2700     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2701     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
2702   } else a->garray = 0;
2703 
2704   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2705   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2706   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2707   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2708 
2709   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2710   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2711   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2712   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2713   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
2714   *newmat = mat;
2715 
2716   PetscFunctionReturn(0);
2717 }
2718 
2719 #include "petscsys.h"
2720 
2721 #undef __FUNCT__
2722 #define __FUNCT__ "MatLoad_MPIBAIJ"
2723 PetscErrorCode MatLoad_MPIBAIJ(PetscViewer viewer,const MatType type,Mat *newmat)
2724 {
2725   Mat            A;
2726   PetscErrorCode ierr;
2727   int            fd;
2728   PetscInt       i,nz,j,rstart,rend;
2729   PetscScalar    *vals,*buf;
2730   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2731   MPI_Status     status;
2732   PetscMPIInt    rank,size,maxnz;
2733   PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
2734   PetscInt       *locrowlens,*procsnz = 0,*browners;
2735   PetscInt       jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax;
2736   PetscMPIInt    tag = ((PetscObject)viewer)->tag;
2737   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2738   PetscInt       dcount,kmax,k,nzcount,tmp,mend;
2739 
2740   PetscFunctionBegin;
2741   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2742 
2743   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2744   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2745   if (!rank) {
2746     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2747     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2748     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2749   }
2750 
2751   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2752   M = header[1]; N = header[2];
2753 
2754   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2755 
2756   /*
2757      This code adds extra rows to make sure the number of rows is
2758      divisible by the blocksize
2759   */
2760   Mbs        = M/bs;
2761   extra_rows = bs - M + bs*Mbs;
2762   if (extra_rows == bs) extra_rows = 0;
2763   else                  Mbs++;
2764   if (extra_rows && !rank) {
2765     ierr = PetscLogInfo((0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n"));CHKERRQ(ierr);
2766   }
2767 
2768   /* determine ownership of all rows */
2769   mbs        = Mbs/size + ((Mbs % size) > rank);
2770   m          = mbs*bs;
2771   ierr       = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr);
2772   ierr       = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2773 
2774   /* process 0 needs enough room for the process with the most rows */
2775   if (!rank) {
2776     mmax = rowners[1];
2777     for (i=2; i<=size; i++) {   /* rowners[1..size] hold every rank's block-row count */
2778       mmax = PetscMax(mmax,rowners[i]);
2779     }
2780     mmax*=bs;
2781   } else mmax = m;
2782 
2783   rowners[0] = 0;
2784   for (i=2; i<=size; i++)  rowners[i] += rowners[i-1];
2785   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2786   rstart = rowners[rank];
2787   rend   = rowners[rank+1];
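
  /* Worked example (illustrative): Mbs = 10, size = 3 gives mbs = 4,3,3 on
     ranks 0,1,2, so after the prefix sum rowners = {0,4,7,10} and, with
     bs = 2, browners = {0,8,14,20}; rank 1 then owns block rows [4,7). */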
2788 
2789   /* distribute row lengths to all processors */
2790   ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr);
2791   if (!rank) {
2792     mend = m;
2793     if (size == 1) mend = mend - extra_rows;
2794     ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr);
2795     for (j=mend; j<m; j++) locrowlens[j] = 1;
2796     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2797     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2798     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2799     for (j=0; j<m; j++) {
2800       procsnz[0] += locrowlens[j];
2801     }
2802     for (i=1; i<size; i++) {
2803       mend = browners[i+1] - browners[i];
2804       if (i == size-1) mend = mend - extra_rows;
2805       ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr);
2806       for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
2807       /* calculate the number of nonzeros on each processor */
2808       for (j=0; j<browners[i+1]-browners[i]; j++) {
2809         procsnz[i] += rowlengths[j];
2810       }
2811       ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2812     }
2813     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2814   } else {
2815     ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2816   }
2817 
2818   if (!rank) {
2819     /* determine max buffer needed and allocate it */
2820     maxnz = procsnz[0];
2821     for (i=1; i<size; i++) {
2822       maxnz = PetscMax(maxnz,procsnz[i]);
2823     }
2824     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2825 
2826     /* read in my part of the matrix column indices  */
2827     nz     = procsnz[0];
2828     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2829     mycols = ibuf;
2830     if (size == 1)  nz -= extra_rows;
2831     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2832     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2833 
2834     /* read in the column indices of every other process (except the last) and ship them off */
2835     for (i=1; i<size-1; i++) {
2836       nz   = procsnz[i];
2837       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2838       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2839     }
2840     /* read in the column indices for the last process */
2841     if (size != 1) {
2842       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2843       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2844       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2845       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2846     }
2847     ierr = PetscFree(cols);CHKERRQ(ierr);
2848   } else {
2849     /* determine buffer space needed for message */
2850     nz = 0;
2851     for (i=0; i<m; i++) {
2852       nz += locrowlens[i];
2853     }
2854     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2855     mycols = ibuf;
2856     /* receive the message of column indices */
2857     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2858     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2859     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Received column index count does not match the row lengths in the file");
2860   }
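
  /* To summarize the protocol above: rank 0 reads each process's column
     indices from the file and ships them with MPI_Send, while every other
     rank sizes its receive buffer from locrowlens and cross-checks the
     count reported by MPI_Get_count against that expectation. */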
2861 
2862   /* loop over local block rows, counting the distinct diagonal and off-diagonal block columns in each */
2863   ierr     = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
2864   ierr     = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
2865   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2866   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2867   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2868   rowcount = 0; nzcount = 0;
2869   for (i=0; i<mbs; i++) {
2870     dcount  = 0;
2871     odcount = 0;
2872     for (j=0; j<bs; j++) {
2873       kmax = locrowlens[rowcount];
2874       for (k=0; k<kmax; k++) {
2875         tmp = mycols[nzcount++]/bs;
2876         if (!mask[tmp]) {
2877           mask[tmp] = 1;
2878           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
2879           else masked1[dcount++] = tmp;
2880         }
2881       }
2882       rowcount++;
2883     }
2884 
2885     dlens[i]  = dcount;
2886     odlens[i] = odcount;
2887 
2888     /* zero out the mask elements we set */
2889     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2890     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2891   }
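
  /* At this point dlens[i] and odlens[i] hold, for local block row i, the
     number of distinct block columns falling inside and outside the
     diagonal block respectively; these feed the exact preallocation below.
     Illustrative example with bs = 2: point columns {0,1,5} in a row give
     block columns 0 and 2, so if block column 0 is owned locally and 2 is
     not, that row contributes dcount = 1 and odcount = 1. */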
2892 
2893   /* create our matrix */
2894   ierr = MatCreate(comm,m,m,M+extra_rows,N+extra_rows,&A);CHKERRQ(ierr);
2895   ierr = MatSetType(A,type);CHKERRQ(ierr);
2896   ierr = MatMPIBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2897 
2898   /* the binary file stores the column indices of each row in ascending order, so flag them as sorted */
2899   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2900 
2901   if (!rank) {
2902     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2903     /* read in my part of the matrix numerical values  */
2904     nz = procsnz[0];
2905     vals = buf;
2906     mycols = ibuf;
2907     if (size == 1)  nz -= extra_rows;
2908     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2909     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2910 
2911     /* insert into matrix */
2912     jj      = rstart*bs;
2913     for (i=0; i<m; i++) {
2914       ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2915       mycols += locrowlens[i];
2916       vals   += locrowlens[i];
2917       jj++;
2918     }
2919     /* read in the values for the other processes (except the last) and ship them out */
2920     for (i=1; i<size-1; i++) {
2921       nz   = procsnz[i];
2922       vals = buf;
2923       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2924       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2925     }
2926     /* the last proc */
2927     if (size != 1){
2928       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2929       vals = buf;
2930       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2931       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2932       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
2933     }
2934     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2935   } else {
2936     /* receive numeric values */
2937     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2938 
2939     /* receive the message of values */
2940     vals   = buf;
2941     mycols = ibuf;
2942     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2943     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2944     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Received value count does not match the row lengths in the file");
2945 
2946     /* insert into matrix */
2947     jj      = rstart*bs;
2948     for (i=0; i<m; i++) {
2949       ierr    = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2950       mycols += locrowlens[i];
2951       vals   += locrowlens[i];
2952       jj++;
2953     }
2954   }
2955   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2956   ierr = PetscFree(buf);CHKERRQ(ierr);
2957   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2958   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
2959   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
2960   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
2961   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2962   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2963 
2964   *newmat = A;
2965   PetscFunctionReturn(0);
2966 }
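
/*
   A minimal usage sketch for the loader above (hypothetical driver code;
   the file name and the FILE_MODE_READ flag are assumptions for this
   PETSc version):

     PetscViewer viewer;
     Mat         A;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatLoad(viewer,MATMPIBAIJ,&A);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);

   The block size defaults to 1 and can be set with -matload_block_size <bs>;
   when the number of rows is not divisible by bs the loader pads the matrix
   with extra rows carrying 1.0 on the diagonal.
*/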
2967 
2968 #undef __FUNCT__
2969 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
2970 /*@
2971    MatMPIBAIJSetHashTableFactor - Sets the factor used to compute the size of the hash table
2972 
2973    Collective on Mat
2974 
2975    Input Parameters:
2976 +  mat  - the matrix
2977 -  fact - the factor used in computing the hash table size
2978 
2979    Level: advanced
2980 
2981    Notes:
2982    This can also be set with the command line option: -mat_use_hash_table <fact>
2983 
2984 .keywords: matrix, hashtable, factor, HT
2985 
2986 .seealso: MatSetOption()
2987 @*/
2988 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
2989 {
2990   PetscErrorCode ierr,(*f)(Mat,PetscReal);
2991 
2992   PetscFunctionBegin;
2993   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);CHKERRQ(ierr);
2994   if (f) {
2995     ierr = (*f)(mat,fact);CHKERRQ(ierr);
2996   }
2997   PetscFunctionReturn(0);
2998 }
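
/*
   A minimal usage sketch (hypothetical caller code; MAT_USE_HASH_TABLE is
   assumed to be the MatOption enabling hash-table stashing in this PETSc
   version):

     ierr = MatSetOption(A,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
     ierr = MatMPIBAIJSetHashTableFactor(A,1.6);CHKERRQ(ierr);

   The call is a no-op for matrix types that do not compose
   "MatSetHashTableFactor_C".
*/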
2999 
3000 EXTERN_C_BEGIN
3001 #undef __FUNCT__
3002 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
3003 PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
3004 {
3005   Mat_MPIBAIJ *baij;
3006 
3007   PetscFunctionBegin;
3008   baij = (Mat_MPIBAIJ*)mat->data;
3009   baij->ht_fact = fact;
3010   PetscFunctionReturn(0);
3011 }
3012 EXTERN_C_END
3013 
3014 #undef __FUNCT__
3015 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
3016 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3017 {
3018   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
3019   PetscFunctionBegin;
3020   *Ad     = a->A;
3021   *Ao     = a->B;
3022   *colmap = a->garray;
3023   PetscFunctionReturn(0);
3024 }
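
/*
   A minimal usage sketch (hypothetical caller code): Ad and Ao alias the
   local diagonal and off-diagonal SeqBAIJ blocks, and colmap[j] gives the
   global block column corresponding to local block column j of Ao:

     Mat      Ad,Ao;
     PetscInt *colmap;
     ierr = MatMPIBAIJGetSeqBAIJ(A,&Ad,&Ao,&colmap);CHKERRQ(ierr);

   The returned objects are internal references and must not be destroyed
   by the caller.
*/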
3025