xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 6ab11f9b6ca3a3a4762ba278e5cdc46bee711a5b)
1 #define PETSCMAT_DLL
2 
3 #include "src/mat/impls/baij/mpi/mpibaij.h"   /*I  "petscmat.h"  I*/
4 
5 EXTERN PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
6 EXTERN PetscErrorCode DisAssemble_MPIBAIJ(Mat);
7 EXTERN PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat,PetscInt,IS[],PetscInt);
8 EXTERN PetscErrorCode MatGetSubMatrices_MPIBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat *[]);
9 EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []);
10 EXTERN PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
11 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
12 EXTERN PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
13 EXTERN PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
14 EXTERN PetscErrorCode MatPrintHelp_SeqBAIJ(Mat);
15 EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,const PetscScalar*);
16 
17 /*  UGLY, ugly, ugly
18    When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does
19    not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and
20    inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ()
21    converts the entries into single precision and then calls ..._MatScalar() to put them
22    into the single precision data structures.
23 */
24 #if defined(PETSC_USE_MAT_SINGLE)
25 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
26 EXTERN PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
27 EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
28 EXTERN PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
29 EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
30 #else
31 #define MatSetValuesBlocked_SeqBAIJ_MatScalar      MatSetValuesBlocked_SeqBAIJ
32 #define MatSetValues_MPIBAIJ_MatScalar             MatSetValues_MPIBAIJ
33 #define MatSetValuesBlocked_MPIBAIJ_MatScalar      MatSetValuesBlocked_MPIBAIJ
34 #define MatSetValues_MPIBAIJ_HT_MatScalar          MatSetValues_MPIBAIJ_HT
35 #define MatSetValuesBlocked_MPIBAIJ_HT_MatScalar   MatSetValuesBlocked_MPIBAIJ_HT
36 #endif
37 
38 #undef __FUNCT__
39 #define __FUNCT__ "MatGetRowMax_MPIBAIJ"
40 PetscErrorCode MatGetRowMax_MPIBAIJ(Mat A,Vec v)
41 {
42   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
43   PetscErrorCode ierr;
44   PetscInt       i;
45   PetscScalar    *va,*vb;
46   Vec            vtmp;
47 
48   PetscFunctionBegin;
49 
50   ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr);
51   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
52 
53   ierr = VecCreateSeq(PETSC_COMM_SELF,A->m,&vtmp);CHKERRQ(ierr);
54   ierr = MatGetRowMax(a->B,vtmp);CHKERRQ(ierr);
55   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
56 
57   for (i=0; i<A->m; i++){
58     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) va[i] = vb[i];
59   }
60 
61   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
62   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
63   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
64 
65   PetscFunctionReturn(0);
66 }
67 
68 EXTERN_C_BEGIN
69 #undef __FUNCT__
70 #define __FUNCT__ "MatStoreValues_MPIBAIJ"
71 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIBAIJ(Mat mat)
72 {
73   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
74   PetscErrorCode ierr;
75 
76   PetscFunctionBegin;
77   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
78   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
79   PetscFunctionReturn(0);
80 }
81 EXTERN_C_END
82 
83 EXTERN_C_BEGIN
84 #undef __FUNCT__
85 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
86 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIBAIJ(Mat mat)
87 {
88   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
89   PetscErrorCode ierr;
90 
91   PetscFunctionBegin;
92   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
93   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
94   PetscFunctionReturn(0);
95 }
96 EXTERN_C_END
97 
98 /*
99      Local utility routine that creates a mapping from the global column
100    number to the local number in the off-diagonal part of the local
101    storage of the matrix.  This is done in a non scable way since the
102    length of colmap equals the global matrix length.
103 */
104 #undef __FUNCT__
105 #define __FUNCT__ "CreateColmap_MPIBAIJ_Private"
106 PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat)
107 {
108   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
109   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
110   PetscErrorCode ierr;
111   PetscInt       nbs = B->nbs,i,bs=mat->bs;
112 
113   PetscFunctionBegin;
114 #if defined (PETSC_USE_CTABLE)
115   ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr);
116   for (i=0; i<nbs; i++){
117     ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
118   }
119 #else
120   ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr);
121   ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
122   ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
123   for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
124 #endif
125   PetscFunctionReturn(0);
126 }
127 
128 #define CHUNKSIZE  10
129 
130 #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
131 { \
132  \
133     brow = row/bs;  \
134     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
135     rmax = aimax[brow]; nrow = ailen[brow]; \
136       bcol = col/bs; \
137       ridx = row % bs; cidx = col % bs; \
138       low = 0; high = nrow; \
139       while (high-low > 3) { \
140         t = (low+high)/2; \
141         if (rp[t] > bcol) high = t; \
142         else              low  = t; \
143       } \
144       for (_i=low; _i<high; _i++) { \
145         if (rp[_i] > bcol) break; \
146         if (rp[_i] == bcol) { \
147           bap  = ap +  bs2*_i + bs*cidx + ridx; \
148           if (addv == ADD_VALUES) *bap += value;  \
149           else                    *bap  = value;  \
150           goto a_noinsert; \
151         } \
152       } \
153       if (a->nonew == 1) goto a_noinsert; \
154       else if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
155       if (nrow >= rmax) { \
156         /* there is no extra room in row, therefore enlarge */ \
157         PetscInt       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
158         MatScalar *new_a; \
159  \
160         if (a->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
161  \
162         /* malloc new storage space */ \
163         ierr = PetscMalloc3(bs2*new_nz,PetscScalar,&new_a,new_nz,PetscInt,&new_j,a->mbs+1,PetscInt,&new_i);CHKERRQ(ierr);\
164  \
165         /* copy over old data into new slots */ \
166         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \
167         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
168         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); \
169         len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
170         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); \
171         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
172         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(PetscScalar));CHKERRQ(ierr); \
173         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
174         /* free up old matrix storage */ \
175         ierr = MatSeqXAIJFreeAIJ(a->singlemalloc,a->a,a->j,a->i);CHKERRQ(ierr);\
176         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
177         a->singlemalloc = PETSC_TRUE; \
178  \
179         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
180         rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
181         ierr = PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar)));CHKERRQ(ierr); \
182         a->maxnz += bs2*CHUNKSIZE; \
183         a->reallocs++; \
184         a->nz++; \
185       } \
186       N = nrow++ - 1;  \
187       /* shift up all the later entries in this row */ \
188       for (ii=N; ii>=_i; ii--) { \
189         rp[ii+1] = rp[ii]; \
190         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
191       } \
192       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
193       rp[_i]                      = bcol;  \
194       ap[bs2*_i + bs*cidx + ridx] = value;  \
195       a_noinsert:; \
196     ailen[brow] = nrow; \
197 }
198 
199 #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
200 { \
201     brow = row/bs;  \
202     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
203     rmax = bimax[brow]; nrow = bilen[brow]; \
204       bcol = col/bs; \
205       ridx = row % bs; cidx = col % bs; \
206       low = 0; high = nrow; \
207       while (high-low > 3) { \
208         t = (low+high)/2; \
209         if (rp[t] > bcol) high = t; \
210         else              low  = t; \
211       } \
212       for (_i=low; _i<high; _i++) { \
213         if (rp[_i] > bcol) break; \
214         if (rp[_i] == bcol) { \
215           bap  = ap +  bs2*_i + bs*cidx + ridx; \
216           if (addv == ADD_VALUES) *bap += value;  \
217           else                    *bap  = value;  \
218           goto b_noinsert; \
219         } \
220       } \
221       if (b->nonew == 1) goto b_noinsert; \
222       else if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
223       if (nrow >= rmax) { \
224         /* there is no extra room in row, therefore enlarge */ \
225         PetscInt       new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
226         MatScalar *new_a; \
227  \
228         if (b->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
229  \
230         /* malloc new storage space */ \
231         ierr = PetscMalloc3(bs2*new_nz,PetscScalar,&new_a,new_nz,PetscInt,&new_j,b->mbs+1,PetscInt,&new_i);CHKERRQ(ierr);\
232  \
233         /* copy over old data into new slots */ \
234         for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \
235         for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
236         ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); \
237         len  = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
238         ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); \
239         ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
240         ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
241         ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
242                     ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
243         /* free up old matrix storage */ \
244         ierr = MatSeqXAIJFreeAIJ(b->singlemalloc,b->a,b->j,b->i);CHKERRQ(ierr);\
245         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
246         b->singlemalloc = PETSC_TRUE; \
247  \
248         rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
249         rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
250         ierr = PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar)));CHKERRQ(ierr); \
251         b->maxnz += bs2*CHUNKSIZE; \
252         b->reallocs++; \
253         b->nz++; \
254       } \
255       N = nrow++ - 1;  \
256       /* shift up all the later entries in this row */ \
257       for (ii=N; ii>=_i; ii--) { \
258         rp[ii+1] = rp[ii]; \
259         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
260       } \
261       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
262       rp[_i]                      = bcol;  \
263       ap[bs2*_i + bs*cidx + ridx] = value;  \
264       b_noinsert:; \
265     bilen[brow] = nrow; \
266 }
267 
268 #if defined(PETSC_USE_MAT_SINGLE)
269 #undef __FUNCT__
270 #define __FUNCT__ "MatSetValues_MPIBAIJ"
271 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
272 {
273   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
274   PetscErrorCode ierr;
275   PetscInt       i,N = m*n;
276   MatScalar      *vsingle;
277 
278   PetscFunctionBegin;
279   if (N > b->setvalueslen) {
280     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
281     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
282     b->setvalueslen  = N;
283   }
284   vsingle = b->setvaluescopy;
285 
286   for (i=0; i<N; i++) {
287     vsingle[i] = v[i];
288   }
289   ierr = MatSetValues_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
290   PetscFunctionReturn(0);
291 }
292 
293 #undef __FUNCT__
294 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
295 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
296 {
297   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
298   PetscErrorCode ierr;
299   PetscInt       i,N = m*n*b->bs2;
300   MatScalar      *vsingle;
301 
302   PetscFunctionBegin;
303   if (N > b->setvalueslen) {
304     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
305     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
306     b->setvalueslen  = N;
307   }
308   vsingle = b->setvaluescopy;
309   for (i=0; i<N; i++) {
310     vsingle[i] = v[i];
311   }
312   ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
313   PetscFunctionReturn(0);
314 }
315 
316 #undef __FUNCT__
317 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
318 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
319 {
320   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
321   PetscErrorCode ierr;
322   PetscInt       i,N = m*n;
323   MatScalar      *vsingle;
324 
325   PetscFunctionBegin;
326   if (N > b->setvalueslen) {
327     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
328     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
329     b->setvalueslen  = N;
330   }
331   vsingle = b->setvaluescopy;
332   for (i=0; i<N; i++) {
333     vsingle[i] = v[i];
334   }
335   ierr = MatSetValues_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
336   PetscFunctionReturn(0);
337 }
338 
339 #undef __FUNCT__
340 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
341 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
342 {
343   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
344   PetscErrorCode ierr;
345   PetscInt       i,N = m*n*b->bs2;
346   MatScalar      *vsingle;
347 
348   PetscFunctionBegin;
349   if (N > b->setvalueslen) {
350     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
351     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
352     b->setvalueslen  = N;
353   }
354   vsingle = b->setvaluescopy;
355   for (i=0; i<N; i++) {
356     vsingle[i] = v[i];
357   }
358   ierr = MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
359   PetscFunctionReturn(0);
360 }
361 #endif
362 
363 #undef __FUNCT__
364 #define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar"
365 PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
366 {
367   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
368   MatScalar      value;
369   PetscTruth     roworiented = baij->roworiented;
370   PetscErrorCode ierr;
371   PetscInt       i,j,row,col;
372   PetscInt       rstart_orig=baij->rstart_bs;
373   PetscInt       rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
374   PetscInt       cend_orig=baij->cend_bs,bs=mat->bs;
375 
376   /* Some Variables required in the macro */
377   Mat            A = baij->A;
378   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)(A)->data;
379   PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
380   MatScalar      *aa=a->a;
381 
382   Mat            B = baij->B;
383   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(B)->data;
384   PetscInt       *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
385   MatScalar      *ba=b->a;
386 
387   PetscInt       *rp,ii,nrow,_i,rmax,N,brow,bcol;
388   PetscInt       low,high,t,ridx,cidx,bs2=a->bs2;
389   MatScalar      *ap,*bap;
390 
391   PetscFunctionBegin;
392   for (i=0; i<m; i++) {
393     if (im[i] < 0) continue;
394 #if defined(PETSC_USE_DEBUG)
395     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1);
396 #endif
397     if (im[i] >= rstart_orig && im[i] < rend_orig) {
398       row = im[i] - rstart_orig;
399       for (j=0; j<n; j++) {
400         if (in[j] >= cstart_orig && in[j] < cend_orig){
401           col = in[j] - cstart_orig;
402           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
403           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
404           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
405         } else if (in[j] < 0) continue;
406 #if defined(PETSC_USE_DEBUG)
407         else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[i],mat->N-1);}
408 #endif
409         else {
410           if (mat->was_assembled) {
411             if (!baij->colmap) {
412               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
413             }
414 #if defined (PETSC_USE_CTABLE)
415             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
416             col  = col - 1;
417 #else
418             col = baij->colmap[in[j]/bs] - 1;
419 #endif
420             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
421               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
422               col =  in[j];
423               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
424               B = baij->B;
425               b = (Mat_SeqBAIJ*)(B)->data;
426               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
427               ba=b->a;
428             } else col += in[j]%bs;
429           } else col = in[j];
430           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
431           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
432           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
433         }
434       }
435     } else {
436       if (!baij->donotstash) {
437         if (roworiented) {
438           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
439         } else {
440           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
441         }
442       }
443     }
444   }
445   PetscFunctionReturn(0);
446 }
447 
448 #undef __FUNCT__
449 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_MatScalar"
450 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
451 {
452   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
453   const MatScalar *value;
454   MatScalar       *barray=baij->barray;
455   PetscTruth      roworiented = baij->roworiented;
456   PetscErrorCode  ierr;
457   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstart;
458   PetscInt        rend=baij->rend,cstart=baij->cstart,stepval;
459   PetscInt        cend=baij->cend,bs=mat->bs,bs2=baij->bs2;
460 
461   PetscFunctionBegin;
462   if(!barray) {
463     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
464     baij->barray = barray;
465   }
466 
467   if (roworiented) {
468     stepval = (n-1)*bs;
469   } else {
470     stepval = (m-1)*bs;
471   }
472   for (i=0; i<m; i++) {
473     if (im[i] < 0) continue;
474 #if defined(PETSC_USE_DEBUG)
475     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
476 #endif
477     if (im[i] >= rstart && im[i] < rend) {
478       row = im[i] - rstart;
479       for (j=0; j<n; j++) {
480         /* If NumCol = 1 then a copy is not required */
481         if ((roworiented) && (n == 1)) {
482           barray = (MatScalar*)v + i*bs2;
483         } else if((!roworiented) && (m == 1)) {
484           barray = (MatScalar*)v + j*bs2;
485         } else { /* Here a copy is required */
486           if (roworiented) {
487             value = v + i*(stepval+bs)*bs + j*bs;
488           } else {
489             value = v + j*(stepval+bs)*bs + i*bs;
490           }
491           for (ii=0; ii<bs; ii++,value+=stepval) {
492             for (jj=0; jj<bs; jj++) {
493               *barray++  = *value++;
494             }
495           }
496           barray -=bs2;
497         }
498 
499         if (in[j] >= cstart && in[j] < cend){
500           col  = in[j] - cstart;
501           ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
502         }
503         else if (in[j] < 0) continue;
504 #if defined(PETSC_USE_DEBUG)
505         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
506 #endif
507         else {
508           if (mat->was_assembled) {
509             if (!baij->colmap) {
510               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
511             }
512 
513 #if defined(PETSC_USE_DEBUG)
514 #if defined (PETSC_USE_CTABLE)
515             { PetscInt data;
516               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
517               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
518             }
519 #else
520             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
521 #endif
522 #endif
523 #if defined (PETSC_USE_CTABLE)
524 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
525             col  = (col - 1)/bs;
526 #else
527             col = (baij->colmap[in[j]] - 1)/bs;
528 #endif
529             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
530               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
531               col =  in[j];
532             }
533           }
534           else col = in[j];
535           ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
536         }
537       }
538     } else {
539       if (!baij->donotstash) {
540         if (roworiented) {
541           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
542         } else {
543           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
544         }
545       }
546     }
547   }
548   PetscFunctionReturn(0);
549 }
550 
551 #define HASH_KEY 0.6180339887
552 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
553 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
554 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
555 #undef __FUNCT__
556 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT_MatScalar"
557 PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
558 {
559   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
560   PetscTruth     roworiented = baij->roworiented;
561   PetscErrorCode ierr;
562   PetscInt       i,j,row,col;
563   PetscInt       rstart_orig=baij->rstart_bs;
564   PetscInt       rend_orig=baij->rend_bs,Nbs=baij->Nbs;
565   PetscInt       h1,key,size=baij->ht_size,bs=mat->bs,*HT=baij->ht,idx;
566   PetscReal      tmp;
567   MatScalar      **HD = baij->hd,value;
568 #if defined(PETSC_USE_DEBUG)
569   PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
570 #endif
571 
572   PetscFunctionBegin;
573 
574   for (i=0; i<m; i++) {
575 #if defined(PETSC_USE_DEBUG)
576     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
577     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1);
578 #endif
579       row = im[i];
580     if (row >= rstart_orig && row < rend_orig) {
581       for (j=0; j<n; j++) {
582         col = in[j];
583         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
584         /* Look up PetscInto the Hash Table */
585         key = (row/bs)*Nbs+(col/bs)+1;
586         h1  = HASH(size,key,tmp);
587 
588 
589         idx = h1;
590 #if defined(PETSC_USE_DEBUG)
591         insert_ct++;
592         total_ct++;
593         if (HT[idx] != key) {
594           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
595           if (idx == size) {
596             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
597             if (idx == h1) {
598               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
599             }
600           }
601         }
602 #else
603         if (HT[idx] != key) {
604           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
605           if (idx == size) {
606             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
607             if (idx == h1) {
608               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
609             }
610           }
611         }
612 #endif
613         /* A HASH table entry is found, so insert the values at the correct address */
614         if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
615         else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
616       }
617     } else {
618       if (!baij->donotstash) {
619         if (roworiented) {
620           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
621         } else {
622           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
623         }
624       }
625     }
626   }
627 #if defined(PETSC_USE_DEBUG)
628   baij->ht_total_ct = total_ct;
629   baij->ht_insert_ct = insert_ct;
630 #endif
631   PetscFunctionReturn(0);
632 }
633 
634 #undef __FUNCT__
635 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT_MatScalar"
636 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
637 {
638   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
639   PetscTruth      roworiented = baij->roworiented;
640   PetscErrorCode  ierr;
641   PetscInt        i,j,ii,jj,row,col;
642   PetscInt        rstart=baij->rstart ;
643   PetscInt        rend=baij->rend,stepval,bs=mat->bs,bs2=baij->bs2;
644   PetscInt        h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
645   PetscReal       tmp;
646   MatScalar       **HD = baij->hd,*baij_a;
647   const MatScalar *v_t,*value;
648 #if defined(PETSC_USE_DEBUG)
649   PetscInt        total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
650 #endif
651 
652   PetscFunctionBegin;
653 
654   if (roworiented) {
655     stepval = (n-1)*bs;
656   } else {
657     stepval = (m-1)*bs;
658   }
659   for (i=0; i<m; i++) {
660 #if defined(PETSC_USE_DEBUG)
661     if (im[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
662     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
663 #endif
664     row   = im[i];
665     v_t   = v + i*bs2;
666     if (row >= rstart && row < rend) {
667       for (j=0; j<n; j++) {
668         col = in[j];
669 
670         /* Look up into the Hash Table */
671         key = row*Nbs+col+1;
672         h1  = HASH(size,key,tmp);
673 
674         idx = h1;
675 #if defined(PETSC_USE_DEBUG)
676         total_ct++;
677         insert_ct++;
678        if (HT[idx] != key) {
679           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
680           if (idx == size) {
681             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
682             if (idx == h1) {
683               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
684             }
685           }
686         }
687 #else
688         if (HT[idx] != key) {
689           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
690           if (idx == size) {
691             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
692             if (idx == h1) {
693               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
694             }
695           }
696         }
697 #endif
698         baij_a = HD[idx];
699         if (roworiented) {
700           /*value = v + i*(stepval+bs)*bs + j*bs;*/
701           /* value = v + (i*(stepval+bs)+j)*bs; */
702           value = v_t;
703           v_t  += bs;
704           if (addv == ADD_VALUES) {
705             for (ii=0; ii<bs; ii++,value+=stepval) {
706               for (jj=ii; jj<bs2; jj+=bs) {
707                 baij_a[jj]  += *value++;
708               }
709             }
710           } else {
711             for (ii=0; ii<bs; ii++,value+=stepval) {
712               for (jj=ii; jj<bs2; jj+=bs) {
713                 baij_a[jj]  = *value++;
714               }
715             }
716           }
717         } else {
718           value = v + j*(stepval+bs)*bs + i*bs;
719           if (addv == ADD_VALUES) {
720             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
721               for (jj=0; jj<bs; jj++) {
722                 baij_a[jj]  += *value++;
723               }
724             }
725           } else {
726             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
727               for (jj=0; jj<bs; jj++) {
728                 baij_a[jj]  = *value++;
729               }
730             }
731           }
732         }
733       }
734     } else {
735       if (!baij->donotstash) {
736         if (roworiented) {
737           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
738         } else {
739           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
740         }
741       }
742     }
743   }
744 #if defined(PETSC_USE_DEBUG)
745   baij->ht_total_ct = total_ct;
746   baij->ht_insert_ct = insert_ct;
747 #endif
748   PetscFunctionReturn(0);
749 }
750 
751 #undef __FUNCT__
752 #define __FUNCT__ "MatGetValues_MPIBAIJ"
753 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
754 {
755   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
756   PetscErrorCode ierr;
757   PetscInt       bs=mat->bs,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
758   PetscInt       bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;
759 
760   PetscFunctionBegin;
761   for (i=0; i<m; i++) {
762     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
763     if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1);
764     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
765       row = idxm[i] - bsrstart;
766       for (j=0; j<n; j++) {
767         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]);
768         if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1);
769         if (idxn[j] >= bscstart && idxn[j] < bscend){
770           col = idxn[j] - bscstart;
771           ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
772         } else {
773           if (!baij->colmap) {
774             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
775           }
776 #if defined (PETSC_USE_CTABLE)
777           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
778           data --;
779 #else
780           data = baij->colmap[idxn[j]/bs]-1;
781 #endif
782           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
783           else {
784             col  = data + idxn[j]%bs;
785             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
786           }
787         }
788       }
789     } else {
790       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
791     }
792   }
793  PetscFunctionReturn(0);
794 }
795 
796 #undef __FUNCT__
797 #define __FUNCT__ "MatNorm_MPIBAIJ"
798 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
799 {
800   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
801   Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
802   PetscErrorCode ierr;
803   PetscInt       i,j,bs2=baij->bs2,bs=baij->A->bs,nz,row,col;
804   PetscReal      sum = 0.0;
805   MatScalar      *v;
806 
807   PetscFunctionBegin;
808   if (baij->size == 1) {
809     ierr =  MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
810   } else {
811     if (type == NORM_FROBENIUS) {
812       v = amat->a;
813       nz = amat->nz*bs2;
814       for (i=0; i<nz; i++) {
815 #if defined(PETSC_USE_COMPLEX)
816         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
817 #else
818         sum += (*v)*(*v); v++;
819 #endif
820       }
821       v = bmat->a;
822       nz = bmat->nz*bs2;
823       for (i=0; i<nz; i++) {
824 #if defined(PETSC_USE_COMPLEX)
825         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
826 #else
827         sum += (*v)*(*v); v++;
828 #endif
829       }
830       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
831       *nrm = sqrt(*nrm);
832     } else if (type == NORM_1) { /* max column sum */
833       PetscReal *tmp,*tmp2;
834       PetscInt  *jj,*garray=baij->garray,cstart=baij->cstart;
835       ierr = PetscMalloc((2*mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
836       tmp2 = tmp + mat->N;
837       ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
838       v = amat->a; jj = amat->j;
839       for (i=0; i<amat->nz; i++) {
840         for (j=0; j<bs; j++){
841           col = bs*(cstart + *jj) + j; /* column index */
842           for (row=0; row<bs; row++){
843             tmp[col] += PetscAbsScalar(*v);  v++;
844           }
845         }
846         jj++;
847       }
848       v = bmat->a; jj = bmat->j;
849       for (i=0; i<bmat->nz; i++) {
850         for (j=0; j<bs; j++){
851           col = bs*garray[*jj] + j;
852           for (row=0; row<bs; row++){
853             tmp[col] += PetscAbsScalar(*v); v++;
854           }
855         }
856         jj++;
857       }
858       ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
859       *nrm = 0.0;
860       for (j=0; j<mat->N; j++) {
861         if (tmp2[j] > *nrm) *nrm = tmp2[j];
862       }
863       ierr = PetscFree(tmp);CHKERRQ(ierr);
864     } else if (type == NORM_INFINITY) { /* max row sum */
865       PetscReal *sums;
866       ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr)
867       sum = 0.0;
868       for (j=0; j<amat->mbs; j++) {
869         for (row=0; row<bs; row++) sums[row] = 0.0;
870         v = amat->a + bs2*amat->i[j];
871         nz = amat->i[j+1]-amat->i[j];
872         for (i=0; i<nz; i++) {
873           for (col=0; col<bs; col++){
874             for (row=0; row<bs; row++){
875               sums[row] += PetscAbsScalar(*v); v++;
876             }
877           }
878         }
879         v = bmat->a + bs2*bmat->i[j];
880         nz = bmat->i[j+1]-bmat->i[j];
881         for (i=0; i<nz; i++) {
882           for (col=0; col<bs; col++){
883             for (row=0; row<bs; row++){
884               sums[row] += PetscAbsScalar(*v); v++;
885             }
886           }
887         }
888         for (row=0; row<bs; row++){
889           if (sums[row] > sum) sum = sums[row];
890         }
891       }
892       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
893       ierr = PetscFree(sums);CHKERRQ(ierr);
894     } else {
895       SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
896     }
897   }
898   PetscFunctionReturn(0);
899 }
900 
901 /*
902   Creates the hash table, and sets the table
903   This table is created only once.
904   If new entried need to be added to the matrix
905   then the hash table has to be destroyed and
906   recreated.
907 */
908 #undef __FUNCT__
909 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
910 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
911 {
912   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
913   Mat            A = baij->A,B=baij->B;
914   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
915   PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
916   PetscErrorCode ierr;
917   PetscInt       size,bs2=baij->bs2,rstart=baij->rstart;
918   PetscInt       cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs;
919   PetscInt       *HT,key;
920   MatScalar      **HD;
921   PetscReal      tmp;
922 #if defined(PETSC_USE_DEBUG)
923   PetscInt       ct=0,max=0;
924 #endif
925 
926   PetscFunctionBegin;
927   baij->ht_size=(PetscInt)(factor*nz);
928   size = baij->ht_size;
929 
930   if (baij->ht) {
931     PetscFunctionReturn(0);
932   }
933 
934   /* Allocate Memory for Hash Table */
935   ierr     = PetscMalloc((size)*(sizeof(PetscInt)+sizeof(MatScalar*))+1,&baij->hd);CHKERRQ(ierr);
936   baij->ht = (PetscInt*)(baij->hd + size);
937   HD       = baij->hd;
938   HT       = baij->ht;
939 
940 
941   ierr = PetscMemzero(HD,size*(sizeof(PetscInt)+sizeof(PetscScalar*)));CHKERRQ(ierr);
942 
943 
944   /* Loop Over A */
945   for (i=0; i<a->mbs; i++) {
946     for (j=ai[i]; j<ai[i+1]; j++) {
947       row = i+rstart;
948       col = aj[j]+cstart;
949 
950       key = row*Nbs + col + 1;
951       h1  = HASH(size,key,tmp);
952       for (k=0; k<size; k++){
953         if (!HT[(h1+k)%size]) {
954           HT[(h1+k)%size] = key;
955           HD[(h1+k)%size] = a->a + j*bs2;
956           break;
957 #if defined(PETSC_USE_DEBUG)
958         } else {
959           ct++;
960 #endif
961         }
962       }
963 #if defined(PETSC_USE_DEBUG)
964       if (k> max) max = k;
965 #endif
966     }
967   }
968   /* Loop Over B */
969   for (i=0; i<b->mbs; i++) {
970     for (j=bi[i]; j<bi[i+1]; j++) {
971       row = i+rstart;
972       col = garray[bj[j]];
973       key = row*Nbs + col + 1;
974       h1  = HASH(size,key,tmp);
975       for (k=0; k<size; k++){
976         if (!HT[(h1+k)%size]) {
977           HT[(h1+k)%size] = key;
978           HD[(h1+k)%size] = b->a + j*bs2;
979           break;
980 #if defined(PETSC_USE_DEBUG)
981         } else {
982           ct++;
983 #endif
984         }
985       }
986 #if defined(PETSC_USE_DEBUG)
987       if (k> max) max = k;
988 #endif
989     }
990   }
991 
992   /* Print Summary */
993 #if defined(PETSC_USE_DEBUG)
994   for (i=0,j=0; i<size; i++) {
995     if (HT[i]) {j++;}
996   }
997   ierr = PetscLogInfo((0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max));CHKERRQ(ierr);
998 #endif
999   PetscFunctionReturn(0);
1000 }
1001 
1002 #undef __FUNCT__
1003 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
1004 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
1005 {
1006   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1007   PetscErrorCode ierr;
1008   PetscInt       nstash,reallocs;
1009   InsertMode     addv;
1010 
1011   PetscFunctionBegin;
1012   if (baij->donotstash) {
1013     PetscFunctionReturn(0);
1014   }
1015 
1016   /* make sure all processors are either in INSERTMODE or ADDMODE */
1017   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
1018   if (addv == (ADD_VALUES|INSERT_VALUES)) {
1019     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
1020   }
1021   mat->insertmode = addv; /* in case this processor had no cache */
1022 
1023   ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
1024   ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
1025   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
1026   ierr = PetscLogInfo((0,"MatAssemblyBegin_MPIBAIJ:Stash has %D entries,uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr);
1027   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
1028   ierr = PetscLogInfo((0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr);
1029   PetscFunctionReturn(0);
1030 }
1031 
1032 #undef __FUNCT__
1033 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
1034 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
1035 {
1036   Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
1037   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data;
1038   PetscErrorCode ierr;
1039   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
1040   PetscInt       *row,*col,other_disassembled;
1041   PetscTruth     r1,r2,r3;
1042   MatScalar      *val;
1043   InsertMode     addv = mat->insertmode;
1044   PetscMPIInt    n;
1045 
1046   PetscFunctionBegin;
1047   if (!baij->donotstash) {
1048     while (1) {
1049       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
1050       if (!flg) break;
1051 
1052       for (i=0; i<n;) {
1053         /* Now identify the consecutive vals belonging to the same row */
1054         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
1055         if (j < n) ncols = j-i;
1056         else       ncols = n-i;
1057         /* Now assemble all these values with a single function call */
1058         ierr = MatSetValues_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
1059         i = j;
1060       }
1061     }
1062     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
1063     /* Now process the block-stash. Since the values are stashed column-oriented,
1064        set the roworiented flag to column oriented, and after MatSetValues()
1065        restore the original flags */
1066     r1 = baij->roworiented;
1067     r2 = a->roworiented;
1068     r3 = b->roworiented;
1069     baij->roworiented = PETSC_FALSE;
1070     a->roworiented    = PETSC_FALSE;
1071     b->roworiented    = PETSC_FALSE;
1072     while (1) {
1073       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
1074       if (!flg) break;
1075 
1076       for (i=0; i<n;) {
1077         /* Now identify the consecutive vals belonging to the same row */
1078         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
1079         if (j < n) ncols = j-i;
1080         else       ncols = n-i;
1081         ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
1082         i = j;
1083       }
1084     }
1085     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
1086     baij->roworiented = r1;
1087     a->roworiented    = r2;
1088     b->roworiented    = r3;
1089   }
1090 
1091   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
1092   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
1093 
1094   /* determine if any processor has disassembled, if so we must
1095      also disassemble ourselfs, in order that we may reassemble. */
1096   /*
1097      if nonzero structure of submatrix B cannot change then we know that
1098      no processor disassembled thus we can skip this stuff
1099   */
1100   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
1101     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
1102     if (mat->was_assembled && !other_disassembled) {
1103       ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
1104     }
1105   }
1106 
1107   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
1108     ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
1109   }
1110   b->compressedrow.use = PETSC_TRUE;
1111   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
1112   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
1113 
1114 #if defined(PETSC_USE_DEBUG)
1115   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
1116     ierr = PetscLogInfo((0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct));CHKERRQ(ierr);
1117     baij->ht_total_ct  = 0;
1118     baij->ht_insert_ct = 0;
1119   }
1120 #endif
1121   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
1122     ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
1123     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
1124     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
1125   }
1126 
1127   if (baij->rowvalues) {
1128     ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
1129     baij->rowvalues = 0;
1130   }
1131   PetscFunctionReturn(0);
1132 }
1133 
1134 #undef __FUNCT__
1135 #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
1136 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1137 {
1138   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
1139   PetscErrorCode    ierr;
1140   PetscMPIInt       size = baij->size,rank = baij->rank;
1141   PetscInt          bs = mat->bs;
1142   PetscTruth        iascii,isdraw;
1143   PetscViewer       sviewer;
1144   PetscViewerFormat format;
1145 
1146   PetscFunctionBegin;
1147   /* printf(" MatView_MPIBAIJ_ASCIIorDraworSocket is called ...\n"); */
1148   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1149   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1150   if (iascii) {
1151     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1152     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1153       MatInfo info;
1154       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
1155       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1156       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
1157               rank,mat->m,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs,
1158               mat->bs,(PetscInt)info.memory);CHKERRQ(ierr);
1159       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1160       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
1161       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1162       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
1163       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1164       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
1165       PetscFunctionReturn(0);
1166     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1167       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1168       PetscFunctionReturn(0);
1169     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1170       PetscFunctionReturn(0);
1171     }
1172   }
1173 
1174   if (isdraw) {
1175     PetscDraw       draw;
1176     PetscTruth isnull;
1177     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1178     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1179   }
1180 
1181   if (size == 1) {
1182     ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr);
1183     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
1184   } else {
1185     /* assemble the entire matrix onto first processor. */
1186     Mat         A;
1187     Mat_SeqBAIJ *Aloc;
1188     PetscInt    M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
1189     MatScalar   *a;
1190 
1191     /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
1192     /* Perhaps this should be the type of mat? */
1193     if (!rank) {
1194       ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr);
1195     } else {
1196       ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr);
1197     }
1198     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
1199     ierr = MatMPIBAIJSetPreallocation(A,mat->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1200     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1201 
1202     /* copy over the A part */
1203     Aloc = (Mat_SeqBAIJ*)baij->A->data;
1204     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1205     ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
1206 
1207     for (i=0; i<mbs; i++) {
1208       rvals[0] = bs*(baij->rstart + i);
1209       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1210       for (j=ai[i]; j<ai[i+1]; j++) {
1211         col = (baij->cstart+aj[j])*bs;
1212         for (k=0; k<bs; k++) {
1213           ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1214           col++; a += bs;
1215         }
1216       }
1217     }
1218     /* copy over the B part */
1219     Aloc = (Mat_SeqBAIJ*)baij->B->data;
1220     ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1221     for (i=0; i<mbs; i++) {
1222       rvals[0] = bs*(baij->rstart + i);
1223       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1224       for (j=ai[i]; j<ai[i+1]; j++) {
1225         col = baij->garray[aj[j]]*bs;
1226         for (k=0; k<bs; k++) {
1227           ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1228           col++; a += bs;
1229         }
1230       }
1231     }
1232     ierr = PetscFree(rvals);CHKERRQ(ierr);
1233     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1234     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1235     /*
1236        Everyone has to call to draw the matrix since the graphics waits are
1237        synchronized across all processors that share the PetscDraw object
1238     */
1239     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1240     if (!rank) {
1241       ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
1242       ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1243     }
1244     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1245     ierr = MatDestroy(A);CHKERRQ(ierr);
1246   }
1247   PetscFunctionReturn(0);
1248 }
1249 
1250 #undef __FUNCT__
1251 #define __FUNCT__ "MatView_MPIBAIJ"
1252 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
1253 {
1254   PetscErrorCode ierr;
1255   PetscTruth     iascii,isdraw,issocket,isbinary;
1256 
1257   PetscFunctionBegin;
1258   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1259   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1260   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1261   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1262   if (iascii || isdraw || issocket || isbinary) {
1263     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1264   } else {
1265     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
1266   }
1267   PetscFunctionReturn(0);
1268 }
1269 
1270 #undef __FUNCT__
1271 #define __FUNCT__ "MatDestroy_MPIBAIJ"
1272 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
1273 {
1274   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1275   PetscErrorCode ierr;
1276 
1277   PetscFunctionBegin;
1278 #if defined(PETSC_USE_LOG)
1279   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->M,mat->N);
1280 #endif
1281   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1282   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
1283   ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
1284   ierr = MatDestroy(baij->A);CHKERRQ(ierr);
1285   ierr = MatDestroy(baij->B);CHKERRQ(ierr);
1286 #if defined (PETSC_USE_CTABLE)
1287   if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
1288 #else
1289   if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
1290 #endif
1291   if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
1292   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
1293   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
1294   if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
1295   if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
1296   if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
1297 #if defined(PETSC_USE_MAT_SINGLE)
1298   if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);}
1299 #endif
1300   ierr = PetscFree(baij);CHKERRQ(ierr);
1301 
1302   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1303   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1304   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
1305   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1306   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1307   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
1308   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr);
1309   PetscFunctionReturn(0);
1310 }
1311 
1312 #undef __FUNCT__
1313 #define __FUNCT__ "MatMult_MPIBAIJ"
1314 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1315 {
1316   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1317   PetscErrorCode ierr;
1318   PetscInt       nt;
1319 
1320   PetscFunctionBegin;
1321   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1322   if (nt != A->n) {
1323     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1324   }
1325   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1326   if (nt != A->m) {
1327     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
1328   }
1329   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1330   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1331   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1332   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1333   ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1334   PetscFunctionReturn(0);
1335 }
1336 
1337 #undef __FUNCT__
1338 #define __FUNCT__ "MatMultAdd_MPIBAIJ"
1339 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1340 {
1341   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1342   PetscErrorCode ierr;
1343 
1344   PetscFunctionBegin;
1345   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1346   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1347   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1348   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1349   PetscFunctionReturn(0);
1350 }
1351 
1352 #undef __FUNCT__
1353 #define __FUNCT__ "MatMultTranspose_MPIBAIJ"
1354 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1355 {
1356   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1357   PetscErrorCode ierr;
1358   PetscTruth     merged;
1359 
1360   PetscFunctionBegin;
1361   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1362   /* do nondiagonal part */
1363   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1364   if (!merged) {
1365     /* send it on its way */
1366     ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1367     /* do local part */
1368     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1369     /* receive remote parts: note this assumes the values are not actually */
1370     /* inserted in yy until the next line */
1371     ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1372   } else {
1373     /* do local part */
1374     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1375     /* send it on its way */
1376     ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1377     /* values actually were received in the Begin() but we need to call this nop */
1378     ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1379   }
1380   PetscFunctionReturn(0);
1381 }
1382 
1383 #undef __FUNCT__
1384 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
1385 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1386 {
1387   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1388   PetscErrorCode ierr;
1389 
1390   PetscFunctionBegin;
1391   /* do nondiagonal part */
1392   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1393   /* send it on its way */
1394   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1395   /* do local part */
1396   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1397   /* receive remote parts: note this assumes the values are not actually */
1398   /* inserted in yy until the next line, which is true for my implementation*/
1399   /* but is not perhaps always true. */
1400   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1401   PetscFunctionReturn(0);
1402 }
1403 
1404 /*
1405   This only works correctly for square matrices where the subblock A->A is the
1406    diagonal block
1407 */
1408 #undef __FUNCT__
1409 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
1410 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1411 {
1412   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1413   PetscErrorCode ierr;
1414 
1415   PetscFunctionBegin;
1416   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1417   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1418   PetscFunctionReturn(0);
1419 }
1420 
1421 #undef __FUNCT__
1422 #define __FUNCT__ "MatScale_MPIBAIJ"
1423 PetscErrorCode MatScale_MPIBAIJ(const PetscScalar *aa,Mat A)
1424 {
1425   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1426   PetscErrorCode ierr;
1427 
1428   PetscFunctionBegin;
1429   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
1430   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
1431   PetscFunctionReturn(0);
1432 }
1433 
1434 #undef __FUNCT__
1435 #define __FUNCT__ "MatGetRow_MPIBAIJ"
1436 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1437 {
1438   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
1439   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1440   PetscErrorCode ierr;
1441   PetscInt       bs = matin->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1442   PetscInt       nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs;
1443   PetscInt       *cmap,*idx_p,cstart = mat->cstart;
1444 
1445   PetscFunctionBegin;
1446   if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1447   mat->getrowactive = PETSC_TRUE;
1448 
1449   if (!mat->rowvalues && (idx || v)) {
1450     /*
1451         allocate enough space to hold information from the longest row.
1452     */
1453     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1454     PetscInt     max = 1,mbs = mat->mbs,tmp;
1455     for (i=0; i<mbs; i++) {
1456       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1457       if (max < tmp) { max = tmp; }
1458     }
1459     ierr = PetscMalloc(max*bs2*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1460     mat->rowindices = (PetscInt*)(mat->rowvalues + max*bs2);
1461   }
1462 
1463   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows")
1464   lrow = row - brstart;
1465 
1466   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1467   if (!v)   {pvA = 0; pvB = 0;}
1468   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1469   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1470   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1471   nztot = nzA + nzB;
1472 
1473   cmap  = mat->garray;
1474   if (v  || idx) {
1475     if (nztot) {
1476       /* Sort by increasing column numbers, assuming A and B already sorted */
1477       PetscInt imark = -1;
1478       if (v) {
1479         *v = v_p = mat->rowvalues;
1480         for (i=0; i<nzB; i++) {
1481           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1482           else break;
1483         }
1484         imark = i;
1485         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1486         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1487       }
1488       if (idx) {
1489         *idx = idx_p = mat->rowindices;
1490         if (imark > -1) {
1491           for (i=0; i<imark; i++) {
1492             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1493           }
1494         } else {
1495           for (i=0; i<nzB; i++) {
1496             if (cmap[cworkB[i]/bs] < cstart)
1497               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1498             else break;
1499           }
1500           imark = i;
1501         }
1502         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1503         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1504       }
1505     } else {
1506       if (idx) *idx = 0;
1507       if (v)   *v   = 0;
1508     }
1509   }
1510   *nz = nztot;
1511   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1512   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1513   PetscFunctionReturn(0);
1514 }
1515 
1516 #undef __FUNCT__
1517 #define __FUNCT__ "MatRestoreRow_MPIBAIJ"
1518 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1519 {
1520   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1521 
1522   PetscFunctionBegin;
1523   if (!baij->getrowactive) {
1524     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1525   }
1526   baij->getrowactive = PETSC_FALSE;
1527   PetscFunctionReturn(0);
1528 }
1529 
1530 #undef __FUNCT__
1531 #define __FUNCT__ "MatZeroEntries_MPIBAIJ"
1532 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1533 {
1534   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1535   PetscErrorCode ierr;
1536 
1537   PetscFunctionBegin;
1538   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1539   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1540   PetscFunctionReturn(0);
1541 }
1542 
1543 #undef __FUNCT__
1544 #define __FUNCT__ "MatGetInfo_MPIBAIJ"
1545 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1546 {
1547   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
1548   Mat            A = a->A,B = a->B;
1549   PetscErrorCode ierr;
1550   PetscReal      isend[5],irecv[5];
1551 
1552   PetscFunctionBegin;
1553   info->block_size     = (PetscReal)matin->bs;
1554   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1555   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1556   isend[3] = info->memory;  isend[4] = info->mallocs;
1557   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1558   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1559   isend[3] += info->memory;  isend[4] += info->mallocs;
1560   if (flag == MAT_LOCAL) {
1561     info->nz_used      = isend[0];
1562     info->nz_allocated = isend[1];
1563     info->nz_unneeded  = isend[2];
1564     info->memory       = isend[3];
1565     info->mallocs      = isend[4];
1566   } else if (flag == MAT_GLOBAL_MAX) {
1567     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1568     info->nz_used      = irecv[0];
1569     info->nz_allocated = irecv[1];
1570     info->nz_unneeded  = irecv[2];
1571     info->memory       = irecv[3];
1572     info->mallocs      = irecv[4];
1573   } else if (flag == MAT_GLOBAL_SUM) {
1574     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1575     info->nz_used      = irecv[0];
1576     info->nz_allocated = irecv[1];
1577     info->nz_unneeded  = irecv[2];
1578     info->memory       = irecv[3];
1579     info->mallocs      = irecv[4];
1580   } else {
1581     SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1582   }
1583   info->rows_global       = (PetscReal)A->M;
1584   info->columns_global    = (PetscReal)A->N;
1585   info->rows_local        = (PetscReal)A->m;
1586   info->columns_local     = (PetscReal)A->N;
1587   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1588   info->fill_ratio_needed = 0;
1589   info->factor_mallocs    = 0;
1590   PetscFunctionReturn(0);
1591 }
1592 
1593 #undef __FUNCT__
1594 #define __FUNCT__ "MatSetOption_MPIBAIJ"
1595 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op)
1596 {
1597   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1598   PetscErrorCode ierr;
1599 
1600   PetscFunctionBegin;
1601   switch (op) {
1602   case MAT_NO_NEW_NONZERO_LOCATIONS:
1603   case MAT_YES_NEW_NONZERO_LOCATIONS:
1604   case MAT_COLUMNS_UNSORTED:
1605   case MAT_COLUMNS_SORTED:
1606   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1607   case MAT_KEEP_ZEROED_ROWS:
1608   case MAT_NEW_NONZERO_LOCATION_ERR:
1609     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1610     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1611     break;
1612   case MAT_ROW_ORIENTED:
1613     a->roworiented = PETSC_TRUE;
1614     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1615     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1616     break;
1617   case MAT_ROWS_SORTED:
1618   case MAT_ROWS_UNSORTED:
1619   case MAT_YES_NEW_DIAGONALS:
1620     ierr = PetscLogInfo((A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"));CHKERRQ(ierr);
1621     break;
1622   case MAT_COLUMN_ORIENTED:
1623     a->roworiented = PETSC_FALSE;
1624     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1625     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1626     break;
1627   case MAT_IGNORE_OFF_PROC_ENTRIES:
1628     a->donotstash = PETSC_TRUE;
1629     break;
1630   case MAT_NO_NEW_DIAGONALS:
1631     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1632   case MAT_USE_HASH_TABLE:
1633     a->ht_flag = PETSC_TRUE;
1634     break;
1635   case MAT_SYMMETRIC:
1636   case MAT_STRUCTURALLY_SYMMETRIC:
1637   case MAT_HERMITIAN:
1638   case MAT_SYMMETRY_ETERNAL:
1639     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1640     break;
1641   case MAT_NOT_SYMMETRIC:
1642   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1643   case MAT_NOT_HERMITIAN:
1644   case MAT_NOT_SYMMETRY_ETERNAL:
1645     break;
1646   default:
1647     SETERRQ(PETSC_ERR_SUP,"unknown option");
1648   }
1649   PetscFunctionReturn(0);
1650 }
1651 
1652 #undef __FUNCT__
1653 #define __FUNCT__ "MatTranspose_MPIBAIJ("
1654 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,Mat *matout)
1655 {
1656   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
1657   Mat_SeqBAIJ    *Aloc;
1658   Mat            B;
1659   PetscErrorCode ierr;
1660   PetscInt       M=A->M,N=A->N,*ai,*aj,i,*rvals,j,k,col;
1661   PetscInt       bs=A->bs,mbs=baij->mbs;
1662   MatScalar      *a;
1663 
1664   PetscFunctionBegin;
1665   if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1666   ierr = MatCreate(A->comm,A->n,A->m,N,M,&B);CHKERRQ(ierr);
1667   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
1668   ierr = MatMPIBAIJSetPreallocation(B,A->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1669 
1670   /* copy over the A part */
1671   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1672   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1673   ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
1674 
1675   for (i=0; i<mbs; i++) {
1676     rvals[0] = bs*(baij->rstart + i);
1677     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1678     for (j=ai[i]; j<ai[i+1]; j++) {
1679       col = (baij->cstart+aj[j])*bs;
1680       for (k=0; k<bs; k++) {
1681         ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1682         col++; a += bs;
1683       }
1684     }
1685   }
1686   /* copy over the B part */
1687   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1688   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1689   for (i=0; i<mbs; i++) {
1690     rvals[0] = bs*(baij->rstart + i);
1691     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1692     for (j=ai[i]; j<ai[i+1]; j++) {
1693       col = baij->garray[aj[j]]*bs;
1694       for (k=0; k<bs; k++) {
1695         ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1696         col++; a += bs;
1697       }
1698     }
1699   }
1700   ierr = PetscFree(rvals);CHKERRQ(ierr);
1701   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1702   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1703 
1704   if (matout) {
1705     *matout = B;
1706   } else {
1707     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1708   }
1709   PetscFunctionReturn(0);
1710 }
1711 
1712 #undef __FUNCT__
1713 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
1714 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1715 {
1716   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1717   Mat            a = baij->A,b = baij->B;
1718   PetscErrorCode ierr;
1719   PetscInt       s1,s2,s3;
1720 
1721   PetscFunctionBegin;
1722   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1723   if (rr) {
1724     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1725     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1726     /* Overlap communication with computation. */
1727     ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1728   }
1729   if (ll) {
1730     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1731     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1732     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1733   }
1734   /* scale  the diagonal block */
1735   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1736 
1737   if (rr) {
1738     /* Do a scatter end and then right scale the off-diagonal block */
1739     ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1740     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1741   }
1742 
1743   PetscFunctionReturn(0);
1744 }
1745 
1746 #undef __FUNCT__
1747 #define __FUNCT__ "MatZeroRows_MPIBAIJ"
1748 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,IS is,const PetscScalar *diag)
1749 {
1750   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1751   PetscErrorCode ierr;
1752   PetscMPIInt    imdex,size = l->size,n,rank = l->rank;
1753   PetscInt       i,N,*rows,*owners = l->rowners;
1754   PetscInt       *nprocs,j,idx,nsends,row;
1755   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
1756   PetscInt       *rvalues,tag = A->tag,count,base,slen,*source,lastidx = -1;
1757   PetscInt       *lens,*lrows,*values,bs=A->bs,rstart_bs=l->rstart_bs;
1758   MPI_Comm       comm = A->comm;
1759   MPI_Request    *send_waits,*recv_waits;
1760   MPI_Status     recv_status,*send_status;
1761   IS             istmp;
1762 #if defined(PETSC_DEBUG)
1763   PetscTruth     found = PETSC_FALSE;
1764 #endif
1765 
1766   PetscFunctionBegin;
1767   ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
1768   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
1769 
1770   /*  first count number of contributors to each processor */
1771   ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
1772   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
1773   ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
1774   j = 0;
1775   for (i=0; i<N; i++) {
1776     if (lastidx > (idx = rows[i])) j = 0;
1777     lastidx = idx;
1778     for (; j<size; j++) {
1779       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
1780         nprocs[2*j]++;
1781         nprocs[2*j+1] = 1;
1782         owner[i] = j;
1783 #if defined(PETSC_DEBUG)
1784         found = PETSC_TRUE;
1785 #endif
1786         break;
1787       }
1788     }
1789 #if defined(PETSC_DEBUG)
1790     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
1791     found = PETSC_FALSE;
1792 #endif
1793   }
1794   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
1795 
1796   /* inform other processors of number of messages and max length*/
1797   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
1798 
1799   /* post receives:   */
1800   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
1801   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
1802   for (i=0; i<nrecvs; i++) {
1803     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
1804   }
1805 
1806   /* do sends:
1807      1) starts[i] gives the starting index in svalues for stuff going to
1808      the ith processor
1809   */
1810   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
1811   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
1812   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
1813   starts[0]  = 0;
1814   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1815   for (i=0; i<N; i++) {
1816     svalues[starts[owner[i]]++] = rows[i];
1817   }
1818   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
1819 
1820   starts[0] = 0;
1821   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1822   count = 0;
1823   for (i=0; i<size; i++) {
1824     if (nprocs[2*i+1]) {
1825       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
1826     }
1827   }
1828   ierr = PetscFree(starts);CHKERRQ(ierr);
1829 
1830   base = owners[rank]*bs;
1831 
1832   /*  wait on receives */
1833   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
1834   source = lens + nrecvs;
1835   count  = nrecvs; slen = 0;
1836   while (count) {
1837     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
1838     /* unpack receives into our local space */
1839     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
1840     source[imdex]  = recv_status.MPI_SOURCE;
1841     lens[imdex]    = n;
1842     slen          += n;
1843     count--;
1844   }
1845   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1846 
1847   /* move the data into the send scatter */
1848   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
1849   count = 0;
1850   for (i=0; i<nrecvs; i++) {
1851     values = rvalues + i*nmax;
1852     for (j=0; j<lens[i]; j++) {
1853       lrows[count++] = values[j] - base;
1854     }
1855   }
1856   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1857   ierr = PetscFree(lens);CHKERRQ(ierr);
1858   ierr = PetscFree(owner);CHKERRQ(ierr);
1859   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1860 
1861   /* actually zap the local rows */
1862   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
1863   ierr = PetscLogObjectParent(A,istmp);CHKERRQ(ierr);
1864 
1865   /*
1866         Zero the required rows. If the "diagonal block" of the matrix
1867      is square and the user wishes to set the diagonal we use seperate
1868      code so that MatSetValues() is not called for each diagonal allocating
1869      new memory, thus calling lots of mallocs and slowing things down.
1870 
1871        Contributed by: Mathew Knepley
1872   */
1873   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1874   ierr = MatZeroRows_SeqBAIJ(l->B,istmp,0);CHKERRQ(ierr);
1875   if (diag && (l->A->M == l->A->N)) {
1876     ierr = MatZeroRows_SeqBAIJ(l->A,istmp,diag);CHKERRQ(ierr);
1877   } else if (diag) {
1878     ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr);
1879     if (((Mat_SeqBAIJ*)l->A->data)->nonew) {
1880       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1881 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1882     }
1883     for (i=0; i<slen; i++) {
1884       row  = lrows[i] + rstart_bs;
1885       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
1886     }
1887     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1888     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1889   } else {
1890     ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr);
1891   }
1892 
1893   ierr = ISDestroy(istmp);CHKERRQ(ierr);
1894   ierr = PetscFree(lrows);CHKERRQ(ierr);
1895 
1896   /* wait on sends */
1897   if (nsends) {
1898     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1899     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1900     ierr = PetscFree(send_status);CHKERRQ(ierr);
1901   }
1902   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1903   ierr = PetscFree(svalues);CHKERRQ(ierr);
1904 
1905   PetscFunctionReturn(0);
1906 }
1907 
1908 #undef __FUNCT__
1909 #define __FUNCT__ "MatPrintHelp_MPIBAIJ"
1910 PetscErrorCode MatPrintHelp_MPIBAIJ(Mat A)
1911 {
1912   Mat_MPIBAIJ       *a   = (Mat_MPIBAIJ*)A->data;
1913   MPI_Comm          comm = A->comm;
1914   static PetscTruth called = PETSC_FALSE;
1915   PetscErrorCode    ierr;
1916 
1917   PetscFunctionBegin;
1918   if (!a->rank) {
1919     ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr);
1920   }
1921   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1922   ierr = (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
1923   ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
1924   PetscFunctionReturn(0);
1925 }
1926 
1927 #undef __FUNCT__
1928 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
1929 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1930 {
1931   Mat_MPIBAIJ    *a   = (Mat_MPIBAIJ*)A->data;
1932   PetscErrorCode ierr;
1933 
1934   PetscFunctionBegin;
1935   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1936   PetscFunctionReturn(0);
1937 }
1938 
1939 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
1940 
1941 #undef __FUNCT__
1942 #define __FUNCT__ "MatEqual_MPIBAIJ"
1943 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag)
1944 {
1945   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1946   Mat            a,b,c,d;
1947   PetscTruth     flg;
1948   PetscErrorCode ierr;
1949 
1950   PetscFunctionBegin;
1951   a = matA->A; b = matA->B;
1952   c = matB->A; d = matB->B;
1953 
1954   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1955   if (flg) {
1956     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1957   }
1958   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1959   PetscFunctionReturn(0);
1960 }
1961 
1962 
1963 #undef __FUNCT__
1964 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ"
1965 PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A)
1966 {
1967   PetscErrorCode ierr;
1968 
1969   PetscFunctionBegin;
1970   ierr =  MatMPIBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1971   PetscFunctionReturn(0);
1972 }
1973 
1974 /* -------------------------------------------------------------------*/
1975 static struct _MatOps MatOps_Values = {
1976        MatSetValues_MPIBAIJ,
1977        MatGetRow_MPIBAIJ,
1978        MatRestoreRow_MPIBAIJ,
1979        MatMult_MPIBAIJ,
1980 /* 4*/ MatMultAdd_MPIBAIJ,
1981        MatMultTranspose_MPIBAIJ,
1982        MatMultTransposeAdd_MPIBAIJ,
1983        0,
1984        0,
1985        0,
1986 /*10*/ 0,
1987        0,
1988        0,
1989        0,
1990        MatTranspose_MPIBAIJ,
1991 /*15*/ MatGetInfo_MPIBAIJ,
1992        MatEqual_MPIBAIJ,
1993        MatGetDiagonal_MPIBAIJ,
1994        MatDiagonalScale_MPIBAIJ,
1995        MatNorm_MPIBAIJ,
1996 /*20*/ MatAssemblyBegin_MPIBAIJ,
1997        MatAssemblyEnd_MPIBAIJ,
1998        0,
1999        MatSetOption_MPIBAIJ,
2000        MatZeroEntries_MPIBAIJ,
2001 /*25*/ MatZeroRows_MPIBAIJ,
2002        0,
2003        0,
2004        0,
2005        0,
2006 /*30*/ MatSetUpPreallocation_MPIBAIJ,
2007        0,
2008        0,
2009        0,
2010        0,
2011 /*35*/ MatDuplicate_MPIBAIJ,
2012        0,
2013        0,
2014        0,
2015        0,
2016 /*40*/ 0,
2017        MatGetSubMatrices_MPIBAIJ,
2018        MatIncreaseOverlap_MPIBAIJ,
2019        MatGetValues_MPIBAIJ,
2020        0,
2021 /*45*/ MatPrintHelp_MPIBAIJ,
2022        MatScale_MPIBAIJ,
2023        0,
2024        0,
2025        0,
2026 /*50*/ 0,
2027        0,
2028        0,
2029        0,
2030        0,
2031 /*55*/ 0,
2032        0,
2033        MatSetUnfactored_MPIBAIJ,
2034        0,
2035        MatSetValuesBlocked_MPIBAIJ,
2036 /*60*/ 0,
2037        MatDestroy_MPIBAIJ,
2038        MatView_MPIBAIJ,
2039        MatGetPetscMaps_Petsc,
2040        0,
2041 /*65*/ 0,
2042        0,
2043        0,
2044        0,
2045        0,
2046 /*70*/ MatGetRowMax_MPIBAIJ,
2047        0,
2048        0,
2049        0,
2050        0,
2051 /*75*/ 0,
2052        0,
2053        0,
2054        0,
2055        0,
2056 /*80*/ 0,
2057        0,
2058        0,
2059        0,
2060        MatLoad_MPIBAIJ,
2061 /*85*/ 0,
2062        0,
2063        0,
2064        0,
2065        0,
2066 /*90*/ 0,
2067        0,
2068        0,
2069        0,
2070        0,
2071 /*95*/ 0,
2072        0,
2073        0,
2074        0};
2075 
2076 
2077 EXTERN_C_BEGIN
2078 #undef __FUNCT__
2079 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
2080 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
2081 {
2082   PetscFunctionBegin;
2083   *a      = ((Mat_MPIBAIJ *)A->data)->A;
2084   *iscopy = PETSC_FALSE;
2085   PetscFunctionReturn(0);
2086 }
2087 EXTERN_C_END
2088 
2089 EXTERN_C_BEGIN
2090 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*);
2091 EXTERN_C_END
2092 
2093 #undef __FUNCT__
2094 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ"
2095 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt I[],const PetscInt J[],const PetscScalar v[])
2096 {
2097   Mat_MPIBAIJ    *b  = (Mat_MPIBAIJ *)B->data;
2098   PetscInt       m = B->m/bs,cstart = b->cstart, cend = b->cend,j,nnz,i,d;
2099   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart = b->rstart,ii;
2100   const PetscInt *JJ;
2101   PetscScalar    *values;
2102   PetscErrorCode ierr;
2103 
2104   PetscFunctionBegin;
2105 #if defined(PETSC_OPT_g)
2106   if (I[0]) SETERRQ1(PETSC_ERR_ARG_RANGE,"I[0] must be 0 it is %D",I[0]);
2107 #endif
2108   ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2109   o_nnz = d_nnz + m;
2110 
2111   for (i=0; i<m; i++) {
2112     nnz     = I[i+1]- I[i];
2113     JJ      = J + I[i];
2114     nnz_max = PetscMax(nnz_max,nnz);
2115 #if defined(PETSC_OPT_g)
2116     if (nnz < 0) SETERRQ1(PETSC_ERR_ARG_RANGE,"Local row %D has a negative %D number of columns",i,nnz);
2117 #endif
2118     for (j=0; j<nnz; j++) {
2119       if (*JJ >= cstart) break;
2120       JJ++;
2121     }
2122     d = 0;
2123     for (; j<nnz; j++) {
2124       if (*JJ++ >= cend) break;
2125       d++;
2126     }
2127     d_nnz[i] = d;
2128     o_nnz[i] = nnz - d;
2129   }
2130   ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2131   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2132 
2133   if (v) values = (PetscScalar*)v;
2134   else {
2135     ierr = PetscMalloc(bs*bs*(nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2136     ierr = PetscMemzero(values,bs*bs*nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2137   }
2138 
2139   ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2140   for (i=0; i<m; i++) {
2141     ii   = i + rstart;
2142     nnz  = I[i+1]- I[i];
2143     ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&ii,nnz,J+I[i],values,INSERT_VALUES);CHKERRQ(ierr);
2144   }
2145   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2146   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2147   ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr);
2148 
2149   if (!v) {
2150     ierr = PetscFree(values);CHKERRQ(ierr);
2151   }
2152   PetscFunctionReturn(0);
2153 }
2154 
2155 #undef __FUNCT__
2156 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR"
2157 /*@C
2158    MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
2159    (the default parallel PETSc format).
2160 
2161    Collective on MPI_Comm
2162 
2163    Input Parameters:
2164 +  A - the matrix
2165 .  i - the indices into j for the start of each local row (starts with zero)
2166 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2167 -  v - optional values in the matrix
2168 
2169    Level: developer
2170 
2171 .keywords: matrix, aij, compressed row, sparse, parallel
2172 
2173 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ
2174 @*/
2175 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2176 {
2177   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]);
2178 
2179   PetscFunctionBegin;
2180   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
2181   if (f) {
2182     ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr);
2183   }
2184   PetscFunctionReturn(0);
2185 }
2186 
2187 EXTERN_C_BEGIN
2188 #undef __FUNCT__
2189 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
2190 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
2191 {
2192   Mat_MPIBAIJ    *b;
2193   PetscErrorCode ierr;
2194   PetscInt       i;
2195 
2196   PetscFunctionBegin;
2197   B->preallocated = PETSC_TRUE;
2198   ierr = PetscOptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2199 
2200   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
2201   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2202   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2203   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2204   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2205   if (d_nnz) {
2206   for (i=0; i<B->m/bs; i++) {
2207       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]);
2208     }
2209   }
2210   if (o_nnz) {
2211     for (i=0; i<B->m/bs; i++) {
2212       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]);
2213     }
2214   }
2215 
2216   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr);
2217   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr);
2218   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
2219   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
2220 
2221   b = (Mat_MPIBAIJ*)B->data;
2222   B->bs  = bs;
2223   b->bs2 = bs*bs;
2224   b->mbs = B->m/bs;
2225   b->nbs = B->n/bs;
2226   b->Mbs = B->M/bs;
2227   b->Nbs = B->N/bs;
2228 
2229   ierr = MPI_Allgather(&b->mbs,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
2230   b->rowners[0]    = 0;
2231   for (i=2; i<=b->size; i++) {
2232     b->rowners[i] += b->rowners[i-1];
2233   }
2234   b->rstart    = b->rowners[b->rank];
2235   b->rend      = b->rowners[b->rank+1];
2236 
2237   ierr = MPI_Allgather(&b->nbs,1,MPIU_INT,b->cowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
2238   b->cowners[0] = 0;
2239   for (i=2; i<=b->size; i++) {
2240     b->cowners[i] += b->cowners[i-1];
2241   }
2242   b->cstart    = b->cowners[b->rank];
2243   b->cend      = b->cowners[b->rank+1];
2244 
2245   for (i=0; i<=b->size; i++) {
2246     b->rowners_bs[i] = b->rowners[i]*bs;
2247   }
2248   b->rstart_bs = b->rstart*bs;
2249   b->rend_bs   = b->rend*bs;
2250   b->cstart_bs = b->cstart*bs;
2251   b->cend_bs   = b->cend*bs;
2252 
2253   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);CHKERRQ(ierr);
2254   ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
2255   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2256   ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
2257   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);CHKERRQ(ierr);
2258   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
2259   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
2260   ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
2261 
2262   ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);
2263 
2264   PetscFunctionReturn(0);
2265 }
2266 EXTERN_C_END
2267 
2268 EXTERN_C_BEGIN
2269 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
2270 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
2271 EXTERN_C_END
2272 
2273 /*MC
2274    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
2275 
2276    Options Database Keys:
2277 . -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
2278 
2279   Level: beginner
2280 
2281 .seealso: MatCreateMPIBAIJ
2282 M*/
2283 
2284 EXTERN_C_BEGIN
2285 #undef __FUNCT__
2286 #define __FUNCT__ "MatCreate_MPIBAIJ"
2287 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIBAIJ(Mat B)
2288 {
2289   Mat_MPIBAIJ    *b;
2290   PetscErrorCode ierr;
2291   PetscTruth     flg;
2292 
2293   PetscFunctionBegin;
2294   ierr = PetscNew(Mat_MPIBAIJ,&b);CHKERRQ(ierr);
2295   B->data = (void*)b;
2296 
2297   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2298   B->mapping    = 0;
2299   B->factor     = 0;
2300   B->assembled  = PETSC_FALSE;
2301 
2302   B->insertmode = NOT_SET_VALUES;
2303   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
2304   ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr);
2305 
2306   /* build local table of row and column ownerships */
2307   ierr          = PetscMalloc(3*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr);
2308   ierr = PetscLogObjectMemory(B,3*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));CHKERRQ(ierr);
2309   b->cowners    = b->rowners + b->size + 2;
2310   b->rowners_bs = b->cowners + b->size + 2;
2311 
2312   /* build cache for off array entries formed */
2313   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
2314   b->donotstash  = PETSC_FALSE;
2315   b->colmap      = PETSC_NULL;
2316   b->garray      = PETSC_NULL;
2317   b->roworiented = PETSC_TRUE;
2318 
2319 #if defined(PETSC_USE_MAT_SINGLE)
2320   /* stuff for MatSetValues_XXX in single precision */
2321   b->setvalueslen     = 0;
2322   b->setvaluescopy    = PETSC_NULL;
2323 #endif
2324 
2325   /* stuff used in block assembly */
2326   b->barray       = 0;
2327 
2328   /* stuff used for matrix vector multiply */
2329   b->lvec         = 0;
2330   b->Mvctx        = 0;
2331 
2332   /* stuff for MatGetRow() */
2333   b->rowindices   = 0;
2334   b->rowvalues    = 0;
2335   b->getrowactive = PETSC_FALSE;
2336 
2337   /* hash table stuff */
2338   b->ht           = 0;
2339   b->hd           = 0;
2340   b->ht_size      = 0;
2341   b->ht_flag      = PETSC_FALSE;
2342   b->ht_fact      = 0;
2343   b->ht_total_ct  = 0;
2344   b->ht_insert_ct = 0;
2345 
2346   ierr = PetscOptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
2347   if (flg) {
2348     PetscReal fact = 1.39;
2349     ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
2350     ierr = PetscOptionsGetReal(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
2351     if (fact <= 1.0) fact = 1.39;
2352     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
2353     ierr = PetscLogInfo((0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact));CHKERRQ(ierr);
2354   }
2355   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2356                                      "MatStoreValues_MPIBAIJ",
2357                                      MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
2358   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2359                                      "MatRetrieveValues_MPIBAIJ",
2360                                      MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
2361   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
2362                                      "MatGetDiagonalBlock_MPIBAIJ",
2363                                      MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
2364   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C",
2365                                      "MatMPIBAIJSetPreallocation_MPIBAIJ",
2366                                      MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
2367   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",
2368 				     "MatMPIBAIJSetPreallocationCSR_MPIAIJ",
2369 				     MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
2370   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
2371                                      "MatDiagonalScaleLocal_MPIBAIJ",
2372                                      MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
2373   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C",
2374                                      "MatSetHashTableFactor_MPIBAIJ",
2375                                      MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
2376   PetscFunctionReturn(0);
2377 }
2378 EXTERN_C_END
2379 
2380 /*MC
2381    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
2382 
2383    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
2384    and MATMPIBAIJ otherwise.
2385 
2386    Options Database Keys:
2387 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
2388 
2389   Level: beginner
2390 
2391 .seealso: MatCreateMPIBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
2392 M*/
2393 
2394 EXTERN_C_BEGIN
2395 #undef __FUNCT__
2396 #define __FUNCT__ "MatCreate_BAIJ"
2397 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_BAIJ(Mat A)
2398 {
2399   PetscErrorCode ierr;
2400   PetscMPIInt    size;
2401 
2402   PetscFunctionBegin;
2403   ierr = PetscObjectChangeTypeName((PetscObject)A,MATBAIJ);CHKERRQ(ierr);
2404   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
2405   if (size == 1) {
2406     ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr);
2407   } else {
2408     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
2409   }
2410   PetscFunctionReturn(0);
2411 }
2412 EXTERN_C_END
2413 
2414 #undef __FUNCT__
2415 #define __FUNCT__ "MatMPIBAIJSetPreallocation"
2416 /*@C
2417    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
2418    (block compressed row).  For good matrix assembly performance
2419    the user should preallocate the matrix storage by setting the parameters
2420    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2421    performance can be increased by more than a factor of 50.
2422 
2423    Collective on Mat
2424 
2425    Input Parameters:
2426 +  A - the matrix
2427 .  bs   - size of blockk
2428 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2429            submatrix  (same for all local rows)
2430 .  d_nnz - array containing the number of block nonzeros in the various block rows
2431            of the in diagonal portion of the local (possibly different for each block
2432            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
2433 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2434            submatrix (same for all local rows).
2435 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2436            off-diagonal portion of the local submatrix (possibly different for
2437            each block row) or PETSC_NULL.
2438 
2439    If the *_nnz parameter is given then the *_nz parameter is ignored
2440 
2441    Options Database Keys:
2442 .   -mat_no_unroll - uses code that does not unroll the loops in the
2443                      block calculations (much slower)
2444 .   -mat_block_size - size of the blocks to use
2445 
2446    Notes:
2447    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
2448    than it must be used on all processors that share the object for that argument.
2449 
2450    Storage Information:
2451    For a square global matrix we define each processor's diagonal portion
2452    to be its local rows and the corresponding columns (a square submatrix);
2453    each processor's off-diagonal portion encompasses the remainder of the
2454    local matrix (a rectangular submatrix).
2455 
2456    The user can specify preallocated storage for the diagonal part of
2457    the local submatrix with either d_nz or d_nnz (not both).  Set
2458    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2459    memory allocation.  Likewise, specify preallocated storage for the
2460    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2461 
2462    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2463    the figure below we depict these three local rows and all columns (0-11).
2464 
2465 .vb
2466            0 1 2 3 4 5 6 7 8 9 10 11
2467           -------------------
2468    row 3  |  o o o d d d o o o o o o
2469    row 4  |  o o o d d d o o o o o o
2470    row 5  |  o o o d d d o o o o o o
2471           -------------------
2472 .ve
2473 
2474    Thus, any entries in the d locations are stored in the d (diagonal)
2475    submatrix, and any entries in the o locations are stored in the
2476    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
2477    stored simply in the MATSEQBAIJ format for compressed row storage.
2478 
2479    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2480    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2481    In general, for PDE problems in which most nonzeros are near the diagonal,
2482    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2483    or you will get TERRIBLE performance; see the users' manual chapter on
2484    matrices.
2485 
2486    Level: intermediate
2487 
2488 .keywords: matrix, block, aij, compressed row, sparse, parallel
2489 
2490 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR()
2491 @*/
2492 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2493 {
2494   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
2495 
2496   PetscFunctionBegin;
2497   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2498   if (f) {
2499     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2500   }
2501   PetscFunctionReturn(0);
2502 }
2503 
2504 #undef __FUNCT__
2505 #define __FUNCT__ "MatCreateMPIBAIJ"
2506 /*@C
2507    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
2508    (block compressed row).  For good matrix assembly performance
2509    the user should preallocate the matrix storage by setting the parameters
2510    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2511    performance can be increased by more than a factor of 50.
2512 
2513    Collective on MPI_Comm
2514 
2515    Input Parameters:
2516 +  comm - MPI communicator
2517 .  bs   - size of blockk
2518 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2519            This value should be the same as the local size used in creating the
2520            y vector for the matrix-vector product y = Ax.
2521 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2522            This value should be the same as the local size used in creating the
2523            x vector for the matrix-vector product y = Ax.
2524 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2525 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2526 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
2527            submatrix  (same for all local rows)
2528 .  d_nnz - array containing the number of nonzero blocks in the various block rows
2529            of the in diagonal portion of the local (possibly different for each block
2530            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
2531 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
2532            submatrix (same for all local rows).
2533 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
2534            off-diagonal portion of the local submatrix (possibly different for
2535            each block row) or PETSC_NULL.
2536 
2537    Output Parameter:
2538 .  A - the matrix
2539 
2540    Options Database Keys:
2541 .   -mat_no_unroll - uses code that does not unroll the loops in the
2542                      block calculations (much slower)
2543 .   -mat_block_size - size of the blocks to use
2544 
2545    Notes:
2546    If the *_nnz parameter is given then the *_nz parameter is ignored
2547 
2548    A nonzero block is any block that as 1 or more nonzeros in it
2549 
2550    The user MUST specify either the local or global matrix dimensions
2551    (possibly both).
2552 
2553    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
2554    than it must be used on all processors that share the object for that argument.
2555 
2556    Storage Information:
2557    For a square global matrix we define each processor's diagonal portion
2558    to be its local rows and the corresponding columns (a square submatrix);
2559    each processor's off-diagonal portion encompasses the remainder of the
2560    local matrix (a rectangular submatrix).
2561 
2562    The user can specify preallocated storage for the diagonal part of
2563    the local submatrix with either d_nz or d_nnz (not both).  Set
2564    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2565    memory allocation.  Likewise, specify preallocated storage for the
2566    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2567 
2568    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2569    the figure below we depict these three local rows and all columns (0-11).
2570 
2571 .vb
2572            0 1 2 3 4 5 6 7 8 9 10 11
2573           -------------------
2574    row 3  |  o o o d d d o o o o o o
2575    row 4  |  o o o d d d o o o o o o
2576    row 5  |  o o o d d d o o o o o o
2577           -------------------
2578 .ve
2579 
2580    Thus, any entries in the d locations are stored in the d (diagonal)
2581    submatrix, and any entries in the o locations are stored in the
2582    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
2583    stored simply in the MATSEQBAIJ format for compressed row storage.
2584 
2585    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2586    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2587    In general, for PDE problems in which most nonzeros are near the diagonal,
2588    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2589    or you will get TERRIBLE performance; see the users' manual chapter on
2590    matrices.
2591 
2592    Level: intermediate
2593 
2594 .keywords: matrix, block, aij, compressed row, sparse, parallel
2595 
2596 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
2597 @*/
2598 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2599 {
2600   PetscErrorCode ierr;
2601   PetscMPIInt    size;
2602 
2603   PetscFunctionBegin;
2604   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2605   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2606   if (size > 1) {
2607     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
2608     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2609   } else {
2610     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
2611     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2612   }
2613   PetscFunctionReturn(0);
2614 }
2615 
2616 #undef __FUNCT__
2617 #define __FUNCT__ "MatDuplicate_MPIBAIJ"
2618 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2619 {
2620   Mat            mat;
2621   Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
2622   PetscErrorCode ierr;
2623   PetscInt       len=0;
2624 
2625   PetscFunctionBegin;
2626   *newmat       = 0;
2627   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
2628   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
2629   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2630 
2631   mat->factor       = matin->factor;
2632   mat->preallocated = PETSC_TRUE;
2633   mat->assembled    = PETSC_TRUE;
2634   mat->insertmode   = NOT_SET_VALUES;
2635 
2636   a      = (Mat_MPIBAIJ*)mat->data;
2637   mat->bs  = matin->bs;
2638   a->bs2   = oldmat->bs2;
2639   a->mbs   = oldmat->mbs;
2640   a->nbs   = oldmat->nbs;
2641   a->Mbs   = oldmat->Mbs;
2642   a->Nbs   = oldmat->Nbs;
2643 
2644   a->rstart       = oldmat->rstart;
2645   a->rend         = oldmat->rend;
2646   a->cstart       = oldmat->cstart;
2647   a->cend         = oldmat->cend;
2648   a->size         = oldmat->size;
2649   a->rank         = oldmat->rank;
2650   a->donotstash   = oldmat->donotstash;
2651   a->roworiented  = oldmat->roworiented;
2652   a->rowindices   = 0;
2653   a->rowvalues    = 0;
2654   a->getrowactive = PETSC_FALSE;
2655   a->barray       = 0;
2656   a->rstart_bs    = oldmat->rstart_bs;
2657   a->rend_bs      = oldmat->rend_bs;
2658   a->cstart_bs    = oldmat->cstart_bs;
2659   a->cend_bs      = oldmat->cend_bs;
2660 
2661   /* hash table stuff */
2662   a->ht           = 0;
2663   a->hd           = 0;
2664   a->ht_size      = 0;
2665   a->ht_flag      = oldmat->ht_flag;
2666   a->ht_fact      = oldmat->ht_fact;
2667   a->ht_total_ct  = 0;
2668   a->ht_insert_ct = 0;
2669 
2670   ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
2671   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
2672   ierr = MatStashCreate_Private(matin->comm,matin->bs,&mat->bstash);CHKERRQ(ierr);
2673   if (oldmat->colmap) {
2674 #if defined (PETSC_USE_CTABLE)
2675   ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2676 #else
2677   ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2678   ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2679   ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2680 #endif
2681   } else a->colmap = 0;
2682 
2683   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2684     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2685     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2686     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
2687   } else a->garray = 0;
2688 
2689   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2690   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2691   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2692   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2693 
2694   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2695   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2696   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2697   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2698   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
2699   *newmat = mat;
2700 
2701   PetscFunctionReturn(0);
2702 }
2703 
2704 #include "petscsys.h"
2705 
2706 #undef __FUNCT__
2707 #define __FUNCT__ "MatLoad_MPIBAIJ"
2708 PetscErrorCode MatLoad_MPIBAIJ(PetscViewer viewer,const MatType type,Mat *newmat)
2709 {
2710   Mat            A;
2711   PetscErrorCode ierr;
2712   int            fd;
2713   PetscInt       i,nz,j,rstart,rend;
2714   PetscScalar    *vals,*buf;
2715   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2716   MPI_Status     status;
2717   PetscMPIInt    rank,size,maxnz;
2718   PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
2719   PetscInt       *locrowlens,*procsnz = 0,*browners;
2720   PetscInt       jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax;
2721   PetscMPIInt    tag = ((PetscObject)viewer)->tag;
2722   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2723   PetscInt       dcount,kmax,k,nzcount,tmp,mend;
2724 
2725   PetscFunctionBegin;
2726   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2727 
2728   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2729   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2730   if (!rank) {
2731     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2732     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2733     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2734   }
2735 
2736   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2737   M = header[1]; N = header[2];
2738 
2739   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2740 
2741   /*
2742      This code adds extra rows to make sure the number of rows is
2743      divisible by the blocksize
2744   */
2745   Mbs        = M/bs;
2746   extra_rows = bs - M + bs*Mbs;
2747   if (extra_rows == bs) extra_rows = 0;
2748   else                  Mbs++;
2749   if (extra_rows && !rank) {
2750     ierr = PetscLogInfo((0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n"));CHKERRQ(ierr);
2751   }
2752 
2753   /* determine ownership of all rows */
2754   mbs        = Mbs/size + ((Mbs % size) > rank);
2755   m          = mbs*bs;
2756   ierr       = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr);
2757   ierr       = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2758 
2759   /* process 0 needs enough room for process with most rows */
2760   if (!rank) {
2761     mmax = rowners[1];
2762     for (i=2; i<size; i++) {
2763       mmax = PetscMax(mmax,rowners[i]);
2764     }
2765     mmax*=bs;
2766   } else mmax = m;
2767 
2768   rowners[0] = 0;
2769   for (i=2; i<=size; i++)  rowners[i] += rowners[i-1];
2770   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2771   rstart = rowners[rank];
2772   rend   = rowners[rank+1];
2773 
2774   /* distribute row lengths to all processors */
2775   ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr);
2776   if (!rank) {
2777     mend = m;
2778     if (size == 1) mend = mend - extra_rows;
2779     ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr);
2780     for (j=mend; j<m; j++) locrowlens[j] = 1;
2781     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2782     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2783     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2784     for (j=0; j<m; j++) {
2785       procsnz[0] += locrowlens[j];
2786     }
2787     for (i=1; i<size; i++) {
2788       mend = browners[i+1] - browners[i];
2789       if (i == size-1) mend = mend - extra_rows;
2790       ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr);
2791       for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
2792       /* calculate the number of nonzeros on each processor */
2793       for (j=0; j<browners[i+1]-browners[i]; j++) {
2794         procsnz[i] += rowlengths[j];
2795       }
2796       ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2797     }
2798     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2799   } else {
2800     ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2801   }
2802 
2803   if (!rank) {
2804     /* determine max buffer needed and allocate it */
2805     maxnz = procsnz[0];
2806     for (i=1; i<size; i++) {
2807       maxnz = PetscMax(maxnz,procsnz[i]);
2808     }
2809     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2810 
2811     /* read in my part of the matrix column indices  */
2812     nz     = procsnz[0];
2813     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2814     mycols = ibuf;
2815     if (size == 1)  nz -= extra_rows;
2816     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2817     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2818 
2819     /* read in every ones (except the last) and ship off */
2820     for (i=1; i<size-1; i++) {
2821       nz   = procsnz[i];
2822       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2823       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2824     }
2825     /* read in the stuff for the last proc */
2826     if (size != 1) {
2827       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2828       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2829       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2830       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2831     }
2832     ierr = PetscFree(cols);CHKERRQ(ierr);
2833   } else {
2834     /* determine buffer space needed for message */
2835     nz = 0;
2836     for (i=0; i<m; i++) {
2837       nz += locrowlens[i];
2838     }
2839     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2840     mycols = ibuf;
2841     /* receive message of column indices*/
2842     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2843     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2844     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2845   }
2846 
2847   /* loop over local rows, determining number of off diagonal entries */
2848   ierr     = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
2849   ierr     = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
2850   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2851   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2852   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2853   rowcount = 0; nzcount = 0;
2854   for (i=0; i<mbs; i++) {
2855     dcount  = 0;
2856     odcount = 0;
2857     for (j=0; j<bs; j++) {
2858       kmax = locrowlens[rowcount];
2859       for (k=0; k<kmax; k++) {
2860         tmp = mycols[nzcount++]/bs;
2861         if (!mask[tmp]) {
2862           mask[tmp] = 1;
2863           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
2864           else masked1[dcount++] = tmp;
2865         }
2866       }
2867       rowcount++;
2868     }
2869 
2870     dlens[i]  = dcount;
2871     odlens[i] = odcount;
2872 
2873     /* zero out the mask elements we set */
2874     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2875     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2876   }
2877 
2878   /* create our matrix */
2879   ierr = MatCreate(comm,m,m,M+extra_rows,N+extra_rows,&A);CHKERRQ(ierr);
2880   ierr = MatSetType(A,type);CHKERRQ(ierr)
2881   ierr = MatMPIBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2882 
2883   /* Why doesn't this called using ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); */
2884   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2885 
2886   if (!rank) {
2887     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2888     /* read in my part of the matrix numerical values  */
2889     nz = procsnz[0];
2890     vals = buf;
2891     mycols = ibuf;
2892     if (size == 1)  nz -= extra_rows;
2893     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2894     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2895 
2896     /* insert into matrix */
2897     jj      = rstart*bs;
2898     for (i=0; i<m; i++) {
2899       ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2900       mycols += locrowlens[i];
2901       vals   += locrowlens[i];
2902       jj++;
2903     }
2904     /* read in other processors (except the last one) and ship out */
2905     for (i=1; i<size-1; i++) {
2906       nz   = procsnz[i];
2907       vals = buf;
2908       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2909       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2910     }
2911     /* the last proc */
2912     if (size != 1){
2913       nz   = procsnz[i] - extra_rows;
2914       vals = buf;
2915       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2916       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2917       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
2918     }
2919     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2920   } else {
2921     /* receive numeric values */
2922     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2923 
2924     /* receive message of values*/
2925     vals   = buf;
2926     mycols = ibuf;
2927     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2928     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2929     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2930 
2931     /* insert into matrix */
2932     jj      = rstart*bs;
2933     for (i=0; i<m; i++) {
2934       ierr    = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2935       mycols += locrowlens[i];
2936       vals   += locrowlens[i];
2937       jj++;
2938     }
2939   }
2940   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2941   ierr = PetscFree(buf);CHKERRQ(ierr);
2942   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2943   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
2944   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
2945   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
2946   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2947   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2948 
2949   *newmat = A;
2950   PetscFunctionReturn(0);
2951 }
2952 
2953 #undef __FUNCT__
2954 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
2955 /*@
2956    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2957 
2958    Input Parameters:
2959 .  mat  - the matrix
2960 .  fact - factor
2961 
2962    Collective on Mat
2963 
2964    Level: advanced
2965 
2966   Notes:
2967    This can also be set by the command line option: -mat_use_hash_table fact
2968 
2969 .keywords: matrix, hashtable, factor, HT
2970 
2971 .seealso: MatSetOption()
2972 @*/
2973 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
2974 {
2975   PetscErrorCode ierr,(*f)(Mat,PetscReal);
2976 
2977   PetscFunctionBegin;
2978   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);CHKERRQ(ierr);
2979   if (f) {
2980     ierr = (*f)(mat,fact);CHKERRQ(ierr);
2981   }
2982   PetscFunctionReturn(0);
2983 }
2984 
2985 EXTERN_C_BEGIN
2986 #undef __FUNCT__
2987 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
2988 PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
2989 {
2990   Mat_MPIBAIJ *baij;
2991 
2992   PetscFunctionBegin;
2993   baij = (Mat_MPIBAIJ*)mat->data;
2994   baij->ht_fact = fact;
2995   PetscFunctionReturn(0);
2996 }
2997 EXTERN_C_END
2998 
2999 #undef __FUNCT__
3000 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
3001 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3002 {
3003   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
3004   PetscFunctionBegin;
3005   *Ad     = a->A;
3006   *Ao     = a->B;
3007   *colmap = a->garray;
3008   PetscFunctionReturn(0);
3009 }
3010