
#include "src/mat/impls/baij/mpi/mpibaij.h"   /*I  "petscmat.h"  I*/

EXTERN PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
EXTERN PetscErrorCode DisAssemble_MPIBAIJ(Mat);
EXTERN PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat,PetscInt,IS[],PetscInt);
EXTERN PetscErrorCode MatGetSubMatrices_MPIBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat *[]);
EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []);
EXTERN PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
EXTERN PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
EXTERN PetscErrorCode MatPrintHelp_SeqBAIJ(Mat);
EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,const PetscScalar*);

/*  UGLY, ugly, ugly
   When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does
   not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and
   inserts them into the single-precision data structure. The function MatSetValuesBlocked_MPIBAIJ()
   converts the entries to single precision and then calls ..._MatScalar() to put them
   into the single-precision data structures.
*/
#if defined(PETSC_USE_MAT_SINGLE)
EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
EXTERN PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
EXTERN PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
#else
#define MatSetValuesBlocked_SeqBAIJ_MatScalar      MatSetValuesBlocked_SeqBAIJ
#define MatSetValues_MPIBAIJ_MatScalar             MatSetValues_MPIBAIJ
#define MatSetValuesBlocked_MPIBAIJ_MatScalar      MatSetValuesBlocked_MPIBAIJ
#define MatSetValues_MPIBAIJ_HT_MatScalar          MatSetValues_MPIBAIJ_HT
#define MatSetValuesBlocked_MPIBAIJ_HT_MatScalar   MatSetValuesBlocked_MPIBAIJ_HT
#endif

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMax_MPIBAIJ"
PetscErrorCode MatGetRowMax_MPIBAIJ(Mat A,Vec v)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;

  ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->m,&vtmp);CHKERRQ(ierr);
  ierr = MatGetRowMax(a->B,vtmp);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->m; i++){
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) va[i] = vb[i];
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = VecDestroy(vtmp);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPIBAIJ"
PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*
     Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  This is done in a non-scalable way since the
   length of colmap equals the global number of block columns.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIBAIJ_Private"
PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       nbs = B->nbs,i,bs=mat->bs;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr);
  for (i=0; i<nbs; i++){
    ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}
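
/*
   A small worked illustration of the colmap built above (hypothetical
   numbers, not from any particular run): take bs = 2 and suppose this
   process couples to off-process block columns garray[] = {1,3} of a
   matrix with Nbs = 5 global block columns.  The loop in the non-CTABLE
   branch then produces

       colmap[0..4] = {0, 0*bs+1, 0, 1*bs+1, 0} = {0, 1, 0, 3, 0}

   so colmap[g] is 1 plus the first local point column of global block
   column g within B, and 0 means "block column not present".  The +1
   shift is what allows callers (e.g. MatSetValues_MPIBAIJ_MatScalar()
   below) to test "col = colmap[in[j]/bs] - 1; if (col < 0) ..." for a
   missing block column.
*/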

#define CHUNKSIZE  10

#define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto a_noinsert; \
        } \
      } \
      if (a->nonew == 1) goto a_noinsert; \
      else if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
      if (nrow >= rmax) { \
        /* there is no extra room in row, therefore enlarge */ \
        PetscInt       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
        MatScalar *new_a; \
 \
        if (a->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
 \
        /* malloc new storage space */ \
        len     = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(PetscInt); \
        ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
        new_j   = (PetscInt*)(new_a + bs2*new_nz); \
        new_i   = new_j + new_nz; \
 \
        /* copy over old data into new slots */ \
        for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \
        for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
        ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); \
        len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
        ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
        ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
                    aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
        /* free up old matrix storage */ \
        ierr = PetscFree(a->a);CHKERRQ(ierr);  \
        if (!a->singlemalloc) { \
          ierr = PetscFree(a->i);CHKERRQ(ierr); \
          ierr = PetscFree(a->j);CHKERRQ(ierr);\
        } \
        aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
        a->singlemalloc = PETSC_TRUE; \
 \
        rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
        rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
        ierr = PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar)));CHKERRQ(ierr); \
        a->maxnz += bs2*CHUNKSIZE; \
        a->reallocs++; \
        a->nz++; \
      } \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      a_noinsert:; \
    ailen[brow] = nrow; \
}

#define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto b_noinsert; \
        } \
      } \
      if (b->nonew == 1) goto b_noinsert; \
      else if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
      if (nrow >= rmax) { \
        /* there is no extra room in row, therefore enlarge */ \
        PetscInt       new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
        MatScalar *new_a; \
 \
        if (b->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
 \
        /* malloc new storage space */ \
        len     = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(PetscInt); \
        ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
        new_j   = (PetscInt*)(new_a + bs2*new_nz); \
        new_i   = new_j + new_nz; \
 \
        /* copy over old data into new slots */ \
        for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \
        for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
        ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); \
        len  = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
        ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
        ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
                    ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
        /* free up old matrix storage */ \
        ierr = PetscFree(b->a);CHKERRQ(ierr);  \
        if (!b->singlemalloc) { \
          ierr = PetscFree(b->i);CHKERRQ(ierr); \
          ierr = PetscFree(b->j);CHKERRQ(ierr); \
        } \
        ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
        b->singlemalloc = PETSC_TRUE; \
 \
        rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
        rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
        ierr = PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar)));CHKERRQ(ierr); \
        b->maxnz += bs2*CHUNKSIZE; \
        b->reallocs++; \
        b->nz++; \
      } \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      b_noinsert:; \
    bilen[brow] = nrow; \
}
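
/*
   Sketch of what the two macros above do for one (row,col,value) triple
   (an illustrative walk-through, not extra code): with bs = 2, row = 5
   and col = 9 target block row brow = 2 and block column bcol = 4, with
   local offsets ridx = 1, cidx = 1 inside the 2x2 block.  The macro
   binary-searches the block row for block column 4; if present, the
   value lands at ap[bs2*_i + bs*cidx + ridx] (column-major within the
   block).  Otherwise later entries in the row are shifted up one block
   slot, and if the row is already full the whole a->a/a->i/a->j storage
   is first reallocated with CHUNKSIZE extra block slots.
*/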

#if defined(PETSC_USE_MAT_SINGLE)
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ"
PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,N = m*n;
  MatScalar      *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen  = N;
  }
  vsingle = b->setvaluescopy;

  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValues_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,N = m*n*b->bs2;
  MatScalar      *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen  = N;
  }
  vsingle = b->setvaluescopy;
  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,N = m*n;
  MatScalar      *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen  = N;
  }
  vsingle = b->setvaluescopy;
  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValues_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,N = m*n*b->bs2;
  MatScalar      *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen  = N;
  }
  vsingle = b->setvaluescopy;
  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar"
PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  MatScalar      value;
  PetscTruth     roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=baij->rstart_bs;
  PetscInt       rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
  PetscInt       cend_orig=baij->cend_bs,bs=mat->bs;

  /* Some Variables required in the macro */
  Mat            A = baij->A;
  Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)(A)->data;
  PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar      *aa=a->a;

  Mat            B = baij->B;
  Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(B)->data;
  PetscInt       *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar      *ba=b->a;

  PetscInt       *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt       low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar      *ap,*bap;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      row = im[i] - rstart_orig;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig){
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->N-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B = baij->B;
              b = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba=b->a;
            } else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_MatScalar"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
  const MatScalar *value;
  MatScalar       *barray=baij->barray;
  PetscTruth      roworiented = baij->roworiented;
  PetscErrorCode  ierr;
  PetscInt        i,j,ii,jj,row,col,rstart=baij->rstart;
  PetscInt        rend=baij->rend,cstart=baij->cstart,stepval;
  PetscInt        cend=baij->cend,bs=mat->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If there is only one block (n == 1 row-oriented, or m == 1 column-oriented) a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++  = *value++;
            }
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend){
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
        else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined (PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
            }
          }
          else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#define HASH_KEY 0.6180339887
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
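
/*
   A quick sanity check of the multiplicative hash above (made-up numbers;
   assumes PetscReal is double): with size = 11 and key = 7,

       tmp = 7*0.6180339887 = 4.3262379209
       h1  = (PetscInt)(11*(tmp - 4)) = (PetscInt)(3.588...) = 3

   so key 7 is first sought at slot 3; the lookup loops in the routines
   below then probe linearly forward, wrapping from size-1 back to 0,
   until the key (or, at table-build time, an empty slot) is found.
*/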
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_HT_MatScalar"
PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscTruth     roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=baij->rstart_bs;
  PetscInt       rend_orig=baij->rend_bs,Nbs=baij->Nbs;
  PetscInt       h1,key,size=baij->ht_size,bs=mat->bs,*HT=baij->ht,idx;
  PetscReal      tmp;
  MatScalar      **HD = baij->hd,value;
#if defined(PETSC_USE_DEBUG)
  PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1);
#endif
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j=0; j<n; j++) {
        col = in[j];
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        /* Look up into the Hash Table */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#endif
        /* A HASH table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT_MatScalar"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
  PetscTruth      roworiented = baij->roworiented;
  PetscErrorCode  ierr;
  PetscInt        i,j,ii,jj,row,col;
  PetscInt        rstart=baij->rstart;
  PetscInt        rend=baij->rend,stepval,bs=mat->bs,bs2=baij->bs2;
  PetscInt        h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  PetscReal       tmp;
  MatScalar       **HD = baij->hd,*baij_a;
  const MatScalar *v_t,*value;
#if defined(PETSC_USE_DEBUG)
  PetscInt        total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
#endif
    row   = im[i];
    v_t   = v + i*bs2;
    if (row >= rstart && row < rend) {
      for (j=0; j<n; j++) {
        col = in[j];

        /* Look up into the Hash Table */
        key = row*Nbs+col+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        total_ct++;
        insert_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#endif
        baij_a = HD[idx];
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t  += bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj]  += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj]  = *value++;
              }
            }
          }
        } else {
          value = v + j*(stepval+bs)*bs + i*bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj]  += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj]  = *value++;
              }
            }
          }
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIBAIJ"
PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs=mat->bs,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
  PetscInt       bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
    if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]);
        if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend){
          col = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIBAIJ"
PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,bs2=baij->bs2,bs=baij->A->bs,nz,row,col;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr =  MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      nz = amat->nz*bs2;
      for (i=0; i<nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      nz = bmat->nz*bs2;
      for (i=0; i<nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *nrm = sqrt(*nrm);
    } else if (type == NORM_1) { /* max column sum */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray=baij->garray,cstart=baij->cstart;
      ierr = PetscMalloc((2*mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      tmp2 = tmp + mat->N;
      ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
      v = amat->a; jj = amat->j;
      for (i=0; i<amat->nz; i++) {
        for (j=0; j<bs; j++){
          col = bs*(cstart + *jj) + j; /* column index */
          for (row=0; row<bs; row++){
            tmp[col] += PetscAbsScalar(*v);  v++;
          }
        }
        jj++;
      }
      v = bmat->a; jj = bmat->j;
      for (i=0; i<bmat->nz; i++) {
        for (j=0; j<bs; j++){
          col = bs*garray[*jj] + j;
          for (row=0; row<bs; row++){
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *nrm = 0.0;
      for (j=0; j<mat->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row sum */
      PetscReal sums[bs];
      sum = 0.0;
      for (j=0; j<amat->mbs; j++) {
        for (row=0; row<bs; row++) sums[row] = 0.0;
        v = amat->a + bs2*amat->i[j];
        nz = amat->i[j+1]-amat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++){
            for (row=0; row<bs; row++){
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        v = bmat->a + bs2*bmat->i[j];
        nz = bmat->i[j+1]-bmat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++){
            for (row=0; row<bs; row++){
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        for (row=0; row<bs; row++){
          if (sums[row] > sum) sum = sums[row];
        }
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
    }
  }
  PetscFunctionReturn(0);
}

/*
  Creates the hash table and fills it in.
  This table is created only once.
  If new entries need to be added to the matrix
  then the hash table has to be destroyed and
  recreated.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            A = baij->A,B=baij->B;
  Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
  PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
  PetscErrorCode ierr;
  PetscInt       size,bs2=baij->bs2,rstart=baij->rstart;
  PetscInt       cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs;
  PetscInt       *HT,key;
  MatScalar      **HD;
  PetscReal      tmp;
#if defined(PETSC_USE_DEBUG)
  PetscInt       ct=0,max=0;
#endif

  PetscFunctionBegin;
  baij->ht_size=(PetscInt)(factor*nz);
  size = baij->ht_size;

  if (baij->ht) {
    PetscFunctionReturn(0);
  }

  /* Allocate Memory for Hash Table */
  ierr     = PetscMalloc((size)*(sizeof(PetscInt)+sizeof(MatScalar*))+1,&baij->hd);CHKERRQ(ierr);
  baij->ht = (PetscInt*)(baij->hd + size);
  HD       = baij->hd;
  HT       = baij->ht;

  ierr = PetscMemzero(HD,size*(sizeof(PetscInt)+sizeof(MatScalar*)));CHKERRQ(ierr);

  /* Loop Over A */
  for (i=0; i<a->mbs; i++) {
    for (j=ai[i]; j<ai[i+1]; j++) {
      row = i+rstart;
      col = aj[j]+cstart;

      key = row*Nbs + col + 1;
      h1  = HASH(size,key,tmp);
      for (k=0; k<size; k++){
        if (!HT[(h1+k)%size]) {
          HT[(h1+k)%size] = key;
          HD[(h1+k)%size] = a->a + j*bs2;
          break;
#if defined(PETSC_USE_DEBUG)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_DEBUG)
      if (k > max) max = k;
#endif
    }
  }
  /* Loop Over B */
  for (i=0; i<b->mbs; i++) {
    for (j=bi[i]; j<bi[i+1]; j++) {
      row = i+rstart;
      col = garray[bj[j]];
      key = row*Nbs + col + 1;
      h1  = HASH(size,key,tmp);
      for (k=0; k<size; k++){
        if (!HT[(h1+k)%size]) {
          HT[(h1+k)%size] = key;
          HD[(h1+k)%size] = b->a + j*bs2;
          break;
#if defined(PETSC_USE_DEBUG)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_DEBUG)
      if (k > max) max = k;
#endif
    }
  }

  /* Print Summary */
#if defined(PETSC_USE_DEBUG)
  for (i=0,j=0; i<size; i++) {
    if (HT[i]) {j++;}
  }
  ierr = PetscLogInfo((0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max));CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (baij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }
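
  /*
     Why MPI_BOR detects a mixed-mode assembly (a sketch; assumes the
     InsertMode values of this PETSc version, NOT_SET_VALUES = 0,
     INSERT_VALUES = 1, ADD_VALUES = 2): if every process used
     INSERT_VALUES the bitwise OR over all processes is 1, if every
     process used ADD_VALUES it is 2, and a process with nothing cached
     contributes a harmless 0.  Only a genuine mixture yields
     1|2 = 3 == (ADD_VALUES|INSERT_VALUES), which the test above rejects.
  */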
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscLogInfo((0,"MatAssemblyBegin_MPIBAIJ:Stash has %D entries,uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscLogInfo((0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col,other_disassembled;
  PetscTruth     r1,r2,r3;
  MatScalar      *val;
  InsertMode     addv = mat->insertmode;
  PetscMPIInt    n;

  PetscFunctionBegin;
  if (!baij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = b->roworiented;
    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;
    b->roworiented    = PETSC_FALSE;
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    b->roworiented    = r3;
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this stuff
  */
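
  /*
     A small example of the reduction below (hypothetical 3-process run;
     assumes DisAssemble_MPIBAIJ() clears was_assembled): processes 0 and
     2 still have was_assembled = 1, while process 1 received a brand new
     off-diagonal block and disassembled, so its was_assembled is 0.
     MPI_PROD gives other_disassembled = 1*0*1 = 0 everywhere, hence the
     test below makes processes 0 and 2 call DisAssemble_MPIBAIJ() as
     well, and all three rebuild B consistently.
  */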
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  b->compressedrow.use = PETSC_TRUE;
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_DEBUG)
  if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = PetscLogInfo((0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct));CHKERRQ(ierr);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  if (baij->rowvalues) {
    ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
    baij->rowvalues = 0;
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = baij->size,rank = baij->rank;
  PetscInt          bs = mat->bs;
  PetscTruth        iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
              rank,mat->m,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs,
              mat->bs,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr);
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    PetscInt    M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar   *a;

    /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    if (!rank) {
      ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(A,mat->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstart + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstart+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ*)baij->B->data;
    ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstart + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ"
PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  if (iascii || isdraw || issocket || isbinary) {
    ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIBAIJ"
PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->M,mat->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(baij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
#else
  if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
#endif
  if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
  if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
  if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
  if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
  if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
  if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
#if defined(PETSC_USE_MAT_SINGLE)
  if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);}
#endif
  ierr = PetscFree(baij);CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIBAIJ"
PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  }
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->m) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
  }
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIBAIJ"
PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIBAIJ"
PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscTruth     merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually */
    /* inserted in yy until the next line */
    ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1409   /* receive remote parts: note this assumes the values are not actually */
1410   /* inserted in zz until the VecScatterEnd() below, which holds for this */
1411   /* implementation but is not guaranteed in general. */
1412   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1413   PetscFunctionReturn(0);
1414 }
1415 
1416 /*
1417   This only works correctly for square matrices where the subblock A->A is the
1418    diagonal block
1419 */
1420 #undef __FUNCT__
1421 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
1422 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1423 {
1424   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1425   PetscErrorCode ierr;
1426 
1427   PetscFunctionBegin;
1428   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrices where A->A is the diagonal block");
1429   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1430   PetscFunctionReturn(0);
1431 }
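
/*
   Editorial sketch (not in the original source): since only the local
   diagonal block a->A is consulted, the caller's vector must share the row
   layout of the matrix. Assuming a square, assembled MATMPIBAIJ matrix A:
*/
#if 0
Vec diag;
ierr = VecCreateMPI(A->comm,A->m,A->M,&diag);CHKERRQ(ierr);
ierr = MatGetDiagonal(A,diag);CHKERRQ(ierr);  /* dispatches to MatGetDiagonal_MPIBAIJ */
ierr = VecDestroy(diag);CHKERRQ(ierr);
#endif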
1432 
1433 #undef __FUNCT__
1434 #define __FUNCT__ "MatScale_MPIBAIJ"
1435 PetscErrorCode MatScale_MPIBAIJ(const PetscScalar *aa,Mat A)
1436 {
1437   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1438   PetscErrorCode ierr;
1439 
1440   PetscFunctionBegin;
1441   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
1442   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
1443   PetscFunctionReturn(0);
1444 }
1445 
1446 #undef __FUNCT__
1447 #define __FUNCT__ "MatGetRow_MPIBAIJ"
1448 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1449 {
1450   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
1451   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1452   PetscErrorCode ierr;
1453   PetscInt       bs = matin->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1454   PetscInt       nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs;
1455   PetscInt       *cmap,*idx_p,cstart = mat->cstart;
1456 
1457   PetscFunctionBegin;
1458   if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1459   mat->getrowactive = PETSC_TRUE;
1460 
1461   if (!mat->rowvalues && (idx || v)) {
1462     /*
1463         allocate enough space to hold information from the longest row.
1464     */
1465     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1466     PetscInt     max = 1,mbs = mat->mbs,tmp;
1467     for (i=0; i<mbs; i++) {
1468       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1469       if (max < tmp) { max = tmp; }
1470     }
1471     ierr = PetscMalloc(max*bs2*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1472     mat->rowindices = (PetscInt*)(mat->rowvalues + max*bs2);
1473   }
1474 
1475   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows");
1476   lrow = row - brstart;
1477 
1478   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1479   if (!v)   {pvA = 0; pvB = 0;}
1480   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1481   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1482   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1483   nztot = nzA + nzB;
1484 
1485   cmap  = mat->garray;
1486   if (v  || idx) {
1487     if (nztot) {
1488       /* Sort by increasing column numbers, assuming A and B already sorted */
1489       PetscInt imark = -1;
1490       if (v) {
1491         *v = v_p = mat->rowvalues;
1492         for (i=0; i<nzB; i++) {
1493           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1494           else break;
1495         }
1496         imark = i;
1497         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1498         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1499       }
1500       if (idx) {
1501         *idx = idx_p = mat->rowindices;
1502         if (imark > -1) {
1503           for (i=0; i<imark; i++) {
1504             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1505           }
1506         } else {
1507           for (i=0; i<nzB; i++) {
1508             if (cmap[cworkB[i]/bs] < cstart)
1509               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1510             else break;
1511           }
1512           imark = i;
1513         }
1514         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1515         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1516       }
1517     } else {
1518       if (idx) *idx = 0;
1519       if (v)   *v   = 0;
1520     }
1521   }
1522   *nz = nztot;
1523   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1524   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1525   PetscFunctionReturn(0);
1526 }
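
/*
   Editorial sketch (not in the original source): MatGetRow() and
   MatRestoreRow() must be paired, and only one row may be active at a time
   (enforced through getrowactive above). Assuming grow is a global row
   owned by this process (hypothetical name):
*/
#if 0
PetscInt    nz,*cols;
PetscScalar *vals;
ierr = MatGetRow(A,grow,&nz,&cols,&vals);CHKERRQ(ierr);
/* ... cols[0..nz) and vals[0..nz) are merged from the A and B parts,
       sorted by global column index ... */
ierr = MatRestoreRow(A,grow,&nz,&cols,&vals);CHKERRQ(ierr);
#endif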
1527 
1528 #undef __FUNCT__
1529 #define __FUNCT__ "MatRestoreRow_MPIBAIJ"
1530 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1531 {
1532   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1533 
1534   PetscFunctionBegin;
1535   if (!baij->getrowactive) {
1536     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1537   }
1538   baij->getrowactive = PETSC_FALSE;
1539   PetscFunctionReturn(0);
1540 }
1541 
1542 #undef __FUNCT__
1543 #define __FUNCT__ "MatZeroEntries_MPIBAIJ"
1544 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1545 {
1546   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1547   PetscErrorCode ierr;
1548 
1549   PetscFunctionBegin;
1550   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1551   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1552   PetscFunctionReturn(0);
1553 }
1554 
1555 #undef __FUNCT__
1556 #define __FUNCT__ "MatGetInfo_MPIBAIJ"
1557 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1558 {
1559   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
1560   Mat            A = a->A,B = a->B;
1561   PetscErrorCode ierr;
1562   PetscReal      isend[5],irecv[5];
1563 
1564   PetscFunctionBegin;
1565   info->block_size     = (PetscReal)matin->bs;
1566   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1567   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1568   isend[3] = info->memory;  isend[4] = info->mallocs;
1569   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1570   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1571   isend[3] += info->memory;  isend[4] += info->mallocs;
1572   if (flag == MAT_LOCAL) {
1573     info->nz_used      = isend[0];
1574     info->nz_allocated = isend[1];
1575     info->nz_unneeded  = isend[2];
1576     info->memory       = isend[3];
1577     info->mallocs      = isend[4];
1578   } else if (flag == MAT_GLOBAL_MAX) {
1579     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1580     info->nz_used      = irecv[0];
1581     info->nz_allocated = irecv[1];
1582     info->nz_unneeded  = irecv[2];
1583     info->memory       = irecv[3];
1584     info->mallocs      = irecv[4];
1585   } else if (flag == MAT_GLOBAL_SUM) {
1586     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1587     info->nz_used      = irecv[0];
1588     info->nz_allocated = irecv[1];
1589     info->nz_unneeded  = irecv[2];
1590     info->memory       = irecv[3];
1591     info->mallocs      = irecv[4];
1592   } else {
1593     SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1594   }
1595   info->rows_global       = (PetscReal)matin->M;
1596   info->columns_global    = (PetscReal)matin->N;
1597   info->rows_local        = (PetscReal)matin->m;
1598   info->columns_local     = (PetscReal)matin->n;
1599   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1600   info->fill_ratio_needed = 0;
1601   info->factor_mallocs    = 0;
1602   PetscFunctionReturn(0);
1603 }
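
/*
   Editorial sketch (not in the original source): querying assembly
   statistics; MAT_GLOBAL_SUM reduces the per-process counts with MPI_SUM
   as above, MAT_GLOBAL_MAX with MPI_MAX.
*/
#if 0
MatInfo info;
ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
ierr = PetscPrintf(A->comm,"nz used %g, allocated %g\n",(double)info.nz_used,(double)info.nz_allocated);CHKERRQ(ierr);
#endif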
1604 
1605 #undef __FUNCT__
1606 #define __FUNCT__ "MatSetOption_MPIBAIJ"
1607 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op)
1608 {
1609   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1610   PetscErrorCode ierr;
1611 
1612   PetscFunctionBegin;
1613   switch (op) {
1614   case MAT_NO_NEW_NONZERO_LOCATIONS:
1615   case MAT_YES_NEW_NONZERO_LOCATIONS:
1616   case MAT_COLUMNS_UNSORTED:
1617   case MAT_COLUMNS_SORTED:
1618   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1619   case MAT_KEEP_ZEROED_ROWS:
1620   case MAT_NEW_NONZERO_LOCATION_ERR:
1621     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1622     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1623     break;
1624   case MAT_ROW_ORIENTED:
1625     a->roworiented = PETSC_TRUE;
1626     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1627     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1628     break;
1629   case MAT_ROWS_SORTED:
1630   case MAT_ROWS_UNSORTED:
1631   case MAT_YES_NEW_DIAGONALS:
1632     ierr = PetscLogInfo((A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"));CHKERRQ(ierr);
1633     break;
1634   case MAT_COLUMN_ORIENTED:
1635     a->roworiented = PETSC_FALSE;
1636     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1637     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1638     break;
1639   case MAT_IGNORE_OFF_PROC_ENTRIES:
1640     a->donotstash = PETSC_TRUE;
1641     break;
1642   case MAT_NO_NEW_DIAGONALS:
1643     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1644   case MAT_USE_HASH_TABLE:
1645     a->ht_flag = PETSC_TRUE;
1646     break;
1647   case MAT_SYMMETRIC:
1648   case MAT_STRUCTURALLY_SYMMETRIC:
1649   case MAT_HERMITIAN:
1650   case MAT_SYMMETRY_ETERNAL:
1651     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1652     break;
1653   case MAT_NOT_SYMMETRIC:
1654   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1655   case MAT_NOT_HERMITIAN:
1656   case MAT_NOT_SYMMETRY_ETERNAL:
1657     break;
1658   default:
1659     SETERRQ(PETSC_ERR_SUP,"unknown option");
1660   }
1661   PetscFunctionReturn(0);
1662 }
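
/*
   Editorial sketch (not in the original source): enabling the assembly hash
   table programmatically, equivalent to the -mat_use_hash_table option:
*/
#if 0
ierr = MatSetOption(A,MAT_USE_HASH_TABLE);CHKERRQ(ierr);  /* sets a->ht_flag as above */
#endif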
1663 
1664 #undef __FUNCT__
1665 #define __FUNCT__ "MatTranspose_MPIBAIJ"
1666 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,Mat *matout)
1667 {
1668   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
1669   Mat_SeqBAIJ    *Aloc;
1670   Mat            B;
1671   PetscErrorCode ierr;
1672   PetscInt       M=A->M,N=A->N,*ai,*aj,i,*rvals,j,k,col;
1673   PetscInt       bs=A->bs,mbs=baij->mbs;
1674   MatScalar      *a;
1675 
1676   PetscFunctionBegin;
1677   if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1678   ierr = MatCreate(A->comm,A->n,A->m,N,M,&B);CHKERRQ(ierr);
1679   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
1680   ierr = MatMPIBAIJSetPreallocation(B,A->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1681 
1682   /* copy over the A part */
1683   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1684   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1685   ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
1686 
1687   for (i=0; i<mbs; i++) {
1688     rvals[0] = bs*(baij->rstart + i);
1689     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1690     for (j=ai[i]; j<ai[i+1]; j++) {
1691       col = (baij->cstart+aj[j])*bs;
1692       for (k=0; k<bs; k++) {
1693         ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1694         col++; a += bs;
1695       }
1696     }
1697   }
1698   /* copy over the B part */
1699   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1700   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1701   for (i=0; i<mbs; i++) {
1702     rvals[0] = bs*(baij->rstart + i);
1703     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1704     for (j=ai[i]; j<ai[i+1]; j++) {
1705       col = baij->garray[aj[j]]*bs;
1706       for (k=0; k<bs; k++) {
1707         ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1708         col++; a += bs;
1709       }
1710     }
1711   }
1712   ierr = PetscFree(rvals);CHKERRQ(ierr);
1713   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1714   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1715 
1716   if (matout) {
1717     *matout = B;
1718   } else {
1719     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1720   }
1721   PetscFunctionReturn(0);
1722 }
1723 
1724 #undef __FUNCT__
1725 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
1726 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1727 {
1728   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1729   Mat            a = baij->A,b = baij->B;
1730   PetscErrorCode ierr;
1731   PetscInt       s1,s2,s3;
1732 
1733   PetscFunctionBegin;
1734   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1735   if (rr) {
1736     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1737     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1738     /* Overlap communication with computation. */
1739     ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1740   }
1741   if (ll) {
1742     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1743     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1744     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1745   }
1746   /* scale the diagonal block */
1747   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1748 
1749   if (rr) {
1750     /* Do a scatter end and then right scale the off-diagonal block */
1751     ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1752     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1753   }
1754 
1755   PetscFunctionReturn(0);
1756 }
1757 
1758 #undef __FUNCT__
1759 #define __FUNCT__ "MatZeroRows_MPIBAIJ"
1760 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,IS is,const PetscScalar *diag)
1761 {
1762   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1763   PetscErrorCode ierr;
1764   PetscMPIInt    imdex,size = l->size,n,rank = l->rank;
1765   PetscInt       i,N,*rows,*owners = l->rowners;
1766   PetscInt       *nprocs,j,idx,nsends,row;
1767   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
1768   PetscInt       *rvalues,tag = A->tag,count,base,slen,*source,lastidx = -1;
1769   PetscInt       *lens,*lrows,*values,bs=A->bs,rstart_bs=l->rstart_bs;
1770   MPI_Comm       comm = A->comm;
1771   MPI_Request    *send_waits,*recv_waits;
1772   MPI_Status     recv_status,*send_status;
1773   IS             istmp;
1774 #if defined(PETSC_DEBUG)
1775   PetscTruth     found = PETSC_FALSE;
1776 #endif
1777 
1778   PetscFunctionBegin;
1779   ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
1780   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
1781 
1782   /*  first count number of contributors to each processor */
1783   ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
1784   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
1785   ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
1786   j = 0;
1787   for (i=0; i<N; i++) {
1788     if (lastidx > (idx = rows[i])) j = 0;
1789     lastidx = idx;
1790     for (; j<size; j++) {
1791       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
1792         nprocs[2*j]++;
1793         nprocs[2*j+1] = 1;
1794         owner[i] = j;
1795 #if defined(PETSC_DEBUG)
1796         found = PETSC_TRUE;
1797 #endif
1798         break;
1799       }
1800     }
1801 #if defined(PETSC_DEBUG)
1802     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
1803     found = PETSC_FALSE;
1804 #endif
1805   }
1806   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
1807 
1808   /* inform other processors of number of messages and max length*/
1809   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
1810 
1811   /* post receives:   */
1812   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
1813   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
1814   for (i=0; i<nrecvs; i++) {
1815     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
1816   }
1817 
1818   /* do sends:
1819      1) starts[i] gives the starting index in svalues for stuff going to
1820      the ith processor
1821   */
1822   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
1823   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
1824   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
1825   starts[0]  = 0;
1826   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1827   for (i=0; i<N; i++) {
1828     svalues[starts[owner[i]]++] = rows[i];
1829   }
1830   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
1831 
1832   starts[0] = 0;
1833   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1834   count = 0;
1835   for (i=0; i<size; i++) {
1836     if (nprocs[2*i+1]) {
1837       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
1838     }
1839   }
1840   ierr = PetscFree(starts);CHKERRQ(ierr);
1841 
1842   base = owners[rank]*bs;
1843 
1844   /*  wait on receives */
1845   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
1846   source = lens + nrecvs;
1847   count  = nrecvs; slen = 0;
1848   while (count) {
1849     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
1850     /* unpack receives into our local space */
1851     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
1852     source[imdex]  = recv_status.MPI_SOURCE;
1853     lens[imdex]    = n;
1854     slen          += n;
1855     count--;
1856   }
1857   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1858 
1859   /* move the data into the send scatter */
1860   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
1861   count = 0;
1862   for (i=0; i<nrecvs; i++) {
1863     values = rvalues + i*nmax;
1864     for (j=0; j<lens[i]; j++) {
1865       lrows[count++] = values[j] - base;
1866     }
1867   }
1868   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1869   ierr = PetscFree(lens);CHKERRQ(ierr);
1870   ierr = PetscFree(owner);CHKERRQ(ierr);
1871   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1872 
1873   /* actually zap the local rows */
1874   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
1875   ierr = PetscLogObjectParent(A,istmp);CHKERRQ(ierr);
1876 
1877   /*
1878         Zero the required rows. If the "diagonal block" of the matrix
1879      is square and the user wishes to set the diagonal we use separate
1880      code so that MatSetValues() is not called for each diagonal entry,
1881      which would allocate new memory on each call and slow things down.
1882 
1883        Contributed by: Matthew Knepley
1884   */
1885   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1886   ierr = MatZeroRows_SeqBAIJ(l->B,istmp,0);CHKERRQ(ierr);
1887   if (diag && (l->A->M == l->A->N)) {
1888     ierr = MatZeroRows_SeqBAIJ(l->A,istmp,diag);CHKERRQ(ierr);
1889   } else if (diag) {
1890     ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr);
1891     if (((Mat_SeqBAIJ*)l->A->data)->nonew) {
1892       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1893 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1894     }
1895     for (i=0; i<slen; i++) {
1896       row  = lrows[i] + rstart_bs;
1897       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
1898     }
1899     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1900     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1901   } else {
1902     ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr);
1903   }
1904 
1905   ierr = ISDestroy(istmp);CHKERRQ(ierr);
1906   ierr = PetscFree(lrows);CHKERRQ(ierr);
1907 
1908   /* wait on sends */
1909   if (nsends) {
1910     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1911     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1912     ierr = PetscFree(send_status);CHKERRQ(ierr);
1913   }
1914   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1915   ierr = PetscFree(svalues);CHKERRQ(ierr);
1916 
1917   PetscFunctionReturn(0);
1918 }
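
/*
   Editorial sketch (not in the original source): zeroing a set of global
   rows and placing a value on the diagonal; rows owned by other processes
   are routed to their owners by the send/receive phase above. Assumes
   rows[] holds nrows global row indices (hypothetical names):
*/
#if 0
IS          is;
PetscScalar one = 1.0;
ierr = ISCreateGeneral(A->comm,nrows,rows,&is);CHKERRQ(ierr);
ierr = MatZeroRows(A,is,&one);CHKERRQ(ierr);  /* pass PETSC_NULL instead of &one to leave the diagonal zeroed */
ierr = ISDestroy(is);CHKERRQ(ierr);
#endif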
1919 
1920 #undef __FUNCT__
1921 #define __FUNCT__ "MatPrintHelp_MPIBAIJ"
1922 PetscErrorCode MatPrintHelp_MPIBAIJ(Mat A)
1923 {
1924   Mat_MPIBAIJ       *a   = (Mat_MPIBAIJ*)A->data;
1925   MPI_Comm          comm = A->comm;
1926   static PetscTruth called = PETSC_FALSE;
1927   PetscErrorCode    ierr;
1928 
1929   PetscFunctionBegin;
1930   if (!a->rank) {
1931     ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr);
1932   }
1933   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1934   ierr = (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
1935   ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
1936   PetscFunctionReturn(0);
1937 }
1938 
1939 #undef __FUNCT__
1940 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
1941 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1942 {
1943   Mat_MPIBAIJ    *a   = (Mat_MPIBAIJ*)A->data;
1944   PetscErrorCode ierr;
1945 
1946   PetscFunctionBegin;
1947   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1948   PetscFunctionReturn(0);
1949 }
1950 
1951 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
1952 
1953 #undef __FUNCT__
1954 #define __FUNCT__ "MatEqual_MPIBAIJ"
1955 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag)
1956 {
1957   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1958   Mat            a,b,c,d;
1959   PetscTruth     flg;
1960   PetscErrorCode ierr;
1961 
1962   PetscFunctionBegin;
1963   a = matA->A; b = matA->B;
1964   c = matB->A; d = matB->B;
1965 
1966   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1967   if (flg) {
1968     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1969   }
1970   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1971   PetscFunctionReturn(0);
1972 }
1973 
1974 
1975 #undef __FUNCT__
1976 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ"
1977 PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A)
1978 {
1979   PetscErrorCode ierr;
1980 
1981   PetscFunctionBegin;
1982   ierr =  MatMPIBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1983   PetscFunctionReturn(0);
1984 }
1985 
1986 /* -------------------------------------------------------------------*/
1987 static struct _MatOps MatOps_Values = {
1988        MatSetValues_MPIBAIJ,
1989        MatGetRow_MPIBAIJ,
1990        MatRestoreRow_MPIBAIJ,
1991        MatMult_MPIBAIJ,
1992 /* 4*/ MatMultAdd_MPIBAIJ,
1993        MatMultTranspose_MPIBAIJ,
1994        MatMultTransposeAdd_MPIBAIJ,
1995        0,
1996        0,
1997        0,
1998 /*10*/ 0,
1999        0,
2000        0,
2001        0,
2002        MatTranspose_MPIBAIJ,
2003 /*15*/ MatGetInfo_MPIBAIJ,
2004        MatEqual_MPIBAIJ,
2005        MatGetDiagonal_MPIBAIJ,
2006        MatDiagonalScale_MPIBAIJ,
2007        MatNorm_MPIBAIJ,
2008 /*20*/ MatAssemblyBegin_MPIBAIJ,
2009        MatAssemblyEnd_MPIBAIJ,
2010        0,
2011        MatSetOption_MPIBAIJ,
2012        MatZeroEntries_MPIBAIJ,
2013 /*25*/ MatZeroRows_MPIBAIJ,
2014        0,
2015        0,
2016        0,
2017        0,
2018 /*30*/ MatSetUpPreallocation_MPIBAIJ,
2019        0,
2020        0,
2021        0,
2022        0,
2023 /*35*/ MatDuplicate_MPIBAIJ,
2024        0,
2025        0,
2026        0,
2027        0,
2028 /*40*/ 0,
2029        MatGetSubMatrices_MPIBAIJ,
2030        MatIncreaseOverlap_MPIBAIJ,
2031        MatGetValues_MPIBAIJ,
2032        0,
2033 /*45*/ MatPrintHelp_MPIBAIJ,
2034        MatScale_MPIBAIJ,
2035        0,
2036        0,
2037        0,
2038 /*50*/ 0,
2039        0,
2040        0,
2041        0,
2042        0,
2043 /*55*/ 0,
2044        0,
2045        MatSetUnfactored_MPIBAIJ,
2046        0,
2047        MatSetValuesBlocked_MPIBAIJ,
2048 /*60*/ 0,
2049        MatDestroy_MPIBAIJ,
2050        MatView_MPIBAIJ,
2051        MatGetPetscMaps_Petsc,
2052        0,
2053 /*65*/ 0,
2054        0,
2055        0,
2056        0,
2057        0,
2058 /*70*/ MatGetRowMax_MPIBAIJ,
2059        0,
2060        0,
2061        0,
2062        0,
2063 /*75*/ 0,
2064        0,
2065        0,
2066        0,
2067        0,
2068 /*80*/ 0,
2069        0,
2070        0,
2071        0,
2072        MatLoad_MPIBAIJ,
2073 /*85*/ 0,
2074        0,
2075        0,
2076        0,
2077        0,
2078 /*90*/ 0,
2079        0,
2080        0,
2081        0,
2082        0,
2083 /*95*/ 0,
2084        0,
2085        0,
2086        0};
2087 
2088 
2089 EXTERN_C_BEGIN
2090 #undef __FUNCT__
2091 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
2092 PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
2093 {
2094   PetscFunctionBegin;
2095   *a      = ((Mat_MPIBAIJ *)A->data)->A;
2096   *iscopy = PETSC_FALSE;
2097   PetscFunctionReturn(0);
2098 }
2099 EXTERN_C_END
2100 
2101 EXTERN_C_BEGIN
2102 extern PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*);
2103 EXTERN_C_END
2104 
2105 #undef __FUNCT__
2106 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ"
2107 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt I[],const PetscInt J[],const PetscScalar v[])
2108 {
2109   Mat_MPIBAIJ    *b  = (Mat_MPIBAIJ *)B->data;
2110   PetscInt       m = B->m/bs,cstart = b->cstart, cend = b->cend,j,nnz,i,d;
2111   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart = b->rstart,ii;
2112   const PetscInt *JJ;
2113   PetscScalar    *values;
2114   PetscErrorCode ierr;
2115 
2116   PetscFunctionBegin;
2117 #if defined(PETSC_OPT_g)
2118   if (I[0]) SETERRQ1(PETSC_ERR_ARG_RANGE,"I[0] must be 0, it is %D",I[0]);
2119 #endif
2120   ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2121   o_nnz = d_nnz + m;
2122 
2123   for (i=0; i<m; i++) {
2124     nnz     = I[i+1]- I[i];
2125     JJ      = J + I[i];
2126     nnz_max = PetscMax(nnz_max,nnz);
2127 #if defined(PETSC_OPT_g)
2128     if (nnz < 0) SETERRQ1(PETSC_ERR_ARG_RANGE,"Local row %D has a negative number of columns %D",i,nnz);
2129 #endif
2130     for (j=0; j<nnz; j++) {
2131       if (*JJ >= cstart) break;
2132       JJ++;
2133     }
2134     d = 0;
2135     for (; j<nnz; j++) {
2136       if (*JJ++ >= cend) break;
2137       d++;
2138     }
2139     d_nnz[i] = d;
2140     o_nnz[i] = nnz - d;
2141   }
2142   ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2143   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2144 
2145   if (v) values = (PetscScalar*)v;
2146   else {
2147     ierr = PetscMalloc(bs*bs*(nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2148     ierr = PetscMemzero(values,bs*bs*nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2149   }
2150 
2151   ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2152   for (i=0; i<m; i++) {
2153     ii   = i + rstart;
2154     nnz  = I[i+1]- I[i];
2155     ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&ii,nnz,J+I[i],values,INSERT_VALUES);CHKERRQ(ierr);
2156   }
2157   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2158   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2159   ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr);
2160 
2161   if (!v) {
2162     ierr = PetscFree(values);CHKERRQ(ierr);
2163   }
2164   PetscFunctionReturn(0);
2165 }
2166 
2167 #undef __FUNCT__
2168 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR"
2169 /*@C
2170    MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in block AIJ
2171    format (block compressed row), using block CSR row pointers and column indices.
2172 
2173    Collective on MPI_Comm
2174 
2175    Input Parameters:
2176 +  B - the matrix
2177 .  bs - the block size
2178 .  i - the indices into j for the start of each local block row (starts with zero)
2179 .  j - the global block column indices for each local block row (starts with zero); these must be sorted within each row
2180 -  v - optional values in the matrix
2180 
2181    Level: developer
2182 
2183 .keywords: matrix, block, aij, compressed row, sparse, parallel
2184 
2185 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIBAIJ(), MPIBAIJ
2186 @*/
2187 PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2188 {
2189   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]);
2190 
2191   PetscFunctionBegin;
2192   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
2193   if (f) {
2194     ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr);
2195   }
2196   PetscFunctionReturn(0);
2197 }
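
/*
   Editorial sketch (not in the original source): preallocating from block
   CSR data. With bs=2 and two local block rows whose sorted global block
   column indices are {0,2} and {1}, the (hypothetical) arrays would be:
*/
#if 0
PetscInt ii[] = {0,2,3}; /* start of each local block row within jj[] */
PetscInt jj[] = {0,2,1}; /* global block column indices, sorted per row */
ierr = MatMPIBAIJSetPreallocationCSR(B,2,ii,jj,PETSC_NULL);CHKERRQ(ierr); /* PETSC_NULL: no initial values */
#endif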
2198 
2199 EXTERN_C_BEGIN
2200 #undef __FUNCT__
2201 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
2202 PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
2203 {
2204   Mat_MPIBAIJ    *b;
2205   PetscErrorCode ierr;
2206   PetscInt       i;
2207 
2208   PetscFunctionBegin;
2209   B->preallocated = PETSC_TRUE;
2210   ierr = PetscOptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2211 
2212   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
2213   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2214   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2215   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2216   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2217   if (d_nnz) {
2218     for (i=0; i<B->m/bs; i++) {
2219       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
2220     }
2221   }
2222   if (o_nnz) {
2223     for (i=0; i<B->m/bs; i++) {
2224       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
2225     }
2226   }
2227 
2228   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr);
2229   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr);
2230   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
2231   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
2232 
2233   b = (Mat_MPIBAIJ*)B->data;
2234   B->bs  = bs;
2235   b->bs2 = bs*bs;
2236   b->mbs = B->m/bs;
2237   b->nbs = B->n/bs;
2238   b->Mbs = B->M/bs;
2239   b->Nbs = B->N/bs;
2240 
2241   ierr = MPI_Allgather(&b->mbs,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
2242   b->rowners[0]    = 0;
2243   for (i=2; i<=b->size; i++) {
2244     b->rowners[i] += b->rowners[i-1];
2245   }
2246   b->rstart    = b->rowners[b->rank];
2247   b->rend      = b->rowners[b->rank+1];
2248 
2249   ierr = MPI_Allgather(&b->nbs,1,MPIU_INT,b->cowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
2250   b->cowners[0] = 0;
2251   for (i=2; i<=b->size; i++) {
2252     b->cowners[i] += b->cowners[i-1];
2253   }
2254   b->cstart    = b->cowners[b->rank];
2255   b->cend      = b->cowners[b->rank+1];
2256 
2257   for (i=0; i<=b->size; i++) {
2258     b->rowners_bs[i] = b->rowners[i]*bs;
2259   }
2260   b->rstart_bs = b->rstart*bs;
2261   b->rend_bs   = b->rend*bs;
2262   b->cstart_bs = b->cstart*bs;
2263   b->cend_bs   = b->cend*bs;
2264 
2265   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);CHKERRQ(ierr);
2266   ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
2267   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2268   ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
2269   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);CHKERRQ(ierr);
2270   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
2271   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
2272   ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
2273 
2274   ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);
2275 
2276   PetscFunctionReturn(0);
2277 }
2278 EXTERN_C_END
2279 
2280 EXTERN_C_BEGIN
2281 EXTERN PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
2282 EXTERN PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
2283 EXTERN_C_END
2284 
2285 /*MC
2286    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
2287 
2288    Options Database Keys:
2289 . -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
2290 
2291   Level: beginner
2292 
2293 .seealso: MatCreateMPIBAIJ
2294 M*/
2295 
2296 EXTERN_C_BEGIN
2297 #undef __FUNCT__
2298 #define __FUNCT__ "MatCreate_MPIBAIJ"
2299 PetscErrorCode MatCreate_MPIBAIJ(Mat B)
2300 {
2301   Mat_MPIBAIJ    *b;
2302   PetscErrorCode ierr;
2303   PetscTruth     flg;
2304 
2305   PetscFunctionBegin;
2306   ierr = PetscNew(Mat_MPIBAIJ,&b);CHKERRQ(ierr);
2307   B->data = (void*)b;
2308 
2309   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2310   B->mapping    = 0;
2311   B->factor     = 0;
2312   B->assembled  = PETSC_FALSE;
2313 
2314   B->insertmode = NOT_SET_VALUES;
2315   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
2316   ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr);
2317 
2318   /* build local table of row and column ownerships */
2319   ierr          = PetscMalloc(3*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr);
2320   ierr = PetscLogObjectMemory(B,3*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));CHKERRQ(ierr);
2321   b->cowners    = b->rowners + b->size + 2;
2322   b->rowners_bs = b->cowners + b->size + 2;
2323 
2324   /* build cache for off array entries formed */
2325   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
2326   b->donotstash  = PETSC_FALSE;
2327   b->colmap      = PETSC_NULL;
2328   b->garray      = PETSC_NULL;
2329   b->roworiented = PETSC_TRUE;
2330 
2331 #if defined(PETSC_USE_MAT_SINGLE)
2332   /* stuff for MatSetValues_XXX in single precision */
2333   b->setvalueslen     = 0;
2334   b->setvaluescopy    = PETSC_NULL;
2335 #endif
2336 
2337   /* stuff used in block assembly */
2338   b->barray       = 0;
2339 
2340   /* stuff used for matrix vector multiply */
2341   b->lvec         = 0;
2342   b->Mvctx        = 0;
2343 
2344   /* stuff for MatGetRow() */
2345   b->rowindices   = 0;
2346   b->rowvalues    = 0;
2347   b->getrowactive = PETSC_FALSE;
2348 
2349   /* hash table stuff */
2350   b->ht           = 0;
2351   b->hd           = 0;
2352   b->ht_size      = 0;
2353   b->ht_flag      = PETSC_FALSE;
2354   b->ht_fact      = 0;
2355   b->ht_total_ct  = 0;
2356   b->ht_insert_ct = 0;
2357 
2358   ierr = PetscOptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
2359   if (flg) {
2360     PetscReal fact = 1.39;
2361     ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
2362     ierr = PetscOptionsGetReal(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
2363     if (fact <= 1.0) fact = 1.39;
2364     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
2365     ierr = PetscLogInfo((0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact));CHKERRQ(ierr);
2366   }
2367   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2368                                      "MatStoreValues_MPIBAIJ",
2369                                      MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
2370   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2371                                      "MatRetrieveValues_MPIBAIJ",
2372                                      MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
2373   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
2374                                      "MatGetDiagonalBlock_MPIBAIJ",
2375                                      MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
2376   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C",
2377                                      "MatMPIBAIJSetPreallocation_MPIBAIJ",
2378                                      MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
2379   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",
2380                                      "MatMPIBAIJSetPreallocationCSR_MPIBAIJ",
2381                                      MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
2382   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
2383                                      "MatDiagonalScaleLocal_MPIBAIJ",
2384                                      MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
2385   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C",
2386                                      "MatSetHashTableFactor_MPIBAIJ",
2387                                      MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
2388   PetscFunctionReturn(0);
2389 }
2390 EXTERN_C_END
2391 
2392 /*MC
2393    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
2394 
2395    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
2396    and MATMPIBAIJ otherwise.
2397 
2398    Options Database Keys:
2399 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
2400 
2401   Level: beginner
2402 
2403 .seealso: MatCreateMPIBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
2404 M*/
2405 
2406 EXTERN_C_BEGIN
2407 #undef __FUNCT__
2408 #define __FUNCT__ "MatCreate_BAIJ"
2409 PetscErrorCode MatCreate_BAIJ(Mat A)
2410 {
2411   PetscErrorCode ierr;
2412   PetscMPIInt    size;
2413 
2414   PetscFunctionBegin;
2415   ierr = PetscObjectChangeTypeName((PetscObject)A,MATBAIJ);CHKERRQ(ierr);
2416   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
2417   if (size == 1) {
2418     ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr);
2419   } else {
2420     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
2421   }
2422   PetscFunctionReturn(0);
2423 }
2424 EXTERN_C_END
2425 
2426 #undef __FUNCT__
2427 #define __FUNCT__ "MatMPIBAIJSetPreallocation"
2428 /*@C
2429    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
2430    (block compressed row).  For good matrix assembly performance
2431    the user should preallocate the matrix storage by setting the parameters
2432    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2433    performance can be increased by more than a factor of 50.
2434 
2435    Collective on Mat
2436 
2437    Input Parameters:
2438 +  A - the matrix
2439 .  bs   - size of block
2440 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2441            submatrix  (same for all local rows)
2442 .  d_nnz - array containing the number of block nonzeros in the various block rows
2443            in the diagonal portion of the local submatrix (possibly different for each
2444            block row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
2445 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2446            submatrix (same for all local rows).
2447 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2448            off-diagonal portion of the local submatrix (possibly different for
2449            each block row) or PETSC_NULL.
2450 
2451    If the *_nnz parameter is given then the *_nz parameter is ignored
2452 
2453    Options Database Keys:
2454 .   -mat_no_unroll - uses code that does not unroll the loops in the
2455                      block calculations (much slower)
2456 .   -mat_block_size - size of the blocks to use
2457 
2458    Notes:
2459    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
2460    then it must be used on all processors that share the object for that argument.
2461 
2462    Storage Information:
2463    For a square global matrix we define each processor's diagonal portion
2464    to be its local rows and the corresponding columns (a square submatrix);
2465    each processor's off-diagonal portion encompasses the remainder of the
2466    local matrix (a rectangular submatrix).
2467 
2468    The user can specify preallocated storage for the diagonal part of
2469    the local submatrix with either d_nz or d_nnz (not both).  Set
2470    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2471    memory allocation.  Likewise, specify preallocated storage for the
2472    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2473 
2474    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2475    the figure below we depict these three local rows and all columns (0-11).
2476 
2477 .vb
2478            0 1 2 3 4 5 6 7 8 9 10 11
2479           -------------------
2480    row 3  |  o o o d d d o o o o o o
2481    row 4  |  o o o d d d o o o o o o
2482    row 5  |  o o o d d d o o o o o o
2483           -------------------
2484 .ve
2485 
2486    Thus, any entries in the d locations are stored in the d (diagonal)
2487    submatrix, and any entries in the o locations are stored in the
2488    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
2489    stored simply in the MATSEQBAIJ format for compressed row storage.
2490 
2491    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2492    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2493    In general, for PDE problems in which most nonzeros are near the diagonal,
2494    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2495    or you will get TERRIBLE performance; see the users' manual chapter on
2496    matrices.
2497 
2498    Level: intermediate
2499 
2500 .keywords: matrix, block, aij, compressed row, sparse, parallel
2501 
2502 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR()
2503 @*/
2504 PetscErrorCode MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2505 {
2506   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
2507 
2508   PetscFunctionBegin;
2509   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2510   if (f) {
2511     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2512   }
2513   PetscFunctionReturn(0);
2514 }
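
/*
   Editorial sketch (not in the original source): typical preallocation of a
   matrix already set to MATMPIBAIJ, either with uniform per-block-row
   estimates or with exact counts in the (hypothetical) arrays dnnz/onnz of
   length local-rows/bs:
*/
#if 0
ierr = MatMPIBAIJSetPreallocation(B,bs,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
/* or, with exact per-block-row counts: */
ierr = MatMPIBAIJSetPreallocation(B,bs,0,dnnz,0,onnz);CHKERRQ(ierr);
#endif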
2515 
2516 #undef __FUNCT__
2517 #define __FUNCT__ "MatCreateMPIBAIJ"
2518 /*@C
2519    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
2520    (block compressed row).  For good matrix assembly performance
2521    the user should preallocate the matrix storage by setting the parameters
2522    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2523    performance can be increased by more than a factor of 50.
2524 
2525    Collective on MPI_Comm
2526 
2527    Input Parameters:
2528 +  comm - MPI communicator
2529 .  bs   - size of block
2530 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2531            This value should be the same as the local size used in creating the
2532            y vector for the matrix-vector product y = Ax.
2533 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2534            This value should be the same as the local size used in creating the
2535            x vector for the matrix-vector product y = Ax.
2536 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2537 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2538 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
2539            submatrix  (same for all local rows)
2540 .  d_nnz - array containing the number of nonzero blocks in the various block rows
2541            in the diagonal portion of the local submatrix (possibly different for each
2542            block row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
2543 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
2544            submatrix (same for all local rows).
2545 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
2546            off-diagonal portion of the local submatrix (possibly different for
2547            each block row) or PETSC_NULL.
2548 
2549    Output Parameter:
2550 .  A - the matrix
2551 
2552    Options Database Keys:
2553 .   -mat_no_unroll - uses code that does not unroll the loops in the
2554                      block calculations (much slower)
2555 .   -mat_block_size - size of the blocks to use
2556 
2557    Notes:
2558    If the *_nnz parameter is given then the *_nz parameter is ignored
2559 
2560    A nonzero block is any block that has 1 or more nonzeros in it
2561 
2562    The user MUST specify either the local or global matrix dimensions
2563    (possibly both).
2564 
2565    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
2566    then it must be used on all processors that share the object for that argument.
2567 
2568    Storage Information:
2569    For a square global matrix we define each processor's diagonal portion
2570    to be its local rows and the corresponding columns (a square submatrix);
2571    each processor's off-diagonal portion encompasses the remainder of the
2572    local matrix (a rectangular submatrix).
2573 
2574    The user can specify preallocated storage for the diagonal part of
2575    the local submatrix with either d_nz or d_nnz (not both).  Set
2576    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2577    memory allocation.  Likewise, specify preallocated storage for the
2578    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2579 
2580    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2581    the figure below we depict these three local rows and all columns (0-11).
2582 
2583 .vb
2584            0 1 2 3 4 5 6 7 8 9 10 11
2585           -------------------
2586    row 3  |  o o o d d d o o o o o o
2587    row 4  |  o o o d d d o o o o o o
2588    row 5  |  o o o d d d o o o o o o
2589           -------------------
2590 .ve
2591 
2592    Thus, any entries in the d locations are stored in the d (diagonal)
2593    submatrix, and any entries in the o locations are stored in the
2594    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
2595    stored simply in the MATSEQBAIJ format for compressed row storage.
2596 
2597    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2598    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2599    In general, for PDE problems in which most nonzeros are near the diagonal,
2600    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2601    or you will get TERRIBLE performance; see the users' manual chapter on
2602    matrices.
2603 
2604    Level: intermediate
2605 
2606 .keywords: matrix, block, aij, compressed row, sparse, parallel
2607 
2608 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
2609 @*/
2610 PetscErrorCode MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2611 {
2612   PetscErrorCode ierr;
2613   PetscMPIInt    size;
2614 
2615   PetscFunctionBegin;
2616   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2617   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2618   if (size > 1) {
2619     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
2620     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2621   } else {
2622     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
2623     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2624   }
2625   PetscFunctionReturn(0);
2626 }
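
/*
   Editorial sketch (not in the original source): creating and assembling a
   parallel BAIJ matrix with block size 2; global sizes are derived from the
   local ones. comm, m, and n are assumed to be supplied by the caller:
*/
#if 0
Mat A;
ierr = MatCreateMPIBAIJ(comm,2,m,n,PETSC_DETERMINE,PETSC_DETERMINE,5,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
/* ... insert entries with MatSetValuesBlocked() ... */
ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
#endif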
2627 
2628 #undef __FUNCT__
2629 #define __FUNCT__ "MatDuplicate_MPIBAIJ"
2630 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2631 {
2632   Mat            mat;
2633   Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
2634   PetscErrorCode ierr;
2635   PetscInt       len=0;
2636 
2637   PetscFunctionBegin;
2638   *newmat       = 0;
2639   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
2640   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
2641   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2642 
2643   mat->factor       = matin->factor;
2644   mat->preallocated = PETSC_TRUE;
2645   mat->assembled    = PETSC_TRUE;
2646   mat->insertmode   = NOT_SET_VALUES;
2647 
2648   a      = (Mat_MPIBAIJ*)mat->data;
2649   mat->bs  = matin->bs;
2650   a->bs2   = oldmat->bs2;
2651   a->mbs   = oldmat->mbs;
2652   a->nbs   = oldmat->nbs;
2653   a->Mbs   = oldmat->Mbs;
2654   a->Nbs   = oldmat->Nbs;
2655 
2656   a->rstart       = oldmat->rstart;
2657   a->rend         = oldmat->rend;
2658   a->cstart       = oldmat->cstart;
2659   a->cend         = oldmat->cend;
2660   a->size         = oldmat->size;
2661   a->rank         = oldmat->rank;
2662   a->donotstash   = oldmat->donotstash;
2663   a->roworiented  = oldmat->roworiented;
2664   a->rowindices   = 0;
2665   a->rowvalues    = 0;
2666   a->getrowactive = PETSC_FALSE;
2667   a->barray       = 0;
2668   a->rstart_bs    = oldmat->rstart_bs;
2669   a->rend_bs      = oldmat->rend_bs;
2670   a->cstart_bs    = oldmat->cstart_bs;
2671   a->cend_bs      = oldmat->cend_bs;
2672 
2673   /* hash table stuff */
2674   a->ht           = 0;
2675   a->hd           = 0;
2676   a->ht_size      = 0;
2677   a->ht_flag      = oldmat->ht_flag;
2678   a->ht_fact      = oldmat->ht_fact;
2679   a->ht_total_ct  = 0;
2680   a->ht_insert_ct = 0;
2681 
2682   ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
2683   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
2684   ierr = MatStashCreate_Private(matin->comm,matin->bs,&mat->bstash);CHKERRQ(ierr);
2685   if (oldmat->colmap) {
2686 #if defined (PETSC_USE_CTABLE)
2687   ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2688 #else
2689   ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2690   ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2691   ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2692 #endif
2693   } else a->colmap = 0;
2694 
2695   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2696     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2697     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2698     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
2699   } else a->garray = 0;
2700 
2701   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2702   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2703   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2704   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2705 
2706   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2707   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2708   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2709   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2710   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
2711   *newmat = mat;
2712 
2713   PetscFunctionReturn(0);
2714 }
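
/*
   Editorial sketch (not in the original source): duplicating an assembled
   MPIBAIJ matrix, copying or discarding the numerical values:
*/
#if 0
Mat B;
ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); /* or MAT_DO_NOT_COPY_VALUES */
ierr = MatDestroy(B);CHKERRQ(ierr);
#endif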
2715 
2716 #include "petscsys.h"
2717 
2718 #undef __FUNCT__
2719 #define __FUNCT__ "MatLoad_MPIBAIJ"
2720 PetscErrorCode MatLoad_MPIBAIJ(PetscViewer viewer,const MatType type,Mat *newmat)
2721 {
2722   Mat            A;
2723   PetscErrorCode ierr;
2724   int            fd;
2725   PetscInt       i,nz,j,rstart,rend;
2726   PetscScalar    *vals,*buf;
2727   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2728   MPI_Status     status;
2729   PetscMPIInt    rank,size,maxnz;
2730   PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
2731   PetscInt       *locrowlens,*procsnz = 0,*browners;
2732   PetscInt       jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax;
2733   PetscMPIInt    tag = ((PetscObject)viewer)->tag;
2734   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2735   PetscInt       dcount,kmax,k,nzcount,tmp,mend;
2736 
2737   PetscFunctionBegin;
2738   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2739 
2740   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2741   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2742   if (!rank) {
2743     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2744     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2745     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
2746   }
2747 
2748   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2749   M = header[1]; N = header[2];
2750 
2751   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2752 
2753   /*
2754      This code adds extra rows to make sure the number of rows is
2755      divisible by the blocksize
2756   */
2757   Mbs        = M/bs;
2758   extra_rows = bs - M + bs*Mbs;
2759   if (extra_rows == bs) extra_rows = 0;
2760   else                  Mbs++;
2761   if (extra_rows && !rank) {
2762     ierr = PetscLogInfo((0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n"));CHKERRQ(ierr);
2763   }
2764 
2765   /* determine ownership of all rows */
2766   mbs        = Mbs/size + ((Mbs % size) > rank);
2767   m          = mbs*bs;
2768   ierr       = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr);
2769   ierr       = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2770 
2771   /* process 0 needs enough room for process with most rows */
2772   if (!rank) {
2773     mmax = rowners[1];
2774     for (i=2; i<=size; i++) {
2775       mmax = PetscMax(mmax,rowners[i]);
2776     }
2777     mmax*=bs;
2778   } else mmax = m;
2779 
2780   rowners[0] = 0;
2781   for (i=2; i<=size; i++)  rowners[i] += rowners[i-1];
2782   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2783   rstart = rowners[rank];
2784   rend   = rowners[rank+1];
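  /* illustrative layout: with size = 3 and Mbs = 8 the block rows split as
     3,3,2, so rowners = {0,3,6,8}; browners holds the same partition in
     point rows, i.e. each entry of rowners scaled by bs */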
2785 
2786   /* distribute row lengths to all processors */
2787   ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr);
2788   if (!rank) {
2789     mend = m;
2790     if (size == 1) mend = mend - extra_rows;
2791     ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr);
2792     for (j=mend; j<m; j++) locrowlens[j] = 1;
2793     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2794     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2795     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2796     for (j=0; j<m; j++) {
2797       procsnz[0] += locrowlens[j];
2798     }
2799     for (i=1; i<size; i++) {
2800       mend = browners[i+1] - browners[i];
2801       if (i == size-1) mend = mend - extra_rows;
2802       ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr);
2803       for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
2804       /* calculate the number of nonzeros on each processor */
2805       for (j=0; j<browners[i+1]-browners[i]; j++) {
2806         procsnz[i] += rowlengths[j];
2807       }
2808       ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2809     }
2810     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2811   } else {
2812     ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2813   }
2814 
2815   if (!rank) {
2816     /* determine max buffer needed and allocate it */
2817     maxnz = procsnz[0];
2818     for (i=1; i<size; i++) {
2819       maxnz = PetscMax(maxnz,procsnz[i]);
2820     }
2821     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2822 
2823     /* read in my part of the matrix column indices  */
2824     nz     = procsnz[0];
2825     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2826     mycols = ibuf;
2827     if (size == 1)  nz -= extra_rows;
2828     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2829     if (size == 1) for (i=0; i<extra_rows; i++) { mycols[nz+i] = M+i; }
2830 
2831     /* read in everyone else's column indices (except the last process's) and ship them off */
2832     for (i=1; i<size-1; i++) {
2833       nz   = procsnz[i];
2834       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2835       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2836     }
2837     /* read in the part for the last process */
2838     if (size != 1) {
2839       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2840       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2841       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2842       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2843     }
2844     ierr = PetscFree(cols);CHKERRQ(ierr);
2845   } else {
2846     /* determine buffer space needed for message */
2847     nz = 0;
2848     for (i=0; i<m; i++) {
2849       nz += locrowlens[i];
2850     }
2851     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2852     mycols = ibuf;
2853     /* receive the message of column indices */
2854     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2855     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2856     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file: wrong count of column indices received");
2857   }
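  /* every process now holds the row lengths (locrowlens) and the column
     indices (mycols) of its own rows; the MPI_Get_count() check guards the
     receivers against a truncated or inconsistent file */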
2858 
2859   /* loop over local block rows, counting the entries in the diagonal (dlens) and off-diagonal (odlens) blocks */
2860   ierr     = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
2861   ierr     = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
2862   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2863   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2864   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2865   rowcount = 0; nzcount = 0;
2866   for (i=0; i<mbs; i++) {
2867     dcount  = 0;
2868     odcount = 0;
2869     for (j=0; j<bs; j++) {
2870       kmax = locrowlens[rowcount];
2871       for (k=0; k<kmax; k++) {
2872         tmp = mycols[nzcount++]/bs;
2873         if (!mask[tmp]) {
2874           mask[tmp] = 1;
2875           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
2876           else masked1[dcount++] = tmp;
2877         }
2878       }
2879       rowcount++;
2880     }
2881 
2882     dlens[i]  = dcount;
2883     odlens[i] = odcount;
2884 
2885     /* zero out the mask elements we set */
2886     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2887     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2888   }
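  /* note on the counting scheme above: mask[] flags block columns already seen
     in the current block row, so each block is counted once even though all bs
     point rows of the block repeat the same block columns; masked1[]/masked2[]
     record exactly which diagonal/off-diagonal flags were set so only those are
     cleared, keeping the cost proportional to the entries actually visited */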
2889 
2890   /* create our matrix */
2891   ierr = MatCreate(comm,m,m,M+extra_rows,N+extra_rows,&A);CHKERRQ(ierr);
2892   ierr = MatSetType(A,type);CHKERRQ(ierr);
2893   ierr = MatMPIBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2894 
2895   /* the file stores the column indices of each row in sorted order */
2896   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2897 
2898   if (!rank) {
2899     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2900     /* read in my part of the matrix numerical values  */
2901     nz = procsnz[0];
2902     vals = buf;
2903     mycols = ibuf;
2904     if (size == 1)  nz -= extra_rows;
2905     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2906     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2907 
2908     /* insert into matrix */
2909     jj      = rstart*bs;
2910     for (i=0; i<m; i++) {
2911       ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2912       mycols += locrowlens[i];
2913       vals   += locrowlens[i];
2914       jj++;
2915     }
2916     /* read in other processors (except the last one) and ship out */
2917     for (i=1; i<size-1; i++) {
2918       nz   = procsnz[i];
2919       vals = buf;
2920       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2921       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2922     }
2923     /* the values for the last process */
2924     if (size != 1) {
2925       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2926       vals = buf;
2927       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2928       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2929       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
2930     }
2931     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2932   } else {
2933     /* receive numeric values */
2934     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2935 
2936     /* receive the message of values */
2937     vals   = buf;
2938     mycols = ibuf;
2939     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2940     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2941     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file: wrong count of values received");
2942 
2943     /* insert into matrix */
2944     jj      = rstart*bs;
2945     for (i=0; i<m; i++) {
2946       ierr    = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2947       mycols += locrowlens[i];
2948       vals   += locrowlens[i];
2949       jj++;
2950     }
2951   }
2952   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2953   ierr = PetscFree(buf);CHKERRQ(ierr);
2954   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2955   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
2956   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
2957   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
2958   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2959   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2960 
2961   *newmat = A;
2962   PetscFunctionReturn(0);
2963 }
2964 
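/*
   A sketch of the typical calling sequence that reaches MatLoad_MPIBAIJ (the
   viewer mode flag follows this generation of PETSc; error handling elided):

     PetscViewer viewer;
     Mat         A;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",PETSC_FILE_RDONLY,&viewer);CHKERRQ(ierr);
     ierr = MatLoad(viewer,MATMPIBAIJ,&A);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);

   The block size used while loading can be chosen with -matload_block_size <bs>.
*/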
2965 #undef __FUNCT__
2966 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
2967 /*@
2968    MatMPIBAIJSetHashTableFactor - Sets the factor used to compute the size of the matrix hash table.
2969 
2970    Collective on Mat
2971 
2972    Input Parameters:
2973 +  mat  - the matrix
2974 -  fact - the multiple of the expected number of nonzeros used to size the hash table
2975 
2976    Level: advanced
2977 
2978   Notes:
2979    This can also be set with the command line option: -mat_use_hash_table <fact>
2980 
2981 .keywords: matrix, hashtable, factor, HT
2982 
2983 .seealso: MatSetOption()
2984 @*/
2985 PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
2986 {
2987   PetscErrorCode ierr,(*f)(Mat,PetscReal);
2988 
2989   PetscFunctionBegin;
2990   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);CHKERRQ(ierr);
2991   if (f) {
2992     ierr = (*f)(mat,fact);CHKERRQ(ierr);
2993   }
2994   PetscFunctionReturn(0);
2995 }
2996 
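/*
   Example use (a sketch; assumes hash-table based assembly has been enabled
   on an MPIBAIJ matrix first):

     ierr = MatSetOption(mat,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
     ierr = MatMPIBAIJSetHashTableFactor(mat,1.99);CHKERRQ(ierr);

   A larger factor trades extra memory for fewer hash collisions during assembly.
*/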
2997 #undef __FUNCT__
2998 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
2999 PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
3000 {
3001   Mat_MPIBAIJ *baij;
3002 
3003   PetscFunctionBegin;
3004   baij = (Mat_MPIBAIJ*)mat->data;
3005   baij->ht_fact = fact;
3006   PetscFunctionReturn(0);
3007 }
3008 
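/*
   MatMPIBAIJGetSeqBAIJ - returns the pieces of the parallel matrix: Ad is the
   sequential diagonal block, Ao the sequential off-diagonal block, and
   colmap[i] is the global block column of local column i of Ao. The caller
   must not destroy the returned matrices or free colmap.
*/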
3009 #undef __FUNCT__
3010 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
3011 PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3012 {
3013   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
3014   PetscFunctionBegin;
3015   *Ad     = a->A;
3016   *Ao     = a->B;
3017   *colmap = a->garray;
3018   PetscFunctionReturn(0);
3019 }
3020