xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision bfd1092c8783874607dbeb0d0a9f7692ea8a27d4)
1 
2 #include "src/mat/impls/baij/mpi/mpibaij.h"   /*I  "petscmat.h"  I*/
3 
4 EXTERN PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
5 EXTERN PetscErrorCode DisAssemble_MPIBAIJ(Mat);
6 EXTERN PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat,PetscInt,IS[],PetscInt);
7 EXTERN PetscErrorCode MatGetSubMatrices_MPIBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat *[]);
8 EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []);
9 EXTERN PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
10 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
11 EXTERN PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
12 EXTERN PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
13 EXTERN PetscErrorCode MatPrintHelp_SeqBAIJ(Mat);
14 EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,const PetscScalar*);
15 
16 /*  UGLY, ugly, ugly
17    When MatScalar == PetscScalar the ..._MatScalar() routines below are simply macro
18    aliases for the regular routines, so no conversion is ever done. Otherwise (when
19    PETSC_USE_MAT_SINGLE is defined) the ..._MatScalar() routines take matrix elements
20    already in single precision and insert them into the single precision data structures,
21    while e.g. MatSetValuesBlocked_MPIBAIJ() converts its PetscScalar entries to single
22    precision and then calls the corresponding ..._MatScalar() routine.  */
23 #if defined(PETSC_USE_MAT_SINGLE)
24 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
25 EXTERN PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
26 EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
27 EXTERN PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
28 EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
29 #else
30 #define MatSetValuesBlocked_SeqBAIJ_MatScalar      MatSetValuesBlocked_SeqBAIJ
31 #define MatSetValues_MPIBAIJ_MatScalar             MatSetValues_MPIBAIJ
32 #define MatSetValuesBlocked_MPIBAIJ_MatScalar      MatSetValuesBlocked_MPIBAIJ
33 #define MatSetValues_MPIBAIJ_HT_MatScalar          MatSetValues_MPIBAIJ_HT
34 #define MatSetValuesBlocked_MPIBAIJ_HT_MatScalar   MatSetValuesBlocked_MPIBAIJ_HT
35 #endif
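/*
   For example (illustrative only): in the default build, where MatScalar == PetscScalar,
   the aliases above make the first call below expand to the second, so no copy or
   conversion ever takes place; only with PETSC_USE_MAT_SINGLE are the ..._MatScalar()
   routines separate functions that take single precision input.

       ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,m,im,n,in,v,addv);CHKERRQ(ierr);
       ierr = MatSetValuesBlocked_MPIBAIJ(mat,m,im,n,in,v,addv);CHKERRQ(ierr);
*/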
36 
37 #undef __FUNCT__
38 #define __FUNCT__ "MatGetRowMax_MPIBAIJ"
39 PetscErrorCode MatGetRowMax_MPIBAIJ(Mat A,Vec v)
40 {
41   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
42   PetscErrorCode ierr;
43   PetscInt       i;
44   PetscScalar    *va,*vb;
45   Vec            vtmp;
46 
47   PetscFunctionBegin;
48 
49   ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr);
50   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
51 
52   ierr = VecCreateSeq(PETSC_COMM_SELF,A->m,&vtmp);CHKERRQ(ierr);
53   ierr = MatGetRowMax(a->B,vtmp);CHKERRQ(ierr);
54   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
55 
56   for (i=0; i<A->m; i++){
57     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) va[i] = vb[i];
58   }
59 
60   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
61   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
62   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
63 
64   PetscFunctionReturn(0);
65 }
66 
67 EXTERN_C_BEGIN
68 #undef __FUNCT__
69 #define __FUNCT__ "MatStoreValues_MPIBAIJ"
70 PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat)
71 {
72   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
73   PetscErrorCode ierr;
74 
75   PetscFunctionBegin;
76   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
77   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
78   PetscFunctionReturn(0);
79 }
80 EXTERN_C_END
81 
82 EXTERN_C_BEGIN
83 #undef __FUNCT__
84 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
85 PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat)
86 {
87   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
88   PetscErrorCode ierr;
89 
90   PetscFunctionBegin;
91   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
92   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
93   PetscFunctionReturn(0);
94 }
95 EXTERN_C_END
96 
97 /*
98      Local utility routine that creates a mapping from the global column
99    number to the local number in the off-diagonal part of the local
100    storage of the matrix.  This is done in a non-scalable way, since the
101    length of colmap equals the number of global block columns.
102 */
103 #undef __FUNCT__
104 #define __FUNCT__ "CreateColmap_MPIBAIJ_Private"
105 PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat)
106 {
107   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
108   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
109   PetscErrorCode ierr;
110   PetscInt       nbs = B->nbs,i,bs=mat->bs;
111 
112   PetscFunctionBegin;
113 #if defined (PETSC_USE_CTABLE)
114   ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr);
115   for (i=0; i<nbs; i++){
116     ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
117   }
118 #else
119   ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr);
120   ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
121   ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
122   for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
123 #endif
124   PetscFunctionReturn(0);
125 }
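/*
   A minimal sketch (illustrative only, not compiled; gcol, bs, ierr and baij are assumed
   from the caller's context) of how the map built above is consulted later, e.g. in
   MatSetValues_MPIBAIJ_MatScalar(): the global point column gcol is reduced to its block
   column gcol/bs, looked up in colmap (entries are stored +1 so that 0 means "not present"),
   and the point offset gcol%bs is added back to obtain the local column in the
   off-diagonal part baij->B.
*/
#if 0
  PetscInt lcol;                                     /* local column in baij->B, negative if absent */
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableFind(baij->colmap,gcol/bs + 1,&lcol);CHKERRQ(ierr);
  lcol = lcol - 1;
#else
  lcol = baij->colmap[gcol/bs] - 1;
#endif
  if (lcol >= 0) lcol += gcol % bs;                  /* offset within the bs x bs block */
#endif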
126 
127 #define CHUNKSIZE  10
128 
129 #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
130 { \
131  \
132     brow = row/bs;  \
133     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
134     rmax = aimax[brow]; nrow = ailen[brow]; \
135       bcol = col/bs; \
136       ridx = row % bs; cidx = col % bs; \
137       low = 0; high = nrow; \
138       while (high-low > 3) { \
139         t = (low+high)/2; \
140         if (rp[t] > bcol) high = t; \
141         else              low  = t; \
142       } \
143       for (_i=low; _i<high; _i++) { \
144         if (rp[_i] > bcol) break; \
145         if (rp[_i] == bcol) { \
146           bap  = ap +  bs2*_i + bs*cidx + ridx; \
147           if (addv == ADD_VALUES) *bap += value;  \
148           else                    *bap  = value;  \
149           goto a_noinsert; \
150         } \
151       } \
152       if (a->nonew == 1) goto a_noinsert; \
153       else if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
154       if (nrow >= rmax) { \
155         /* there is no extra room in row, therefore enlarge */ \
156         PetscInt       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
157         MatScalar *new_a; \
158  \
159         if (a->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
160  \
161         /* malloc new storage space */ \
162         len     = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(PetscInt); \
163         ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
164         new_j   = (PetscInt*)(new_a + bs2*new_nz); \
165         new_i   = new_j + new_nz; \
166  \
167         /* copy over old data into new slots */ \
168         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \
169         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
170         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); \
171         len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
172         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); \
173         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
174         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
175         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
176                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
177         /* free up old matrix storage */ \
178         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
179         if (!a->singlemalloc) { \
180           ierr = PetscFree(a->i);CHKERRQ(ierr); \
181           ierr = PetscFree(a->j);CHKERRQ(ierr);\
182         } \
183         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
184         a->singlemalloc = PETSC_TRUE; \
185  \
186         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
187         rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
188         ierr = PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar)));CHKERRQ(ierr); \
189         a->maxnz += bs2*CHUNKSIZE; \
190         a->reallocs++; \
191         a->nz++; \
192       } \
193       N = nrow++ - 1;  \
194       /* shift up all the later entries in this row */ \
195       for (ii=N; ii>=_i; ii--) { \
196         rp[ii+1] = rp[ii]; \
197         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
198       } \
199       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
200       rp[_i]                      = bcol;  \
201       ap[bs2*_i + bs*cidx + ridx] = value;  \
202       a_noinsert:; \
203     ailen[brow] = nrow; \
204 }
205 
206 #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
207 { \
208     brow = row/bs;  \
209     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
210     rmax = bimax[brow]; nrow = bilen[brow]; \
211       bcol = col/bs; \
212       ridx = row % bs; cidx = col % bs; \
213       low = 0; high = nrow; \
214       while (high-low > 3) { \
215         t = (low+high)/2; \
216         if (rp[t] > bcol) high = t; \
217         else              low  = t; \
218       } \
219       for (_i=low; _i<high; _i++) { \
220         if (rp[_i] > bcol) break; \
221         if (rp[_i] == bcol) { \
222           bap  = ap +  bs2*_i + bs*cidx + ridx; \
223           if (addv == ADD_VALUES) *bap += value;  \
224           else                    *bap  = value;  \
225           goto b_noinsert; \
226         } \
227       } \
228       if (b->nonew == 1) goto b_noinsert; \
229       else if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
230       if (nrow >= rmax) { \
231         /* there is no extra room in row, therefore enlarge */ \
232         PetscInt       new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
233         MatScalar *new_a; \
234  \
235         if (b->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
236  \
237         /* malloc new storage space */ \
238         len     = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(PetscInt); \
239         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
240         new_j   = (PetscInt*)(new_a + bs2*new_nz); \
241         new_i   = new_j + new_nz; \
242  \
243         /* copy over old data into new slots */ \
244         for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \
245         for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
246         ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); \
247         len  = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
248         ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); \
249         ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
250         ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
251         ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
252                     ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
253         /* free up old matrix storage */ \
254         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
255         if (!b->singlemalloc) { \
256           ierr = PetscFree(b->i);CHKERRQ(ierr); \
257           ierr = PetscFree(b->j);CHKERRQ(ierr); \
258         } \
259         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
260         b->singlemalloc = PETSC_TRUE; \
261  \
262         rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
263         rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
264         ierr = PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar)));CHKERRQ(ierr); \
265         b->maxnz += bs2*CHUNKSIZE; \
266         b->reallocs++; \
267         b->nz++; \
268       } \
269       N = nrow++ - 1;  \
270       /* shift up all the later entries in this row */ \
271       for (ii=N; ii>=_i; ii--) { \
272         rp[ii+1] = rp[ii]; \
273         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
274       } \
275       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
276       rp[_i]                      = bcol;  \
277       ap[bs2*_i + bs*cidx + ridx] = value;  \
278       b_noinsert:; \
279     bilen[brow] = nrow; \
280 }
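/*
   The two macros above implement the same insertion scheme for the diagonal (A) and
   off-diagonal (B) blocks.  The sketch below (illustrative only, not compiled, and
   simplified to a plain sorted integer row) shows the search they perform: a binary
   search narrows the window to at most 3 entries and a short linear scan then locates
   either the existing block column or the insertion point; the real macros additionally
   shift the trailing entries up and, if the row is full, reallocate by CHUNKSIZE block
   columns.
*/
#if 0
static PetscInt FindBlockColumn_Sketch(const PetscInt *rp,PetscInt nrow,PetscInt bcol)
{
  PetscInt low = 0,high = nrow,t,i;
  while (high-low > 3) {                 /* binary search on the sorted block columns */
    t = (low+high)/2;
    if (rp[t] > bcol) high = t;
    else              low  = t;
  }
  for (i=low; i<high; i++) {             /* finish with a linear scan of at most 3 entries */
    if (rp[i] >= bcol) break;            /* existing entry (==) or insertion point (>) */
  }
  return i;                              /* caller checks whether rp[i] == bcol */
}
#endif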
281 
282 #if defined(PETSC_USE_MAT_SINGLE)
283 #undef __FUNCT__
284 #define __FUNCT__ "MatSetValues_MPIBAIJ"
285 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
286 {
287   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
288   PetscErrorCode ierr;
289   PetscInt       i,N = m*n;
290   MatScalar      *vsingle;
291 
292   PetscFunctionBegin;
293   if (N > b->setvalueslen) {
294     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
295     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
296     b->setvalueslen  = N;
297   }
298   vsingle = b->setvaluescopy;
299 
300   for (i=0; i<N; i++) {
301     vsingle[i] = v[i];
302   }
303   ierr = MatSetValues_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
304   PetscFunctionReturn(0);
305 }
306 
307 #undef __FUNCT__
308 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
309 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
310 {
311   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
312   PetscErrorCode ierr;
313   PetscInt       i,N = m*n*b->bs2;
314   MatScalar      *vsingle;
315 
316   PetscFunctionBegin;
317   if (N > b->setvalueslen) {
318     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
319     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
320     b->setvalueslen  = N;
321   }
322   vsingle = b->setvaluescopy;
323   for (i=0; i<N; i++) {
324     vsingle[i] = v[i];
325   }
326   ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
327   PetscFunctionReturn(0);
328 }
329 
330 #undef __FUNCT__
331 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
332 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
333 {
334   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
335   PetscErrorCode ierr;
336   PetscInt       i,N = m*n;
337   MatScalar      *vsingle;
338 
339   PetscFunctionBegin;
340   if (N > b->setvalueslen) {
341     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
342     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
343     b->setvalueslen  = N;
344   }
345   vsingle = b->setvaluescopy;
346   for (i=0; i<N; i++) {
347     vsingle[i] = v[i];
348   }
349   ierr = MatSetValues_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
350   PetscFunctionReturn(0);
351 }
352 
353 #undef __FUNCT__
354 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
355 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
356 {
357   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
358   PetscErrorCode ierr;
359   PetscInt       i,N = m*n*b->bs2;
360   MatScalar      *vsingle;
361 
362   PetscFunctionBegin;
363   if (N > b->setvalueslen) {
364     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
365     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
366     b->setvalueslen  = N;
367   }
368   vsingle = b->setvaluescopy;
369   for (i=0; i<N; i++) {
370     vsingle[i] = v[i];
371   }
372   ierr = MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
373   PetscFunctionReturn(0);
374 }
375 #endif
376 
377 #undef __FUNCT__
378 #define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar"
379 PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
380 {
381   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
382   MatScalar      value;
383   PetscTruth     roworiented = baij->roworiented;
384   PetscErrorCode ierr;
385   PetscInt       i,j,row,col;
386   PetscInt       rstart_orig=baij->rstart_bs;
387   PetscInt       rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
388   PetscInt       cend_orig=baij->cend_bs,bs=mat->bs;
389 
390   /* Some Variables required in the macro */
391   Mat            A = baij->A;
392   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)(A)->data;
393   PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
394   MatScalar      *aa=a->a;
395 
396   Mat            B = baij->B;
397   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(B)->data;
398   PetscInt       *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
399   MatScalar      *ba=b->a;
400 
401   PetscInt       *rp,ii,nrow,_i,rmax,N,brow,bcol;
402   PetscInt       low,high,t,ridx,cidx,bs2=a->bs2;
403   MatScalar      *ap,*bap;
404 
405   PetscFunctionBegin;
406   for (i=0; i<m; i++) {
407     if (im[i] < 0) continue;
408 #if defined(PETSC_USE_DEBUG)
409     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1);
410 #endif
411     if (im[i] >= rstart_orig && im[i] < rend_orig) {
412       row = im[i] - rstart_orig;
413       for (j=0; j<n; j++) {
414         if (in[j] >= cstart_orig && in[j] < cend_orig){
415           col = in[j] - cstart_orig;
416           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
417           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
418           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
419         } else if (in[j] < 0) continue;
420 #if defined(PETSC_USE_DEBUG)
421         else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->N-1);}
422 #endif
423         else {
424           if (mat->was_assembled) {
425             if (!baij->colmap) {
426               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
427             }
428 #if defined (PETSC_USE_CTABLE)
429             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
430             col  = col - 1;
431 #else
432             col = baij->colmap[in[j]/bs] - 1;
433 #endif
434             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
435               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
436               col =  in[j];
437               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
438               B = baij->B;
439               b = (Mat_SeqBAIJ*)(B)->data;
440               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
441               ba=b->a;
442             } else col += in[j]%bs;
443           } else col = in[j];
444           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
445           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
446           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
447         }
448       }
449     } else {
450       if (!baij->donotstash) {
451         if (roworiented) {
452           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
453         } else {
454           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
455         }
456       }
457     }
458   }
459   PetscFunctionReturn(0);
460 }
461 
462 #undef __FUNCT__
463 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_MatScalar"
464 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
465 {
466   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
467   const MatScalar *value;
468   MatScalar       *barray=baij->barray;
469   PetscTruth      roworiented = baij->roworiented;
470   PetscErrorCode  ierr;
471   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstart;
472   PetscInt        rend=baij->rend,cstart=baij->cstart,stepval;
473   PetscInt        cend=baij->cend,bs=mat->bs,bs2=baij->bs2;
474 
475   PetscFunctionBegin;
476   if(!barray) {
477     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
478     baij->barray = barray;
479   }
480 
481   if (roworiented) {
482     stepval = (n-1)*bs;
483   } else {
484     stepval = (m-1)*bs;
485   }
486   for (i=0; i<m; i++) {
487     if (im[i] < 0) continue;
488 #if defined(PETSC_USE_DEBUG)
489     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
490 #endif
491     if (im[i] >= rstart && im[i] < rend) {
492       row = im[i] - rstart;
493       for (j=0; j<n; j++) {
494         /* If n == 1 (row oriented) or m == 1 (column oriented) a copy is not required */
495         if ((roworiented) && (n == 1)) {
496           barray = (MatScalar*)v + i*bs2;
497         } else if((!roworiented) && (m == 1)) {
498           barray = (MatScalar*)v + j*bs2;
499         } else { /* Here a copy is required */
500           if (roworiented) {
501             value = v + i*(stepval+bs)*bs + j*bs;
502           } else {
503             value = v + j*(stepval+bs)*bs + i*bs;
504           }
505           for (ii=0; ii<bs; ii++,value+=stepval) {
506             for (jj=0; jj<bs; jj++) {
507               *barray++  = *value++;
508             }
509           }
510           barray -=bs2;
511         }
512 
513         if (in[j] >= cstart && in[j] < cend){
514           col  = in[j] - cstart;
515           ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
516         }
517         else if (in[j] < 0) continue;
518 #if defined(PETSC_USE_DEBUG)
519         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
520 #endif
521         else {
522           if (mat->was_assembled) {
523             if (!baij->colmap) {
524               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
525             }
526 
527 #if defined(PETSC_USE_DEBUG)
528 #if defined (PETSC_USE_CTABLE)
529             { PetscInt data;
530               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
531               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
532             }
533 #else
534             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
535 #endif
536 #endif
537 #if defined (PETSC_USE_CTABLE)
538             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
539             col  = (col - 1)/bs;
540 #else
541             col = (baij->colmap[in[j]] - 1)/bs;
542 #endif
543             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
544               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
545               col =  in[j];
546             }
547           }
548           else col = in[j];
549           ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
550         }
551       }
552     } else {
553       if (!baij->donotstash) {
554         if (roworiented) {
555           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
556         } else {
557           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
558         }
559       }
560     }
561   }
562   PetscFunctionReturn(0);
563 }
564 
565 #define HASH_KEY 0.6180339887
566 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
567 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
568 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
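/*
   HASH() is a multiplicative hash: the key is scaled by the golden-ratio conjugate
   HASH_KEY = 0.6180339887..., the fractional part is kept, and the result is scaled
   into [0,size).  A small worked example (illustrative only, with a hypothetical table
   size of 1000):

       key                  = 12345
       key*HASH_KEY         = 7629.629...
       fractional part      = 0.629...
       h1 = (PetscInt)(1000*0.629...) = 629

   Keys are formed below as (block row)*Nbs + (block column) + 1, so that a key of 0 can
   mark an empty slot; collisions are resolved by linear probing (see
   MatCreateHashTable_MPIBAIJ_Private() and the lookup loops in the ..._HT routines).
*/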
569 #undef __FUNCT__
570 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT_MatScalar"
571 PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
572 {
573   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
574   PetscTruth     roworiented = baij->roworiented;
575   PetscErrorCode ierr;
576   PetscInt       i,j,row,col;
577   PetscInt       rstart_orig=baij->rstart_bs;
578   PetscInt       rend_orig=baij->rend_bs,Nbs=baij->Nbs;
579   PetscInt       h1,key,size=baij->ht_size,bs=mat->bs,*HT=baij->ht,idx;
580   PetscReal      tmp;
581   MatScalar      **HD = baij->hd,value;
582 #if defined(PETSC_USE_DEBUG)
583   PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
584 #endif
585 
586   PetscFunctionBegin;
587 
588   for (i=0; i<m; i++) {
589 #if defined(PETSC_USE_DEBUG)
590     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
591     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1);
592 #endif
593     row = im[i];
594     if (row >= rstart_orig && row < rend_orig) {
595       for (j=0; j<n; j++) {
596         col = in[j];
597         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
598         /* Look up into the Hash Table */
599         key = (row/bs)*Nbs+(col/bs)+1;
600         h1  = HASH(size,key,tmp);
601 
602 
603         idx = h1;
604 #if defined(PETSC_USE_DEBUG)
605         insert_ct++;
606         total_ct++;
607         if (HT[idx] != key) {
608           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
609           if (idx == size) {
610             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
611             if (idx == h1) {
612               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
613             }
614           }
615         }
616 #else
617         if (HT[idx] != key) {
618           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
619           if (idx == size) {
620             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
621             if (idx == h1) {
622               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
623             }
624           }
625         }
626 #endif
627         /* A HASH table entry is found, so insert the values at the correct address */
628         if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
629         else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
630       }
631     } else {
632       if (!baij->donotstash) {
633         if (roworiented) {
634           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
635         } else {
636           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
637         }
638       }
639     }
640   }
641 #if defined(PETSC_USE_DEBUG)
642   baij->ht_total_ct = total_ct;
643   baij->ht_insert_ct = insert_ct;
644 #endif
645   PetscFunctionReturn(0);
646 }
647 
648 #undef __FUNCT__
649 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT_MatScalar"
650 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
651 {
652   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
653   PetscTruth      roworiented = baij->roworiented;
654   PetscErrorCode  ierr;
655   PetscInt        i,j,ii,jj,row,col;
656   PetscInt        rstart=baij->rstart ;
657   PetscInt        rend=baij->rend,stepval,bs=mat->bs,bs2=baij->bs2;
658   PetscInt        h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
659   PetscReal       tmp;
660   MatScalar       **HD = baij->hd,*baij_a;
661   const MatScalar *v_t,*value;
662 #if defined(PETSC_USE_DEBUG)
663   PetscInt        total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
664 #endif
665 
666   PetscFunctionBegin;
667 
668   if (roworiented) {
669     stepval = (n-1)*bs;
670   } else {
671     stepval = (m-1)*bs;
672   }
673   for (i=0; i<m; i++) {
674 #if defined(PETSC_USE_DEBUG)
675     if (im[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
676     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
677 #endif
678     row   = im[i];
679     v_t   = v + i*bs2;
680     if (row >= rstart && row < rend) {
681       for (j=0; j<n; j++) {
682         col = in[j];
683 
684         /* Look up into the Hash Table */
685         key = row*Nbs+col+1;
686         h1  = HASH(size,key,tmp);
687 
688         idx = h1;
689 #if defined(PETSC_USE_DEBUG)
690         total_ct++;
691         insert_ct++;
692         if (HT[idx] != key) {
693           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
694           if (idx == size) {
695             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
696             if (idx == h1) {
697               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
698             }
699           }
700         }
701 #else
702         if (HT[idx] != key) {
703           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
704           if (idx == size) {
705             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
706             if (idx == h1) {
707               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
708             }
709           }
710         }
711 #endif
712         baij_a = HD[idx];
713         if (roworiented) {
714           /*value = v + i*(stepval+bs)*bs + j*bs;*/
715           /* value = v + (i*(stepval+bs)+j)*bs; */
716           value = v_t;
717           v_t  += bs;
718           if (addv == ADD_VALUES) {
719             for (ii=0; ii<bs; ii++,value+=stepval) {
720               for (jj=ii; jj<bs2; jj+=bs) {
721                 baij_a[jj]  += *value++;
722               }
723             }
724           } else {
725             for (ii=0; ii<bs; ii++,value+=stepval) {
726               for (jj=ii; jj<bs2; jj+=bs) {
727                 baij_a[jj]  = *value++;
728               }
729             }
730           }
731         } else {
732           value = v + j*(stepval+bs)*bs + i*bs;
733           if (addv == ADD_VALUES) {
734             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
735               for (jj=0; jj<bs; jj++) {
736                 baij_a[jj]  += *value++;
737               }
738             }
739           } else {
740             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
741               for (jj=0; jj<bs; jj++) {
742                 baij_a[jj]  = *value++;
743               }
744             }
745           }
746         }
747       }
748     } else {
749       if (!baij->donotstash) {
750         if (roworiented) {
751           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
752         } else {
753           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
754         }
755       }
756     }
757   }
758 #if defined(PETSC_USE_DEBUG)
759   baij->ht_total_ct = total_ct;
760   baij->ht_insert_ct = insert_ct;
761 #endif
762   PetscFunctionReturn(0);
763 }
764 
765 #undef __FUNCT__
766 #define __FUNCT__ "MatGetValues_MPIBAIJ"
767 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
768 {
769   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
770   PetscErrorCode ierr;
771   PetscInt       bs=mat->bs,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
772   PetscInt       bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;
773 
774   PetscFunctionBegin;
775   for (i=0; i<m; i++) {
776     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
777     if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1);
778     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
779       row = idxm[i] - bsrstart;
780       for (j=0; j<n; j++) {
781         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]);
782         if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1);
783         if (idxn[j] >= bscstart && idxn[j] < bscend){
784           col = idxn[j] - bscstart;
785           ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
786         } else {
787           if (!baij->colmap) {
788             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
789           }
790 #if defined (PETSC_USE_CTABLE)
791           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
792           data --;
793 #else
794           data = baij->colmap[idxn[j]/bs]-1;
795 #endif
796           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
797           else {
798             col  = data + idxn[j]%bs;
799             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
800           }
801         }
802       }
803     } else {
804       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
805     }
806   }
807  PetscFunctionReturn(0);
808 }
809 
810 #undef __FUNCT__
811 #define __FUNCT__ "MatNorm_MPIBAIJ"
812 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
813 {
814   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
815   Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
816   PetscErrorCode ierr;
817   PetscInt       i,j,bs2=baij->bs2,bs=baij->A->bs,nz,row,col;
818   PetscReal      sum = 0.0;
819   MatScalar      *v;
820 
821   PetscFunctionBegin;
822   if (baij->size == 1) {
823     ierr =  MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
824   } else {
825     if (type == NORM_FROBENIUS) {
826       v = amat->a;
827       nz = amat->nz*bs2;
828       for (i=0; i<nz; i++) {
829 #if defined(PETSC_USE_COMPLEX)
830         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
831 #else
832         sum += (*v)*(*v); v++;
833 #endif
834       }
835       v = bmat->a;
836       nz = bmat->nz*bs2;
837       for (i=0; i<nz; i++) {
838 #if defined(PETSC_USE_COMPLEX)
839         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
840 #else
841         sum += (*v)*(*v); v++;
842 #endif
843       }
844       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
845       *nrm = sqrt(*nrm);
846     } else if (type == NORM_1) { /* max column sum */
847       PetscReal *tmp,*tmp2;
848       PetscInt  *jj,*garray=baij->garray,cstart=baij->cstart;
849       ierr = PetscMalloc((2*mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
850       tmp2 = tmp + mat->N;
851       ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
852       v = amat->a; jj = amat->j;
853       for (i=0; i<amat->nz; i++) {
854         for (j=0; j<bs; j++){
855           col = bs*(cstart + *jj) + j; /* column index */
856           for (row=0; row<bs; row++){
857             tmp[col] += PetscAbsScalar(*v);  v++;
858           }
859         }
860         jj++;
861       }
862       v = bmat->a; jj = bmat->j;
863       for (i=0; i<bmat->nz; i++) {
864         for (j=0; j<bs; j++){
865           col = bs*garray[*jj] + j;
866           for (row=0; row<bs; row++){
867             tmp[col] += PetscAbsScalar(*v); v++;
868           }
869         }
870         jj++;
871       }
872       ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
873       *nrm = 0.0;
874       for (j=0; j<mat->N; j++) {
875         if (tmp2[j] > *nrm) *nrm = tmp2[j];
876       }
877       ierr = PetscFree(tmp);CHKERRQ(ierr);
878     } else if (type == NORM_INFINITY) { /* max row sum */
879       PetscReal *sums;
880       ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr);
881       sum = 0.0;
882       for (j=0; j<amat->mbs; j++) {
883         for (row=0; row<bs; row++) sums[row] = 0.0;
884         v = amat->a + bs2*amat->i[j];
885         nz = amat->i[j+1]-amat->i[j];
886         for (i=0; i<nz; i++) {
887           for (col=0; col<bs; col++){
888             for (row=0; row<bs; row++){
889               sums[row] += PetscAbsScalar(*v); v++;
890             }
891           }
892         }
893         v = bmat->a + bs2*bmat->i[j];
894         nz = bmat->i[j+1]-bmat->i[j];
895         for (i=0; i<nz; i++) {
896           for (col=0; col<bs; col++){
897             for (row=0; row<bs; row++){
898               sums[row] += PetscAbsScalar(*v); v++;
899             }
900           }
901         }
902         for (row=0; row<bs; row++){
903           if (sums[row] > sum) sum = sums[row];
904         }
905       }
906       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
907       ierr = PetscFree(sums);CHKERRQ(ierr);
908     } else {
909       SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
910     }
911   }
912   PetscFunctionReturn(0);
913 }
914 
915 /*
916   Creates the hash table and fills it with the keys of (and pointers to)
917   the existing nonzero blocks.  The table is created only once;
918   if new entries need to be added to the matrix,
919   then the hash table has to be destroyed and
920   recreated.
921 */
922 #undef __FUNCT__
923 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
924 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
925 {
926   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
927   Mat            A = baij->A,B=baij->B;
928   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
929   PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
930   PetscErrorCode ierr;
931   PetscInt       size,bs2=baij->bs2,rstart=baij->rstart;
932   PetscInt       cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs;
933   PetscInt       *HT,key;
934   MatScalar      **HD;
935   PetscReal      tmp;
936 #if defined(PETSC_USE_DEBUG)
937   PetscInt       ct=0,max=0;
938 #endif
939 
940   PetscFunctionBegin;
941   baij->ht_size=(PetscInt)(factor*nz);
942   size = baij->ht_size;
943 
944   if (baij->ht) {
945     PetscFunctionReturn(0);
946   }
947 
948   /* Allocate Memory for Hash Table */
949   ierr     = PetscMalloc((size)*(sizeof(PetscInt)+sizeof(MatScalar*))+1,&baij->hd);CHKERRQ(ierr);
950   baij->ht = (PetscInt*)(baij->hd + size);
951   HD       = baij->hd;
952   HT       = baij->ht;
953 
954 
955   ierr = PetscMemzero(HD,size*(sizeof(PetscInt)+sizeof(MatScalar*)));CHKERRQ(ierr);
956 
957 
958   /* Loop Over A */
959   for (i=0; i<a->mbs; i++) {
960     for (j=ai[i]; j<ai[i+1]; j++) {
961       row = i+rstart;
962       col = aj[j]+cstart;
963 
964       key = row*Nbs + col + 1;
965       h1  = HASH(size,key,tmp);
966       for (k=0; k<size; k++){
967         if (!HT[(h1+k)%size]) {
968           HT[(h1+k)%size] = key;
969           HD[(h1+k)%size] = a->a + j*bs2;
970           break;
971 #if defined(PETSC_USE_DEBUG)
972         } else {
973           ct++;
974 #endif
975         }
976       }
977 #if defined(PETSC_USE_DEBUG)
978       if (k> max) max = k;
979 #endif
980     }
981   }
982   /* Loop Over B */
983   for (i=0; i<b->mbs; i++) {
984     for (j=bi[i]; j<bi[i+1]; j++) {
985       row = i+rstart;
986       col = garray[bj[j]];
987       key = row*Nbs + col + 1;
988       h1  = HASH(size,key,tmp);
989       for (k=0; k<size; k++){
990         if (!HT[(h1+k)%size]) {
991           HT[(h1+k)%size] = key;
992           HD[(h1+k)%size] = b->a + j*bs2;
993           break;
994 #if defined(PETSC_USE_DEBUG)
995         } else {
996           ct++;
997 #endif
998         }
999       }
1000 #if defined(PETSC_USE_DEBUG)
1001       if (k> max) max = k;
1002 #endif
1003     }
1004   }
1005 
1006   /* Print Summary */
1007 #if defined(PETSC_USE_DEBUG)
1008   for (i=0,j=0; i<size; i++) {
1009     if (HT[i]) {j++;}
1010   }
1011   ierr = PetscLogInfo((0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max));CHKERRQ(ierr);
1012 #endif
1013   PetscFunctionReturn(0);
1014 }
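/*
   Illustrative sketch (not compiled) of the open-addressing scheme used above and in the
   ..._HT MatSetValues routines: both creation and lookup start at h1 = HASH(size,key,tmp)
   and probe forward, wrapping around the end of the table, until the key (lookup) or an
   empty slot (creation) is found.
*/
#if 0
static PetscInt HashLookup_Sketch(const PetscInt *HT,PetscInt size,PetscInt key,PetscInt h1)
{
  PetscInt idx;
  for (idx=h1; idx<size; idx++) if (HT[idx] == key) return idx;  /* probe to the end of the table */
  for (idx=0; idx<h1; idx++)    if (HT[idx] == key) return idx;  /* wrap around to the start */
  return -1;                                                     /* key not present */
}
#endif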
1015 
1016 #undef __FUNCT__
1017 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
1018 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
1019 {
1020   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1021   PetscErrorCode ierr;
1022   PetscInt       nstash,reallocs;
1023   InsertMode     addv;
1024 
1025   PetscFunctionBegin;
1026   if (baij->donotstash) {
1027     PetscFunctionReturn(0);
1028   }
1029 
1030   /* make sure all processors are either in INSERTMODE or ADDMODE */
1031   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
1032   if (addv == (ADD_VALUES|INSERT_VALUES)) {
1033     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
1034   }
1035   mat->insertmode = addv; /* in case this processor had no cache */
1036 
1037   ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
1038   ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
1039   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
1040   ierr = PetscLogInfo((0,"MatAssemblyBegin_MPIBAIJ:Stash has %D entries,uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr);
1041   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
1042   ierr = PetscLogInfo((0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr);
1043   PetscFunctionReturn(0);
1044 }
1045 
1046 #undef __FUNCT__
1047 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
1048 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
1049 {
1050   Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
1051   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data;
1052   PetscErrorCode ierr;
1053   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
1054   PetscInt       *row,*col,other_disassembled;
1055   PetscTruth     r1,r2,r3;
1056   MatScalar      *val;
1057   InsertMode     addv = mat->insertmode;
1058   PetscMPIInt    n;
1059 
1060   PetscFunctionBegin;
1061   if (!baij->donotstash) {
1062     while (1) {
1063       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
1064       if (!flg) break;
1065 
1066       for (i=0; i<n;) {
1067         /* Now identify the consecutive vals belonging to the same row */
1068         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
1069         if (j < n) ncols = j-i;
1070         else       ncols = n-i;
1071         /* Now assemble all these values with a single function call */
1072         ierr = MatSetValues_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
1073         i = j;
1074       }
1075     }
1076     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
1077     /* Now process the block-stash. Since the values are stashed column-oriented,
1078        set the roworiented flag to column oriented, and after MatSetValues()
1079        restore the original flags */
1080     r1 = baij->roworiented;
1081     r2 = a->roworiented;
1082     r3 = b->roworiented;
1083     baij->roworiented = PETSC_FALSE;
1084     a->roworiented    = PETSC_FALSE;
1085     b->roworiented    = PETSC_FALSE;
1086     while (1) {
1087       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
1088       if (!flg) break;
1089 
1090       for (i=0; i<n;) {
1091         /* Now identify the consecutive vals belonging to the same row */
1092         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
1093         if (j < n) ncols = j-i;
1094         else       ncols = n-i;
1095         ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
1096         i = j;
1097       }
1098     }
1099     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
1100     baij->roworiented = r1;
1101     a->roworiented    = r2;
1102     b->roworiented    = r3;
1103   }
1104 
1105   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
1106   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
1107 
1108   /* determine if any processor has disassembled, if so we must
1109      also disassemble ourselves, in order that we may reassemble. */
1110   /*
1111      if nonzero structure of submatrix B cannot change then we know that
1112      no processor disassembled thus we can skip this stuff
1113   */
1114   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
1115     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
1116     if (mat->was_assembled && !other_disassembled) {
1117       ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
1118     }
1119   }
1120 
1121   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
1122     ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
1123   }
1124   b->compressedrow.use = PETSC_TRUE;
1125   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
1126   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
1127 
1128 #if defined(PETSC_USE_DEBUG)
1129   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
1130     ierr = PetscLogInfo((0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct));CHKERRQ(ierr);
1131     baij->ht_total_ct  = 0;
1132     baij->ht_insert_ct = 0;
1133   }
1134 #endif
1135   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
1136     ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
1137     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
1138     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
1139   }
1140 
1141   if (baij->rowvalues) {
1142     ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
1143     baij->rowvalues = 0;
1144   }
1145   PetscFunctionReturn(0);
1146 }
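/*
   A minimal usage sketch (illustrative only; comm, M, N and bs are hypothetical) of the
   assembly path handled above: values set for rows owned by another process are kept in
   mat->stash / mat->bstash by MatSetValues()/MatSetValuesBlocked(), communicated in
   MatAssemblyBegin_MPIBAIJ(), and inserted into the local A/B blocks here in
   MatAssemblyEnd_MPIBAIJ().
*/
#if 0
  Mat            C;
  PetscErrorCode ierr;
  PetscInt       row = 0,col = 0;        /* global indices; may belong to another process */
  PetscScalar    one = 1.0;

  ierr = MatCreate(comm,PETSC_DECIDE,PETSC_DECIDE,M,N,&C);CHKERRQ(ierr);
  ierr = MatSetType(C,MATMPIBAIJ);CHKERRQ(ierr);
  ierr = MatMPIBAIJSetPreallocation(C,bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatSetValues(C,1,&row,1,&col,&one,ADD_VALUES);CHKERRQ(ierr); /* off-process entries are stashed */
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);        /* stash is scattered to the owners */
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);          /* owners insert the received values */
  ierr = MatDestroy(C);CHKERRQ(ierr);
#endif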
1147 
1148 #undef __FUNCT__
1149 #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
1150 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1151 {
1152   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
1153   PetscErrorCode    ierr;
1154   PetscMPIInt       size = baij->size,rank = baij->rank;
1155   PetscInt          bs = mat->bs;
1156   PetscTruth        iascii,isdraw;
1157   PetscViewer       sviewer;
1158   PetscViewerFormat format;
1159 
1160   PetscFunctionBegin;
1161   /* printf(" MatView_MPIBAIJ_ASCIIorDraworSocket is called ...\n"); */
1162   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1163   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1164   if (iascii) {
1165     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1166     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1167       MatInfo info;
1168       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
1169       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1170       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
1171               rank,mat->m,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs,
1172               mat->bs,(PetscInt)info.memory);CHKERRQ(ierr);
1173       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1174       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
1175       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1176       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
1177       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1178       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
1179       PetscFunctionReturn(0);
1180     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1181       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1182       PetscFunctionReturn(0);
1183     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1184       PetscFunctionReturn(0);
1185     }
1186   }
1187 
1188   if (isdraw) {
1189     PetscDraw       draw;
1190     PetscTruth isnull;
1191     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1192     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1193   }
1194 
1195   if (size == 1) {
1196     ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr);
1197     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
1198   } else {
1199     /* assemble the entire matrix onto first processor. */
1200     Mat         A;
1201     Mat_SeqBAIJ *Aloc;
1202     PetscInt    M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
1203     MatScalar   *a;
1204 
1205     /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
1206     /* Perhaps this should be the type of mat? */
1207     if (!rank) {
1208       ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr);
1209     } else {
1210       ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr);
1211     }
1212     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
1213     ierr = MatMPIBAIJSetPreallocation(A,mat->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1214     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1215 
1216     /* copy over the A part */
1217     Aloc = (Mat_SeqBAIJ*)baij->A->data;
1218     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1219     ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
1220 
1221     for (i=0; i<mbs; i++) {
1222       rvals[0] = bs*(baij->rstart + i);
1223       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1224       for (j=ai[i]; j<ai[i+1]; j++) {
1225         col = (baij->cstart+aj[j])*bs;
1226         for (k=0; k<bs; k++) {
1227           ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1228           col++; a += bs;
1229         }
1230       }
1231     }
1232     /* copy over the B part */
1233     Aloc = (Mat_SeqBAIJ*)baij->B->data;
1234     ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1235     for (i=0; i<mbs; i++) {
1236       rvals[0] = bs*(baij->rstart + i);
1237       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1238       for (j=ai[i]; j<ai[i+1]; j++) {
1239         col = baij->garray[aj[j]]*bs;
1240         for (k=0; k<bs; k++) {
1241           ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1242           col++; a += bs;
1243         }
1244       }
1245     }
1246     ierr = PetscFree(rvals);CHKERRQ(ierr);
1247     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1248     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1249     /*
1250        Everyone has to call to draw the matrix since the graphics waits are
1251        synchronized across all processors that share the PetscDraw object
1252     */
1253     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1254     if (!rank) {
1255       ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
1256       ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1257     }
1258     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1259     ierr = MatDestroy(A);CHKERRQ(ierr);
1260   }
1261   PetscFunctionReturn(0);
1262 }
1263 
1264 #undef __FUNCT__
1265 #define __FUNCT__ "MatView_MPIBAIJ"
1266 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
1267 {
1268   PetscErrorCode ierr;
1269   PetscTruth     iascii,isdraw,issocket,isbinary;
1270 
1271   PetscFunctionBegin;
1272   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1273   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1274   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1275   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1276   if (iascii || isdraw || issocket || isbinary) {
1277     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1278   } else {
1279     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
1280   }
1281   PetscFunctionReturn(0);
1282 }
1283 
1284 #undef __FUNCT__
1285 #define __FUNCT__ "MatDestroy_MPIBAIJ"
1286 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
1287 {
1288   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1289   PetscErrorCode ierr;
1290 
1291   PetscFunctionBegin;
1292 #if defined(PETSC_USE_LOG)
1293   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->M,mat->N);
1294 #endif
1295   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1296   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
1297   ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
1298   ierr = MatDestroy(baij->A);CHKERRQ(ierr);
1299   ierr = MatDestroy(baij->B);CHKERRQ(ierr);
1300 #if defined (PETSC_USE_CTABLE)
1301   if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
1302 #else
1303   if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
1304 #endif
1305   if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
1306   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
1307   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
1308   if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
1309   if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
1310   if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
1311 #if defined(PETSC_USE_MAT_SINGLE)
1312   if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);}
1313 #endif
1314   ierr = PetscFree(baij);CHKERRQ(ierr);
1315 
1316   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1317   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1318   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
1319   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1320   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1321   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
1322   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr);
1323   PetscFunctionReturn(0);
1324 }
1325 
1326 #undef __FUNCT__
1327 #define __FUNCT__ "MatMult_MPIBAIJ"
1328 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1329 {
1330   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1331   PetscErrorCode ierr;
1332   PetscInt       nt;
1333 
1334   PetscFunctionBegin;
1335   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1336   if (nt != A->n) {
1337     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1338   }
1339   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1340   if (nt != A->m) {
1341     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
1342   }
1343   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1344   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1345   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1346   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1347   ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1348   PetscFunctionReturn(0);
1349 }
1350 
1351 #undef __FUNCT__
1352 #define __FUNCT__ "MatMultAdd_MPIBAIJ"
1353 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1354 {
1355   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1356   PetscErrorCode ierr;
1357 
1358   PetscFunctionBegin;
1359   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1360   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1361   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1362   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1363   PetscFunctionReturn(0);
1364 }
1365 
1366 #undef __FUNCT__
1367 #define __FUNCT__ "MatMultTranspose_MPIBAIJ"
1368 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1369 {
1370   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1371   PetscErrorCode ierr;
1372   PetscTruth     merged;
1373 
1374   PetscFunctionBegin;
1375   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
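  /* A "merged" scatter completes its communication inside VecScatterBegin();
     in that case the local product is formed first and VecScatterEnd() is a no-op. */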
1376   /* do nondiagonal part */
1377   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1378   if (!merged) {
1379     /* send it on its way */
1380     ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1381     /* do local part */
1382     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1383     /* receive remote parts: note this assumes the values are not actually */
1384     /* inserted in yy until the next line */
1385     ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1386   } else {
1387     /* do local part */
1388     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1389     /* send it on its way */
1390     ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1391     /* values actually were received in the Begin() but we need to call this nop */
1392     ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1393   }
1394   PetscFunctionReturn(0);
1395 }
1396 
1397 #undef __FUNCT__
1398 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
1399 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1400 {
1401   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1402   PetscErrorCode ierr;
1403 
1404   PetscFunctionBegin;
1405   /* do nondiagonal part */
1406   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1407   /* send it on its way */
1408   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1409   /* do local part */
1410   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in zz until VecScatterEnd() below, which is true for this */
  /* implementation but may not hold for every scatter implementation. */
1414   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1415   PetscFunctionReturn(0);
1416 }
1417 
/*
   This only works correctly for square matrices where the subblock A->A is the
   diagonal block.
*/
1422 #undef __FUNCT__
1423 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
1424 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1425 {
1426   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1427   PetscErrorCode ierr;
1428 
1429   PetscFunctionBegin;
1430   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1431   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1432   PetscFunctionReturn(0);
1433 }
1434 
1435 #undef __FUNCT__
1436 #define __FUNCT__ "MatScale_MPIBAIJ"
1437 PetscErrorCode MatScale_MPIBAIJ(const PetscScalar *aa,Mat A)
1438 {
1439   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1440   PetscErrorCode ierr;
1441 
1442   PetscFunctionBegin;
1443   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
1444   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
1445   PetscFunctionReturn(0);
1446 }
1447 
1448 #undef __FUNCT__
1449 #define __FUNCT__ "MatGetRow_MPIBAIJ"
1450 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1451 {
1452   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
1453   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1454   PetscErrorCode ierr;
1455   PetscInt       bs = matin->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1456   PetscInt       nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs;
1457   PetscInt       *cmap,*idx_p,cstart = mat->cstart;
1458 
1459   PetscFunctionBegin;
1460   if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1461   mat->getrowactive = PETSC_TRUE;
1462 
1463   if (!mat->rowvalues && (idx || v)) {
1464     /*
1465         allocate enough space to hold information from the longest row.
1466     */
1467     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1468     PetscInt     max = 1,mbs = mat->mbs,tmp;
1469     for (i=0; i<mbs; i++) {
1470       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1471       if (max < tmp) { max = tmp; }
1472     }
1473     ierr = PetscMalloc(max*bs2*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1474     mat->rowindices = (PetscInt*)(mat->rowvalues + max*bs2);
1475   }
1476 
  if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows");
1478   lrow = row - brstart;
1479 
1480   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1481   if (!v)   {pvA = 0; pvB = 0;}
1482   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1483   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1484   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1485   nztot = nzA + nzB;
1486 
1487   cmap  = mat->garray;
1488   if (v  || idx) {
1489     if (nztot) {
1490       /* Sort by increasing column numbers, assuming A and B already sorted */
1491       PetscInt imark = -1;
1492       if (v) {
1493         *v = v_p = mat->rowvalues;
1494         for (i=0; i<nzB; i++) {
1495           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1496           else break;
1497         }
1498         imark = i;
1499         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1500         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1501       }
1502       if (idx) {
1503         *idx = idx_p = mat->rowindices;
1504         if (imark > -1) {
1505           for (i=0; i<imark; i++) {
1506             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1507           }
1508         } else {
1509           for (i=0; i<nzB; i++) {
1510             if (cmap[cworkB[i]/bs] < cstart)
1511               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1512             else break;
1513           }
1514           imark = i;
1515         }
1516         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1517         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1518       }
1519     } else {
1520       if (idx) *idx = 0;
1521       if (v)   *v   = 0;
1522     }
1523   }
1524   *nz = nztot;
1525   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1526   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1527   PetscFunctionReturn(0);
1528 }
1529 
1530 #undef __FUNCT__
1531 #define __FUNCT__ "MatRestoreRow_MPIBAIJ"
1532 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1533 {
1534   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1535 
1536   PetscFunctionBegin;
1537   if (!baij->getrowactive) {
1538     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1539   }
1540   baij->getrowactive = PETSC_FALSE;
1541   PetscFunctionReturn(0);
1542 }
1543 
1544 #undef __FUNCT__
1545 #define __FUNCT__ "MatZeroEntries_MPIBAIJ"
1546 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1547 {
1548   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1549   PetscErrorCode ierr;
1550 
1551   PetscFunctionBegin;
1552   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1553   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1554   PetscFunctionReturn(0);
1555 }
1556 
1557 #undef __FUNCT__
1558 #define __FUNCT__ "MatGetInfo_MPIBAIJ"
1559 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1560 {
1561   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
1562   Mat            A = a->A,B = a->B;
1563   PetscErrorCode ierr;
1564   PetscReal      isend[5],irecv[5];
1565 
1566   PetscFunctionBegin;
1567   info->block_size     = (PetscReal)matin->bs;
1568   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1569   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1570   isend[3] = info->memory;  isend[4] = info->mallocs;
1571   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1572   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1573   isend[3] += info->memory;  isend[4] += info->mallocs;
1574   if (flag == MAT_LOCAL) {
1575     info->nz_used      = isend[0];
1576     info->nz_allocated = isend[1];
1577     info->nz_unneeded  = isend[2];
1578     info->memory       = isend[3];
1579     info->mallocs      = isend[4];
1580   } else if (flag == MAT_GLOBAL_MAX) {
1581     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1582     info->nz_used      = irecv[0];
1583     info->nz_allocated = irecv[1];
1584     info->nz_unneeded  = irecv[2];
1585     info->memory       = irecv[3];
1586     info->mallocs      = irecv[4];
1587   } else if (flag == MAT_GLOBAL_SUM) {
1588     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1589     info->nz_used      = irecv[0];
1590     info->nz_allocated = irecv[1];
1591     info->nz_unneeded  = irecv[2];
1592     info->memory       = irecv[3];
1593     info->mallocs      = irecv[4];
1594   } else {
1595     SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1596   }
  info->rows_global       = (PetscReal)matin->M;
  info->columns_global    = (PetscReal)matin->N;
  info->rows_local        = (PetscReal)matin->m;
  info->columns_local     = (PetscReal)matin->n;
1601   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1602   info->fill_ratio_needed = 0;
1603   info->factor_mallocs    = 0;
1604   PetscFunctionReturn(0);
1605 }
1606 
1607 #undef __FUNCT__
1608 #define __FUNCT__ "MatSetOption_MPIBAIJ"
1609 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op)
1610 {
1611   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1612   PetscErrorCode ierr;
1613 
1614   PetscFunctionBegin;
1615   switch (op) {
1616   case MAT_NO_NEW_NONZERO_LOCATIONS:
1617   case MAT_YES_NEW_NONZERO_LOCATIONS:
1618   case MAT_COLUMNS_UNSORTED:
1619   case MAT_COLUMNS_SORTED:
1620   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1621   case MAT_KEEP_ZEROED_ROWS:
1622   case MAT_NEW_NONZERO_LOCATION_ERR:
1623     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1624     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1625     break;
1626   case MAT_ROW_ORIENTED:
1627     a->roworiented = PETSC_TRUE;
1628     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1629     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1630     break;
1631   case MAT_ROWS_SORTED:
1632   case MAT_ROWS_UNSORTED:
1633   case MAT_YES_NEW_DIAGONALS:
1634     ierr = PetscLogInfo((A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"));CHKERRQ(ierr);
1635     break;
1636   case MAT_COLUMN_ORIENTED:
1637     a->roworiented = PETSC_FALSE;
1638     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1639     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1640     break;
1641   case MAT_IGNORE_OFF_PROC_ENTRIES:
1642     a->donotstash = PETSC_TRUE;
1643     break;
1644   case MAT_NO_NEW_DIAGONALS:
1645     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1646   case MAT_USE_HASH_TABLE:
1647     a->ht_flag = PETSC_TRUE;
1648     break;
1649   case MAT_SYMMETRIC:
1650   case MAT_STRUCTURALLY_SYMMETRIC:
1651   case MAT_HERMITIAN:
1652   case MAT_SYMMETRY_ETERNAL:
1653     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1654     break;
1655   case MAT_NOT_SYMMETRIC:
1656   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1657   case MAT_NOT_HERMITIAN:
1658   case MAT_NOT_SYMMETRY_ETERNAL:
1659     break;
1660   default:
1661     SETERRQ(PETSC_ERR_SUP,"unknown option");
1662   }
1663   PetscFunctionReturn(0);
1664 }
1665 
1666 #undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIBAIJ"
1668 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,Mat *matout)
1669 {
1670   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
1671   Mat_SeqBAIJ    *Aloc;
1672   Mat            B;
1673   PetscErrorCode ierr;
1674   PetscInt       M=A->M,N=A->N,*ai,*aj,i,*rvals,j,k,col;
1675   PetscInt       bs=A->bs,mbs=baij->mbs;
1676   MatScalar      *a;
1677 
1678   PetscFunctionBegin;
1679   if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1680   ierr = MatCreate(A->comm,A->n,A->m,N,M,&B);CHKERRQ(ierr);
1681   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
1682   ierr = MatMPIBAIJSetPreallocation(B,A->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1683 
1684   /* copy over the A part */
1685   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1686   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1687   ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
1688 
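  /* rvals[] holds the bs global point-row indices covered by block row i of this
     process; each point column of the original matrix becomes a point row of the
     transpose in the MatSetValues calls below. */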
1689   for (i=0; i<mbs; i++) {
1690     rvals[0] = bs*(baij->rstart + i);
1691     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1692     for (j=ai[i]; j<ai[i+1]; j++) {
1693       col = (baij->cstart+aj[j])*bs;
1694       for (k=0; k<bs; k++) {
1695         ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1696         col++; a += bs;
1697       }
1698     }
1699   }
1700   /* copy over the B part */
1701   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1702   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1703   for (i=0; i<mbs; i++) {
1704     rvals[0] = bs*(baij->rstart + i);
1705     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1706     for (j=ai[i]; j<ai[i+1]; j++) {
1707       col = baij->garray[aj[j]]*bs;
1708       for (k=0; k<bs; k++) {
1709         ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1710         col++; a += bs;
1711       }
1712     }
1713   }
1714   ierr = PetscFree(rvals);CHKERRQ(ierr);
1715   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1716   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1717 
1718   if (matout) {
1719     *matout = B;
1720   } else {
1721     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1722   }
1723   PetscFunctionReturn(0);
1724 }
1725 
1726 #undef __FUNCT__
1727 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
1728 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1729 {
1730   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1731   Mat            a = baij->A,b = baij->B;
1732   PetscErrorCode ierr;
1733   PetscInt       s1,s2,s3;
1734 
1735   PetscFunctionBegin;
1736   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1737   if (rr) {
1738     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1739     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1740     /* Overlap communication with computation. */
1741     ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1742   }
1743   if (ll) {
1744     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1745     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1746     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1747   }
1748   /* scale  the diagonal block */
1749   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1750 
1751   if (rr) {
1752     /* Do a scatter end and then right scale the off-diagonal block */
1753     ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1754     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1755   }
1756 
1757   PetscFunctionReturn(0);
1758 }
1759 
1760 #undef __FUNCT__
1761 #define __FUNCT__ "MatZeroRows_MPIBAIJ"
1762 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,IS is,const PetscScalar *diag)
1763 {
1764   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1765   PetscErrorCode ierr;
1766   PetscMPIInt    imdex,size = l->size,n,rank = l->rank;
1767   PetscInt       i,N,*rows,*owners = l->rowners;
1768   PetscInt       *nprocs,j,idx,nsends,row;
1769   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
1770   PetscInt       *rvalues,tag = A->tag,count,base,slen,*source,lastidx = -1;
1771   PetscInt       *lens,*lrows,*values,bs=A->bs,rstart_bs=l->rstart_bs;
1772   MPI_Comm       comm = A->comm;
1773   MPI_Request    *send_waits,*recv_waits;
1774   MPI_Status     recv_status,*send_status;
1775   IS             istmp;
1776 #if defined(PETSC_DEBUG)
1777   PetscTruth     found = PETSC_FALSE;
1778 #endif
1779 
1780   PetscFunctionBegin;
1781   ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
1782   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
1783 
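  /* Rows listed in is may be owned by other processes: determine the owner of
     each global row index, ship the indices to their owners with point-to-point
     messages, and then zero the received rows locally below. */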
1784   /*  first count number of contributors to each processor */
1785   ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
1786   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
1787   ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
1788   j = 0;
1789   for (i=0; i<N; i++) {
1790     if (lastidx > (idx = rows[i])) j = 0;
1791     lastidx = idx;
1792     for (; j<size; j++) {
1793       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
1794         nprocs[2*j]++;
1795         nprocs[2*j+1] = 1;
1796         owner[i] = j;
1797 #if defined(PETSC_DEBUG)
1798         found = PETSC_TRUE;
1799 #endif
1800         break;
1801       }
1802     }
1803 #if defined(PETSC_DEBUG)
1804     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
1805     found = PETSC_FALSE;
1806 #endif
1807   }
1808   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
1809 
1810   /* inform other processors of number of messages and max length*/
1811   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
1812 
1813   /* post receives:   */
1814   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
1815   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
1816   for (i=0; i<nrecvs; i++) {
1817     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
1818   }
1819 
1820   /* do sends:
1821      1) starts[i] gives the starting index in svalues for stuff going to
1822      the ith processor
1823   */
1824   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
1825   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
1826   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
1827   starts[0]  = 0;
1828   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1829   for (i=0; i<N; i++) {
1830     svalues[starts[owner[i]]++] = rows[i];
1831   }
1832   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
1833 
1834   starts[0] = 0;
1835   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1836   count = 0;
1837   for (i=0; i<size; i++) {
1838     if (nprocs[2*i+1]) {
1839       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
1840     }
1841   }
1842   ierr = PetscFree(starts);CHKERRQ(ierr);
1843 
1844   base = owners[rank]*bs;
1845 
1846   /*  wait on receives */
1847   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
1848   source = lens + nrecvs;
1849   count  = nrecvs; slen = 0;
1850   while (count) {
1851     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
1852     /* unpack receives into our local space */
1853     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
1854     source[imdex]  = recv_status.MPI_SOURCE;
1855     lens[imdex]    = n;
1856     slen          += n;
1857     count--;
1858   }
1859   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1860 
1861   /* move the data into the send scatter */
1862   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
1863   count = 0;
1864   for (i=0; i<nrecvs; i++) {
1865     values = rvalues + i*nmax;
1866     for (j=0; j<lens[i]; j++) {
1867       lrows[count++] = values[j] - base;
1868     }
1869   }
1870   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1871   ierr = PetscFree(lens);CHKERRQ(ierr);
1872   ierr = PetscFree(owner);CHKERRQ(ierr);
1873   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1874 
1875   /* actually zap the local rows */
1876   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
1877   ierr = PetscLogObjectParent(A,istmp);CHKERRQ(ierr);
1878 
1879   /*
1880         Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal entry,
     which would allocate new memory and trigger many mallocs, slowing things down.
1884 
1885        Contributed by: Mathew Knepley
1886   */
1887   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1888   ierr = MatZeroRows_SeqBAIJ(l->B,istmp,0);CHKERRQ(ierr);
1889   if (diag && (l->A->M == l->A->N)) {
1890     ierr = MatZeroRows_SeqBAIJ(l->A,istmp,diag);CHKERRQ(ierr);
1891   } else if (diag) {
1892     ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr);
1893     if (((Mat_SeqBAIJ*)l->A->data)->nonew) {
1894       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1895 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1896     }
1897     for (i=0; i<slen; i++) {
1898       row  = lrows[i] + rstart_bs;
1899       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
1900     }
1901     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1902     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1903   } else {
1904     ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr);
1905   }
1906 
1907   ierr = ISDestroy(istmp);CHKERRQ(ierr);
1908   ierr = PetscFree(lrows);CHKERRQ(ierr);
1909 
1910   /* wait on sends */
1911   if (nsends) {
1912     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1913     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1914     ierr = PetscFree(send_status);CHKERRQ(ierr);
1915   }
1916   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1917   ierr = PetscFree(svalues);CHKERRQ(ierr);
1918 
1919   PetscFunctionReturn(0);
1920 }
1921 
1922 #undef __FUNCT__
1923 #define __FUNCT__ "MatPrintHelp_MPIBAIJ"
1924 PetscErrorCode MatPrintHelp_MPIBAIJ(Mat A)
1925 {
1926   Mat_MPIBAIJ       *a   = (Mat_MPIBAIJ*)A->data;
1927   MPI_Comm          comm = A->comm;
1928   static PetscTruth called = PETSC_FALSE;
1929   PetscErrorCode    ierr;
1930 
1931   PetscFunctionBegin;
1932   if (!a->rank) {
1933     ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr);
1934   }
1935   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1936   ierr = (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
  ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use a hash table for efficient matrix assembly\n");CHKERRQ(ierr);
1938   PetscFunctionReturn(0);
1939 }
1940 
1941 #undef __FUNCT__
1942 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
1943 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1944 {
1945   Mat_MPIBAIJ    *a   = (Mat_MPIBAIJ*)A->data;
1946   PetscErrorCode ierr;
1947 
1948   PetscFunctionBegin;
1949   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1950   PetscFunctionReturn(0);
1951 }
1952 
1953 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
1954 
1955 #undef __FUNCT__
1956 #define __FUNCT__ "MatEqual_MPIBAIJ"
1957 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag)
1958 {
1959   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1960   Mat            a,b,c,d;
1961   PetscTruth     flg;
1962   PetscErrorCode ierr;
1963 
1964   PetscFunctionBegin;
1965   a = matA->A; b = matA->B;
1966   c = matB->A; d = matB->B;
1967 
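  /* the matrices are equal only if both the diagonal and off-diagonal local blocks
     agree on every process, hence the logical AND reduction below */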
1968   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1969   if (flg) {
1970     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1971   }
1972   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1973   PetscFunctionReturn(0);
1974 }
1975 
1976 
1977 #undef __FUNCT__
1978 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ"
1979 PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A)
1980 {
1981   PetscErrorCode ierr;
1982 
1983   PetscFunctionBegin;
1984   ierr =  MatMPIBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1985   PetscFunctionReturn(0);
1986 }
1987 
1988 /* -------------------------------------------------------------------*/
1989 static struct _MatOps MatOps_Values = {
1990        MatSetValues_MPIBAIJ,
1991        MatGetRow_MPIBAIJ,
1992        MatRestoreRow_MPIBAIJ,
1993        MatMult_MPIBAIJ,
1994 /* 4*/ MatMultAdd_MPIBAIJ,
1995        MatMultTranspose_MPIBAIJ,
1996        MatMultTransposeAdd_MPIBAIJ,
1997        0,
1998        0,
1999        0,
2000 /*10*/ 0,
2001        0,
2002        0,
2003        0,
2004        MatTranspose_MPIBAIJ,
2005 /*15*/ MatGetInfo_MPIBAIJ,
2006        MatEqual_MPIBAIJ,
2007        MatGetDiagonal_MPIBAIJ,
2008        MatDiagonalScale_MPIBAIJ,
2009        MatNorm_MPIBAIJ,
2010 /*20*/ MatAssemblyBegin_MPIBAIJ,
2011        MatAssemblyEnd_MPIBAIJ,
2012        0,
2013        MatSetOption_MPIBAIJ,
2014        MatZeroEntries_MPIBAIJ,
2015 /*25*/ MatZeroRows_MPIBAIJ,
2016        0,
2017        0,
2018        0,
2019        0,
2020 /*30*/ MatSetUpPreallocation_MPIBAIJ,
2021        0,
2022        0,
2023        0,
2024        0,
2025 /*35*/ MatDuplicate_MPIBAIJ,
2026        0,
2027        0,
2028        0,
2029        0,
2030 /*40*/ 0,
2031        MatGetSubMatrices_MPIBAIJ,
2032        MatIncreaseOverlap_MPIBAIJ,
2033        MatGetValues_MPIBAIJ,
2034        0,
2035 /*45*/ MatPrintHelp_MPIBAIJ,
2036        MatScale_MPIBAIJ,
2037        0,
2038        0,
2039        0,
2040 /*50*/ 0,
2041        0,
2042        0,
2043        0,
2044        0,
2045 /*55*/ 0,
2046        0,
2047        MatSetUnfactored_MPIBAIJ,
2048        0,
2049        MatSetValuesBlocked_MPIBAIJ,
2050 /*60*/ 0,
2051        MatDestroy_MPIBAIJ,
2052        MatView_MPIBAIJ,
2053        MatGetPetscMaps_Petsc,
2054        0,
2055 /*65*/ 0,
2056        0,
2057        0,
2058        0,
2059        0,
2060 /*70*/ MatGetRowMax_MPIBAIJ,
2061        0,
2062        0,
2063        0,
2064        0,
2065 /*75*/ 0,
2066        0,
2067        0,
2068        0,
2069        0,
2070 /*80*/ 0,
2071        0,
2072        0,
2073        0,
2074        MatLoad_MPIBAIJ,
2075 /*85*/ 0,
2076        0,
2077        0,
2078        0,
2079        0,
2080 /*90*/ 0,
2081        0,
2082        0,
2083        0,
2084        0,
2085 /*95*/ 0,
2086        0,
2087        0,
2088        0};
2089 
2090 
2091 EXTERN_C_BEGIN
2092 #undef __FUNCT__
2093 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
2094 PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
2095 {
2096   PetscFunctionBegin;
2097   *a      = ((Mat_MPIBAIJ *)A->data)->A;
2098   *iscopy = PETSC_FALSE;
2099   PetscFunctionReturn(0);
2100 }
2101 EXTERN_C_END
2102 
2103 EXTERN_C_BEGIN
extern PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*);
2105 EXTERN_C_END
2106 
2107 #undef __FUNCT__
2108 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ"
2109 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt I[],const PetscInt J[],const PetscScalar v[])
2110 {
2111   Mat_MPIBAIJ    *b  = (Mat_MPIBAIJ *)B->data;
2112   PetscInt       m = B->m/bs,cstart = b->cstart, cend = b->cend,j,nnz,i,d;
2113   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart = b->rstart,ii;
2114   const PetscInt *JJ;
2115   PetscScalar    *values;
2116   PetscErrorCode ierr;
2117 
2118   PetscFunctionBegin;
2119 #if defined(PETSC_OPT_g)
  if (I[0]) SETERRQ1(PETSC_ERR_ARG_RANGE,"I[0] must be 0, it is %D",I[0]);
2121 #endif
2122   ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2123   o_nnz = d_nnz + m;
2124 
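  /* For each local block row count how many block columns fall inside [cstart,cend)
     (the diagonal block, d_nnz) and how many fall outside it (the off-diagonal
     block, o_nnz); these counts drive the preallocation below. */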
2125   for (i=0; i<m; i++) {
2126     nnz     = I[i+1]- I[i];
2127     JJ      = J + I[i];
2128     nnz_max = PetscMax(nnz_max,nnz);
2129 #if defined(PETSC_OPT_g)
    if (nnz < 0) SETERRQ1(PETSC_ERR_ARG_RANGE,"Local row %D has a negative number of columns %D",i,nnz);
2131 #endif
2132     for (j=0; j<nnz; j++) {
2133       if (*JJ >= cstart) break;
2134       JJ++;
2135     }
2136     d = 0;
2137     for (; j<nnz; j++) {
2138       if (*JJ++ >= cend) break;
2139       d++;
2140     }
2141     d_nnz[i] = d;
2142     o_nnz[i] = nnz - d;
2143   }
2144   ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2145   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2146 
2147   if (v) values = (PetscScalar*)v;
2148   else {
2149     ierr = PetscMalloc(bs*bs*(nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2150     ierr = PetscMemzero(values,bs*bs*nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2151   }
2152 
2153   ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2154   for (i=0; i<m; i++) {
2155     ii   = i + rstart;
2156     nnz  = I[i+1]- I[i];
2157     ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&ii,nnz,J+I[i],values,INSERT_VALUES);CHKERRQ(ierr);
2158   }
2159   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2160   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2161   ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr);
2162 
2163   if (!v) {
2164     ierr = PetscFree(values);CHKERRQ(ierr);
2165   }
2166   PetscFunctionReturn(0);
2167 }
2168 
2169 #undef __FUNCT__
2170 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR"
2171 /*@C
   MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in block AIJ
   format (block compressed row) using a CSR-style description of the local rows.

   Collective on MPI_Comm

   Input Parameters:
+  B - the matrix
.  bs - the block size
.  i - the indices into j for the start of each local block row (starts with zero)
.  j - the block column indices for each local block row (starts with zero); these must be sorted for each row
-  v - optional values in the matrix
2182 
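   Example usage (a minimal sketch; it assumes the local block CSR arrays ia, ja and
   optionally va have already been built by the caller):
.vb
   Mat B;
   MatCreate(comm,m,n,M,N,&B);
   MatSetType(B,MATMPIBAIJ);
   MatMPIBAIJSetPreallocationCSR(B,bs,ia,ja,va);   /* va may be PETSC_NULL */
.ve
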
2183    Level: developer
2184 
.keywords: matrix, block, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIBAIJ(), MPIBAIJ
2188 @*/
2189 PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2190 {
2191   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]);
2192 
2193   PetscFunctionBegin;
2194   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
2195   if (f) {
2196     ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr);
2197   }
2198   PetscFunctionReturn(0);
2199 }
2200 
2201 EXTERN_C_BEGIN
2202 #undef __FUNCT__
2203 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
2204 PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
2205 {
2206   Mat_MPIBAIJ    *b;
2207   PetscErrorCode ierr;
2208   PetscInt       i;
2209 
2210   PetscFunctionBegin;
2211   B->preallocated = PETSC_TRUE;
2212   ierr = PetscOptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2213 
2214   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
2215   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2216   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2217   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2218   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
  if (d_nnz) {
    for (i=0; i<B->m/bs; i++) {
      if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
    }
2223   }
2224   if (o_nnz) {
2225     for (i=0; i<B->m/bs; i++) {
      if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
2227     }
2228   }
2229 
2230   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr);
2231   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr);
2232   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
2233   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
2234 
2235   b = (Mat_MPIBAIJ*)B->data;
2236   B->bs  = bs;
2237   b->bs2 = bs*bs;
2238   b->mbs = B->m/bs;
2239   b->nbs = B->n/bs;
2240   b->Mbs = B->M/bs;
2241   b->Nbs = B->N/bs;
2242 
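  /* gather every process's block-row count and convert it to prefix sums so that
     rowners[rank] .. rowners[rank+1] is the range of block rows owned by this process */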
2243   ierr = MPI_Allgather(&b->mbs,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
2244   b->rowners[0]    = 0;
2245   for (i=2; i<=b->size; i++) {
2246     b->rowners[i] += b->rowners[i-1];
2247   }
2248   b->rstart    = b->rowners[b->rank];
2249   b->rend      = b->rowners[b->rank+1];
2250 
2251   ierr = MPI_Allgather(&b->nbs,1,MPIU_INT,b->cowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
2252   b->cowners[0] = 0;
2253   for (i=2; i<=b->size; i++) {
2254     b->cowners[i] += b->cowners[i-1];
2255   }
2256   b->cstart    = b->cowners[b->rank];
2257   b->cend      = b->cowners[b->rank+1];
2258 
2259   for (i=0; i<=b->size; i++) {
2260     b->rowners_bs[i] = b->rowners[i]*bs;
2261   }
2262   b->rstart_bs = b->rstart*bs;
2263   b->rend_bs   = b->rend*bs;
2264   b->cstart_bs = b->cstart*bs;
2265   b->cend_bs   = b->cend*bs;
2266 
2267   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);CHKERRQ(ierr);
2268   ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
2269   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2270   ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
2271   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);CHKERRQ(ierr);
2272   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
2273   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
2274   ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
2275 
2276   ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);
2277 
2278   PetscFunctionReturn(0);
2279 }
2280 EXTERN_C_END
2281 
2282 EXTERN_C_BEGIN
2283 EXTERN PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
2284 EXTERN PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
2285 EXTERN_C_END
2286 
2287 /*MC
2288    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
2289 
2290    Options Database Keys:
2291 . -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
2292 
2293   Level: beginner
2294 
2295 .seealso: MatCreateMPIBAIJ
2296 M*/
2297 
2298 EXTERN_C_BEGIN
2299 #undef __FUNCT__
2300 #define __FUNCT__ "MatCreate_MPIBAIJ"
2301 PetscErrorCode MatCreate_MPIBAIJ(Mat B)
2302 {
2303   Mat_MPIBAIJ    *b;
2304   PetscErrorCode ierr;
2305   PetscTruth     flg;
2306 
2307   PetscFunctionBegin;
2308   ierr = PetscNew(Mat_MPIBAIJ,&b);CHKERRQ(ierr);
2309   B->data = (void*)b;
2310 
2311   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2312   B->mapping    = 0;
2313   B->factor     = 0;
2314   B->assembled  = PETSC_FALSE;
2315 
2316   B->insertmode = NOT_SET_VALUES;
2317   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
2318   ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr);
2319 
2320   /* build local table of row and column ownerships */
2321   ierr          = PetscMalloc(3*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr);
2322   ierr = PetscLogObjectMemory(B,3*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));CHKERRQ(ierr);
2323   b->cowners    = b->rowners + b->size + 2;
2324   b->rowners_bs = b->cowners + b->size + 2;
2325 
2326   /* build cache for off array entries formed */
2327   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
2328   b->donotstash  = PETSC_FALSE;
2329   b->colmap      = PETSC_NULL;
2330   b->garray      = PETSC_NULL;
2331   b->roworiented = PETSC_TRUE;
2332 
2333 #if defined(PETSC_USE_MAT_SINGLE)
2334   /* stuff for MatSetValues_XXX in single precision */
2335   b->setvalueslen     = 0;
2336   b->setvaluescopy    = PETSC_NULL;
2337 #endif
2338 
2339   /* stuff used in block assembly */
2340   b->barray       = 0;
2341 
2342   /* stuff used for matrix vector multiply */
2343   b->lvec         = 0;
2344   b->Mvctx        = 0;
2345 
2346   /* stuff for MatGetRow() */
2347   b->rowindices   = 0;
2348   b->rowvalues    = 0;
2349   b->getrowactive = PETSC_FALSE;
2350 
2351   /* hash table stuff */
2352   b->ht           = 0;
2353   b->hd           = 0;
2354   b->ht_size      = 0;
2355   b->ht_flag      = PETSC_FALSE;
2356   b->ht_fact      = 0;
2357   b->ht_total_ct  = 0;
2358   b->ht_insert_ct = 0;
2359 
2360   ierr = PetscOptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
2361   if (flg) {
2362     PetscReal fact = 1.39;
2363     ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
2364     ierr = PetscOptionsGetReal(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
2365     if (fact <= 1.0) fact = 1.39;
2366     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
2367     ierr = PetscLogInfo((0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact));CHKERRQ(ierr);
2368   }
2369   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2370                                      "MatStoreValues_MPIBAIJ",
2371                                      MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
2372   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2373                                      "MatRetrieveValues_MPIBAIJ",
2374                                      MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
2375   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
2376                                      "MatGetDiagonalBlock_MPIBAIJ",
2377                                      MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
2378   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C",
2379                                      "MatMPIBAIJSetPreallocation_MPIBAIJ",
2380                                      MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
2381   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",
                                     "MatMPIBAIJSetPreallocationCSR_MPIBAIJ",
                                     MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
2384   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
2385                                      "MatDiagonalScaleLocal_MPIBAIJ",
2386                                      MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
2387   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C",
2388                                      "MatSetHashTableFactor_MPIBAIJ",
2389                                      MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
2390   PetscFunctionReturn(0);
2391 }
2392 EXTERN_C_END
2393 
2394 /*MC
2395    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
2396 
2397    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
2398    and MATMPIBAIJ otherwise.
2399 
2400    Options Database Keys:
2401 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
2402 
2403   Level: beginner
2404 
2405 .seealso: MatCreateMPIBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
2406 M*/
2407 
2408 EXTERN_C_BEGIN
2409 #undef __FUNCT__
2410 #define __FUNCT__ "MatCreate_BAIJ"
2411 PetscErrorCode MatCreate_BAIJ(Mat A)
2412 {
2413   PetscErrorCode ierr;
2414   PetscMPIInt    size;
2415 
2416   PetscFunctionBegin;
2417   ierr = PetscObjectChangeTypeName((PetscObject)A,MATBAIJ);CHKERRQ(ierr);
2418   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
2419   if (size == 1) {
2420     ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr);
2421   } else {
2422     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
2423   }
2424   PetscFunctionReturn(0);
2425 }
2426 EXTERN_C_END
2427 
2428 #undef __FUNCT__
2429 #define __FUNCT__ "MatMPIBAIJSetPreallocation"
2430 /*@C
2431    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
2432    (block compressed row).  For good matrix assembly performance
2433    the user should preallocate the matrix storage by setting the parameters
2434    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2435    performance can be increased by more than a factor of 50.
2436 
2437    Collective on Mat
2438 
2439    Input Parameters:
2440 +  A - the matrix
.  bs   - size of block
2442 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2443            submatrix  (same for all local rows)
2444 .  d_nnz - array containing the number of block nonzeros in the various block rows
           in the diagonal portion of the local submatrix (possibly different for each block
2446            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
2447 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2448            submatrix (same for all local rows).
2449 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2450            off-diagonal portion of the local submatrix (possibly different for
2451            each block row) or PETSC_NULL.
2452 
2453    If the *_nnz parameter is given then the *_nz parameter is ignored
2454 
2455    Options Database Keys:
2456 .   -mat_no_unroll - uses code that does not unroll the loops in the
2457                      block calculations (much slower)
2458 .   -mat_block_size - size of the blocks to use
2459 
2460    Notes:
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
2463 
2464    Storage Information:
2465    For a square global matrix we define each processor's diagonal portion
2466    to be its local rows and the corresponding columns (a square submatrix);
2467    each processor's off-diagonal portion encompasses the remainder of the
2468    local matrix (a rectangular submatrix).
2469 
2470    The user can specify preallocated storage for the diagonal part of
2471    the local submatrix with either d_nz or d_nnz (not both).  Set
2472    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2473    memory allocation.  Likewise, specify preallocated storage for the
2474    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2475 
2476    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2477    the figure below we depict these three local rows and all columns (0-11).
2478 
2479 .vb
2480            0 1 2 3 4 5 6 7 8 9 10 11
2481           -------------------
2482    row 3  |  o o o d d d o o o o o o
2483    row 4  |  o o o d d d o o o o o o
2484    row 5  |  o o o d d d o o o o o o
2485           -------------------
2486 .ve
2487 
2488    Thus, any entries in the d locations are stored in the d (diagonal)
2489    submatrix, and any entries in the o locations are stored in the
2490    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
2491    stored simply in the MATSEQBAIJ format for compressed row storage.
2492 
2493    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2494    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2495    In general, for PDE problems in which most nonzeros are near the diagonal,
2496    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2497    or you will get TERRIBLE performance; see the users' manual chapter on
2498    matrices.
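
   Example usage (a sketch; the communicator, local/global sizes and the block size bs
   are assumed to have been chosen by the caller, and exact per-block-row counts in
   d_nnz/o_nnz give better assembly performance than the constant estimates used here):
.vb
   Mat A;
   MatCreate(comm,m,n,M,N,&A);
   MatSetType(A,MATMPIBAIJ);
   MatMPIBAIJSetPreallocation(A,bs,5,PETSC_NULL,2,PETSC_NULL);
.ve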
2499 
2500    Level: intermediate
2501 
2502 .keywords: matrix, block, aij, compressed row, sparse, parallel
2503 
2504 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR()
2505 @*/
2506 PetscErrorCode MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2507 {
2508   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
2509 
2510   PetscFunctionBegin;
2511   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2512   if (f) {
2513     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2514   }
2515   PetscFunctionReturn(0);
2516 }
2517 
2518 #undef __FUNCT__
2519 #define __FUNCT__ "MatCreateMPIBAIJ"
2520 /*@C
2521    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
2522    (block compressed row).  For good matrix assembly performance
2523    the user should preallocate the matrix storage by setting the parameters
2524    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2525    performance can be increased by more than a factor of 50.
2526 
2527    Collective on MPI_Comm
2528 
2529    Input Parameters:
2530 +  comm - MPI communicator
.  bs   - size of block
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
           This value should be the same as the local size used in creating the
           y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
           This value should be the same as the local size used in creating the
           x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
2540 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
2541            submatrix  (same for all local rows)
2542 .  d_nnz - array containing the number of nonzero blocks in the various block rows
           in the diagonal portion of the local submatrix (possibly different for each block
2544            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
2545 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
2546            submatrix (same for all local rows).
2547 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
2548            off-diagonal portion of the local submatrix (possibly different for
2549            each block row) or PETSC_NULL.
2550 
2551    Output Parameter:
2552 .  A - the matrix
2553 
2554    Options Database Keys:
2555 .   -mat_no_unroll - uses code that does not unroll the loops in the
2556                      block calculations (much slower)
2557 .   -mat_block_size - size of the blocks to use
2558 
2559    Notes:
2560    If the *_nnz parameter is given then the *_nz parameter is ignored
2561 
   A nonzero block is any block that has 1 or more nonzeros in it
2563 
2564    The user MUST specify either the local or global matrix dimensions
2565    (possibly both).
2566 
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
2569 
2570    Storage Information:
2571    For a square global matrix we define each processor's diagonal portion
2572    to be its local rows and the corresponding columns (a square submatrix);
2573    each processor's off-diagonal portion encompasses the remainder of the
2574    local matrix (a rectangular submatrix).
2575 
2576    The user can specify preallocated storage for the diagonal part of
2577    the local submatrix with either d_nz or d_nnz (not both).  Set
2578    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2579    memory allocation.  Likewise, specify preallocated storage for the
2580    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2581 
2582    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2583    the figure below we depict these three local rows and all columns (0-11).
2584 
2585 .vb
2586            0 1 2 3 4 5 6 7 8 9 10 11
2587           -------------------
2588    row 3  |  o o o d d d o o o o o o
2589    row 4  |  o o o d d d o o o o o o
2590    row 5  |  o o o d d d o o o o o o
2591           -------------------
2592 .ve
2593 
2594    Thus, any entries in the d locations are stored in the d (diagonal)
2595    submatrix, and any entries in the o locations are stored in the
2596    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
2597    stored simply in the MATSEQBAIJ format for compressed row storage.
2598 
2599    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2600    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2601    In general, for PDE problems in which most nonzeros are near the diagonal,
2602    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2603    or you will get TERRIBLE performance; see the users' manual chapter on
2604    matrices.
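
   Example usage (a sketch; PETSC_DECIDE is passed for the local sizes so that PETSc
   computes them from the global sizes M and N, which are assumed given):
.vb
   Mat A;
   MatCreateMPIBAIJ(comm,bs,PETSC_DECIDE,PETSC_DECIDE,M,N,5,PETSC_NULL,2,PETSC_NULL,&A);
   /* ... insert entries with MatSetValuesBlocked(), then MatAssemblyBegin()/MatAssemblyEnd() ... */
.ve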
2605 
2606    Level: intermediate
2607 
2608 .keywords: matrix, block, aij, compressed row, sparse, parallel
2609 
2610 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
2611 @*/
2612 PetscErrorCode MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2613 {
2614   PetscErrorCode ierr;
2615   PetscMPIInt    size;
2616 
2617   PetscFunctionBegin;
2618   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2619   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2620   if (size > 1) {
2621     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
2622     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2623   } else {
2624     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
2625     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2626   }
2627   PetscFunctionReturn(0);
2628 }
2629 
2630 #undef __FUNCT__
2631 #define __FUNCT__ "MatDuplicate_MPIBAIJ"
2632 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2633 {
2634   Mat            mat;
2635   Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
2636   PetscErrorCode ierr;
2637   PetscInt       len=0;
2638 
2639   PetscFunctionBegin;
2640   *newmat       = 0;
2641   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
2642   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
2643   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2644 
2645   mat->factor       = matin->factor;
2646   mat->preallocated = PETSC_TRUE;
2647   mat->assembled    = PETSC_TRUE;
2648   mat->insertmode   = NOT_SET_VALUES;
2649 
2650   a      = (Mat_MPIBAIJ*)mat->data;
2651   mat->bs  = matin->bs;
2652   a->bs2   = oldmat->bs2;
2653   a->mbs   = oldmat->mbs;
2654   a->nbs   = oldmat->nbs;
2655   a->Mbs   = oldmat->Mbs;
2656   a->Nbs   = oldmat->Nbs;
2657 
2658   a->rstart       = oldmat->rstart;
2659   a->rend         = oldmat->rend;
2660   a->cstart       = oldmat->cstart;
2661   a->cend         = oldmat->cend;
2662   a->size         = oldmat->size;
2663   a->rank         = oldmat->rank;
2664   a->donotstash   = oldmat->donotstash;
2665   a->roworiented  = oldmat->roworiented;
2666   a->rowindices   = 0;
2667   a->rowvalues    = 0;
2668   a->getrowactive = PETSC_FALSE;
2669   a->barray       = 0;
2670   a->rstart_bs    = oldmat->rstart_bs;
2671   a->rend_bs      = oldmat->rend_bs;
2672   a->cstart_bs    = oldmat->cstart_bs;
2673   a->cend_bs      = oldmat->cend_bs;
2674 
2675   /* hash table stuff */
2676   a->ht           = 0;
2677   a->hd           = 0;
2678   a->ht_size      = 0;
2679   a->ht_flag      = oldmat->ht_flag;
2680   a->ht_fact      = oldmat->ht_fact;
2681   a->ht_total_ct  = 0;
2682   a->ht_insert_ct = 0;
2683 
2684   ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
2685   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
2686   ierr = MatStashCreate_Private(matin->comm,matin->bs,&mat->bstash);CHKERRQ(ierr);
2687   if (oldmat->colmap) {
2688 #if defined (PETSC_USE_CTABLE)
2689   ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2690 #else
2691   ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2692   ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2693   ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2694 #endif
2695   } else a->colmap = 0;
2696 
2697   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2698     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2699     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2700     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
2701   } else a->garray = 0;
2702 
2703   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2704   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2705   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2706   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2707 
2708   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2709   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2710   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2711   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2712   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
2713   *newmat = mat;
2714 
2715   PetscFunctionReturn(0);
2716 }
2717 
2718 #include "petscsys.h"
2719 
2720 #undef __FUNCT__
2721 #define __FUNCT__ "MatLoad_MPIBAIJ"
2722 PetscErrorCode MatLoad_MPIBAIJ(PetscViewer viewer,const MatType type,Mat *newmat)
2723 {
2724   Mat            A;
2725   PetscErrorCode ierr;
2726   int            fd;
2727   PetscInt       i,nz,j,rstart,rend;
2728   PetscScalar    *vals,*buf;
2729   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2730   MPI_Status     status;
2731   PetscMPIInt    rank,size,maxnz;
2732   PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
2733   PetscInt       *locrowlens,*procsnz = 0,*browners;
2734   PetscInt       jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax;
2735   PetscMPIInt    tag = ((PetscObject)viewer)->tag;
2736   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2737   PetscInt       dcount,kmax,k,nzcount,tmp,mend;
2738 
2739   PetscFunctionBegin;
2740   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2741 
2742   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2743   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2744   if (!rank) {
2745     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2746     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2747     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2748   }
2749 
2750   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2751   M = header[1]; N = header[2];
2752 
2753   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2754 
2755   /*
2756      This code adds extra rows to make sure the number of rows is
2757      divisible by the blocksize
2758   */
2759   Mbs        = M/bs;
2760   extra_rows = bs - M + bs*Mbs;
2761   if (extra_rows == bs) extra_rows = 0;
2762   else                  Mbs++;
2763   if (extra_rows && !rank) {
2764     ierr = PetscLogInfo((0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n"));CHKERRQ(ierr);
2765   }
2766 
2767   /* determine ownership of all rows */
2768   mbs        = Mbs/size + ((Mbs % size) > rank);
2769   m          = mbs*bs;
2770   ierr       = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr);
2771   ierr       = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2772 
2773   /* process 0 needs enough room for process with most rows */
2774   if (!rank) {
2775     mmax = rowners[1];
2776     for (i=2; i<size; i++) {
2777       mmax = PetscMax(mmax,rowners[i]);
2778     }
2779     mmax*=bs;
2780   } else mmax = m;
2781 
2782   rowners[0] = 0;
2783   for (i=2; i<=size; i++)  rowners[i] += rowners[i-1];
2784   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2785   rstart = rowners[rank];
2786   rend   = rowners[rank+1];
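  /*
     Example of the ownership calculation (hypothetical sizes, for illustration only):
     with Mbs = 10 block rows, size = 3 and bs = 2, the Allgather fills
     rowners[1..3] = {4,3,3}; after setting rowners[0] = 0 and taking the running sum,
     rowners = {0,4,7,10} and browners = {0,8,14,20}, so rank 1 owns block rows [4,7),
     i.e. point rows [8,14).
  */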
2787 
2788   /* distribute row lengths to all processors */
2789   ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr);
2790   if (!rank) {
2791     mend = m;
2792     if (size == 1) mend = mend - extra_rows;
2793     ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr);
2794     for (j=mend; j<m; j++) locrowlens[j] = 1;
2795     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2796     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2797     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2798     for (j=0; j<m; j++) {
2799       procsnz[0] += locrowlens[j];
2800     }
2801     for (i=1; i<size; i++) {
2802       mend = browners[i+1] - browners[i];
2803       if (i == size-1) mend = mend - extra_rows;
2804       ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr);
2805       for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
2806       /* calculate the number of nonzeros on each processor */
2807       for (j=0; j<browners[i+1]-browners[i]; j++) {
2808         procsnz[i] += rowlengths[j];
2809       }
2810       ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2811     }
2812     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2813   } else {
2814     ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2815   }
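  /*
     At this point every process (not just rank 0) holds locrowlens[0..m-1], the
     point-row lengths of its own rows; any padded rows appended on the last process
     have length 1.
  */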
2816 
2817   if (!rank) {
2818     /* determine max buffer needed and allocate it */
2819     maxnz = procsnz[0];
2820     for (i=1; i<size; i++) {
2821       maxnz = PetscMax(maxnz,procsnz[i]);
2822     }
2823     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2824 
2825     /* read in my part of the matrix column indices  */
2826     nz     = procsnz[0];
2827     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2828     mycols = ibuf;
2829     if (size == 1)  nz -= extra_rows;
2830     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2831     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2832 
2833     /* read in everyone else's column indices (except the last proc's) and ship them off */
2834     for (i=1; i<size-1; i++) {
2835       nz   = procsnz[i];
2836       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2837       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2838     }
2839     /* read in the column indices for the last proc */
2840     if (size != 1) {
2841       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2842       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2843       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2844       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2845     }
2846     ierr = PetscFree(cols);CHKERRQ(ierr);
2847   } else {
2848     /* determine buffer space needed for message */
2849     nz = 0;
2850     for (i=0; i<m; i++) {
2851       nz += locrowlens[i];
2852     }
2853     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2854     mycols = ibuf;
2855     /* receive message of column indices*/
2856     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2857     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2858     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2859   }
2860 
2861   /* loop over local rows, determining number of off diagonal entries */
2862   ierr     = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
2863   ierr     = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
2864   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2865   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2866   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2867   rowcount = 0; nzcount = 0;
2868   for (i=0; i<mbs; i++) {
2869     dcount  = 0;
2870     odcount = 0;
2871     for (j=0; j<bs; j++) {
2872       kmax = locrowlens[rowcount];
2873       for (k=0; k<kmax; k++) {
2874         tmp = mycols[nzcount++]/bs;
2875         if (!mask[tmp]) {
2876           mask[tmp] = 1;
2877           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
2878           else masked1[dcount++] = tmp;
2879         }
2880       }
2881       rowcount++;
2882     }
2883 
2884     dlens[i]  = dcount;
2885     odlens[i] = odcount;
2886 
2887     /* zero out the mask elements we set */
2888     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2889     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2890   }
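  /*
     Summary of the counting loop above: for each block row, mask[] flags the distinct
     block columns touched by its bs point rows; block columns inside the local block
     range [rstart,rend) are counted in dlens[] (the diagonal part, stored in a->A) and
     the remainder in odlens[] (the off-diagonal part, stored in a->B).  These counts
     feed the preallocation below.
  */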
2891 
2892   /* create our matrix */
2893   ierr = MatCreate(comm,m,m,M+extra_rows,N+extra_rows,&A);CHKERRQ(ierr);
2894   ierr = MatSetType(A,type);CHKERRQ(ierr);
2895   ierr = MatMPIBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2896 
2897   /* the column indices in the file are assumed sorted within each row, so indicate this before inserting values */
2898   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2899 
2900   if (!rank) {
2901     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2902     /* read in my part of the matrix numerical values  */
2903     nz = procsnz[0];
2904     vals = buf;
2905     mycols = ibuf;
2906     if (size == 1)  nz -= extra_rows;
2907     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2908     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2909 
2910     /* insert into matrix */
2911     jj      = rstart*bs;
2912     for (i=0; i<m; i++) {
2913       ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2914       mycols += locrowlens[i];
2915       vals   += locrowlens[i];
2916       jj++;
2917     }
2918     /* read in the other processors' values (except the last one's) and ship them out */
2919     for (i=1; i<size-1; i++) {
2920       nz   = procsnz[i];
2921       vals = buf;
2922       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2923       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2924     }
2925     /* read in the values for the last proc; the extra padded rows are not in the file */
2926     if (size != 1){
2927       nz   = procsnz[size-1] - extra_rows;
2928       vals = buf;
2929       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2930       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2931       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
2932     }
2933     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2934   } else {
2935     /* receive numeric values */
2936     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2937 
2938     /* receive message of values*/
2939     vals   = buf;
2940     mycols = ibuf;
2941     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2942     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2943     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2944 
2945     /* insert into matrix */
2946     jj      = rstart*bs;
2947     for (i=0; i<m; i++) {
2948       ierr    = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2949       mycols += locrowlens[i];
2950       vals   += locrowlens[i];
2951       jj++;
2952     }
2953   }
2954   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2955   ierr = PetscFree(buf);CHKERRQ(ierr);
2956   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2957   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
2958   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
2959   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
2960   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2961   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2962 
2963   *newmat = A;
2964   PetscFunctionReturn(0);
2965 }
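/*
   Usage sketch for loading a blocked matrix with this routine (illustration only; the
   file name is made up and the binary file-mode constant varies between PETSc versions):

     PetscViewer viewer;
     Mat         A;

     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",PETSC_FILE_RDONLY,&viewer);CHKERRQ(ierr);
     ierr = MatLoad(viewer,MATMPIBAIJ,&A);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);

   combined with -matload_block_size <bs> on the command line to select the block size.
*/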
2966 
2967 #undef __FUNCT__
2968 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
2969 /*@
2970    MatMPIBAIJSetHashTableFactor - Sets the factor used to compute the size of the hash table.
2971 
2972    Collective on Mat
2973 
2974    Input Parameters:
2975 +  mat  - the matrix
2976 -  fact - factor used in computing the size of the hash table
2977 
2978    Level: advanced
2979 
2980    Notes:
2981    This can also be set by the command line option: -mat_use_hash_table <fact>
2982 
2983 .keywords: matrix, hashtable, factor, HT
2984 
2985 .seealso: MatSetOption()
2986 @*/
2987 PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
2988 {
2989   PetscErrorCode ierr,(*f)(Mat,PetscReal);
2990 
2991   PetscFunctionBegin;
2992   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);CHKERRQ(ierr);
2993   if (f) {
2994     ierr = (*f)(mat,fact);CHKERRQ(ierr);
2995   }
2996   PetscFunctionReturn(0);
2997 }
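/*
   Usage sketch (hypothetical factor value, for illustration only): the factor only has
   an effect when hash-table based assembly is enabled on the matrix, e.g.

     ierr = MatSetOption(A,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
     ierr = MatMPIBAIJSetHashTableFactor(A,1.99);CHKERRQ(ierr);
*/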
2998 
2999 #undef __FUNCT__
3000 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
3001 PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
3002 {
3003   Mat_MPIBAIJ *baij;
3004 
3005   PetscFunctionBegin;
3006   baij = (Mat_MPIBAIJ*)mat->data;
3007   baij->ht_fact = fact;
3008   PetscFunctionReturn(0);
3009 }
3010 
3011 #undef __FUNCT__
3012 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
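/*
   Returns the two local sequential BAIJ pieces of an MPIBAIJ matrix: Ad is the diagonal
   block (coupling among the locally owned rows and columns), Ao is the off-diagonal
   block, and colmap maps Ao's compressed block columns back to global block column
   indices.
*/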
3013 PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3014 {
3015   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
3016   PetscFunctionBegin;
3017   *Ad     = a->A;
3018   *Ao     = a->B;
3019   *colmap = a->garray;
3020   PetscFunctionReturn(0);
3021 }
3022