/*$Id: mpibaij.c,v 1.234 2001/09/25 22:56:49 balay Exp $*/

#include "src/mat/impls/baij/mpi/mpibaij.h"   /*I  "petscmat.h"  I*/
#include "src/vec/vecimpl.h"

EXTERN int MatSetUpMultiply_MPIBAIJ(Mat);
EXTERN int DisAssemble_MPIBAIJ(Mat);
EXTERN int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int);
EXTERN int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatReuse,Mat **);
EXTERN int MatGetValues_SeqBAIJ(Mat,int,int *,int,int *,PetscScalar *);
EXTERN int MatSetValues_SeqBAIJ(Mat,int,int *,int,int *,PetscScalar *,InsertMode);
EXTERN int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode);
EXTERN int MatGetRow_SeqBAIJ(Mat,int,int*,int**,PetscScalar**);
EXTERN int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,PetscScalar**);
EXTERN int MatPrintHelp_SeqBAIJ(Mat);
EXTERN int MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar*);

/*  UGLY, ugly, ugly
   When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar()
   does not exist. Otherwise the ..._MatScalar() routines take matrix elements that are
   already in single precision and insert them directly into the single precision data
   structures, while MatSetValuesBlocked_MPIBAIJ() first converts the entries to single
   precision and then calls ..._MatScalar() to put them into the single precision data
   structures.
*/
#if defined(PETSC_USE_MAT_SINGLE)
EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
EXTERN int MatSetValues_MPIBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
EXTERN int MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
EXTERN int MatSetValues_MPIBAIJ_HT_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
EXTERN int MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
#else
#define MatSetValuesBlocked_SeqBAIJ_MatScalar      MatSetValuesBlocked_SeqBAIJ
#define MatSetValues_MPIBAIJ_MatScalar             MatSetValues_MPIBAIJ
#define MatSetValuesBlocked_MPIBAIJ_MatScalar      MatSetValuesBlocked_MPIBAIJ
#define MatSetValues_MPIBAIJ_HT_MatScalar          MatSetValues_MPIBAIJ_HT
#define MatSetValuesBlocked_MPIBAIJ_HT_MatScalar   MatSetValuesBlocked_MPIBAIJ_HT
#endif
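
/*
   A minimal sketch (illustration only, not part of the build) of the conversion
   pattern just described, assuming PETSC_USE_MAT_SINGLE is defined so that
   MatScalar is single precision while PetscScalar is not:

     int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,
                              PetscScalar *v,InsertMode addv)
     {
       MatScalar *vsingle;
       int       i,ierr,N = m*n;
       ierr = PetscMalloc(N*sizeof(MatScalar),&vsingle);CHKERRQ(ierr);
       for (i=0; i<N; i++) vsingle[i] = v[i];        narrowing copy
       ierr = MatSetValues_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
       ierr = PetscFree(vsingle);CHKERRQ(ierr);
       return 0;
     }

   The real wrappers further below avoid the repeated malloc/free by caching the
   workspace in baij->setvaluescopy.
*/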

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMax_MPIBAIJ"
int MatGetRowMax_MPIBAIJ(Mat A,Vec v)
{
  Mat_MPIBAIJ  *a = (Mat_MPIBAIJ*)A->data;
  int          ierr,i;
  PetscScalar  *va,*vb;
  Vec          vtmp;

  PetscFunctionBegin;

  ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->m,&vtmp);CHKERRQ(ierr);
  ierr = MatGetRowMax(a->B,vtmp);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->m; i++){
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) va[i] = vb[i];
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = VecDestroy(vtmp);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPIBAIJ"
int MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
  int         ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
int MatRetrieveValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
  int         ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*
     Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  This is done in a non-scalable way since the
   length of colmap equals the global number of block columns.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIBAIJ_Private"
static int CreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data;
  int         nbs = B->nbs,i,bs=B->bs,ierr;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr);
  for (i=0; i<nbs; i++){
    ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((baij->Nbs+1)*sizeof(int),&baij->colmap);CHKERRQ(ierr);
  PetscLogObjectMemory(mat,baij->Nbs*sizeof(int));
  ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));CHKERRQ(ierr);
  for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}
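
/*
   Example of the colmap semantics (a sketch with made-up numbers): with bs = 2
   and garray = {3,7,9} (the global block columns present in the off-diagonal
   part B), the non-CTABLE branch above produces

       colmap[3] = 0*2+1 = 1,   colmap[7] = 1*2+1 = 3,   colmap[9] = 2*2+1 = 5,

   and colmap[c] = 0 for every other global block column c.  A lookup such as
   col = colmap[in[j]/bs] - 1 then yields the first local column of that block
   in B (0, 2, or 4 here), or -1 if the block column is not present.
*/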

#define CHUNKSIZE  10

#define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto a_noinsert; \
        } \
      } \
      if (a->nonew == 1) goto a_noinsert; \
      else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
      if (nrow >= rmax) { \
        /* there is no extra room in row, therefore enlarge */ \
        int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
        MatScalar *new_a; \
 \
        if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
 \
        /* malloc new storage space */ \
        len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \
        ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
        new_j   = (int*)(new_a + bs2*new_nz); \
        new_i   = new_j + new_nz; \
 \
        /* copy over old data into new slots */ \
        for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \
        for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
        ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
        len  = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
        ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
        ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
                    aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
        /* free up old matrix storage */ \
        ierr = PetscFree(a->a);CHKERRQ(ierr);  \
        if (!a->singlemalloc) { \
          ierr = PetscFree(a->i);CHKERRQ(ierr); \
          ierr = PetscFree(a->j);CHKERRQ(ierr); \
        } \
        aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
        a->singlemalloc = PETSC_TRUE; \
 \
        rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
        rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
        PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
        a->maxnz += bs2*CHUNKSIZE; \
        a->reallocs++; \
        a->nz++; \
      } \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      a_noinsert:; \
    ailen[brow] = nrow; \
}

#define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto b_noinsert; \
        } \
      } \
      if (b->nonew == 1) goto b_noinsert; \
      else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
      if (nrow >= rmax) { \
        /* there is no extra room in row, therefore enlarge */ \
        int       new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
        MatScalar *new_a; \
 \
        if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
 \
        /* malloc new storage space */ \
        len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \
        ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
        new_j   = (int*)(new_a + bs2*new_nz); \
        new_i   = new_j + new_nz; \
 \
        /* copy over old data into new slots */ \
        for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \
        for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
        ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
        len  = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
        ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
        ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
                    ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
        /* free up old matrix storage */ \
        ierr = PetscFree(b->a);CHKERRQ(ierr);  \
        if (!b->singlemalloc) { \
          ierr = PetscFree(b->i);CHKERRQ(ierr); \
          ierr = PetscFree(b->j);CHKERRQ(ierr); \
        } \
        ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
        b->singlemalloc = PETSC_TRUE; \
 \
        rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
        rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
        PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
        b->maxnz += bs2*CHUNKSIZE; \
        b->reallocs++; \
        b->nz++; \
      } \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      b_noinsert:; \
    bilen[brow] = nrow; \
}

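/*
   Addressing within the macros above: the bs x bs blocks are stored
   column-major, so scalar entry (ridx,cidx) of block _i lives at
   ap[bs2*_i + bs*cidx + ridx].  For example, with bs = 2 (bs2 = 4), the entry
   in row 1, column 0 of the third stored block (_i = 2) is
   ap[4*2 + 2*0 + 1] = ap[9].
*/
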
#if defined(PETSC_USE_MAT_SINGLE)
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ"
int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
  int         ierr,i,N = m*n;
  MatScalar   *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen = N;
  }
  vsingle = b->setvaluescopy;

  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValues_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
  int         ierr,i,N = m*n*b->bs2;
  MatScalar   *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen = N;
  }
  vsingle = b->setvaluescopy;
  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
int MatSetValues_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
  int         ierr,i,N = m*n;
  MatScalar   *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen = N;
  }
  vsingle = b->setvaluescopy;
  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValues_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
  int         ierr,i,N = m*n*b->bs2;
  MatScalar   *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen = N;
  }
  vsingle = b->setvaluescopy;
  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif
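
/*
   For context, a typical user-level sequence that exercises the routines in
   this file (standard PETSc calls; M, N, d_nz, o_nz, row and col are
   hypothetical placeholders, and error checking is omitted for brevity):

     Mat         mat;
     PetscScalar vals[4] = {1.0,2.0,3.0,4.0};    one 2x2 block, row oriented

     MatCreateMPIBAIJ(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DECIDE,M,N,
                      d_nz,PETSC_NULL,o_nz,PETSC_NULL,&mat);
     MatSetValuesBlocked(mat,1,&row,1,&col,vals,INSERT_VALUES);
     MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);
     MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);
*/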

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar"
int MatSetValues_MPIBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  MatScalar   value;
  PetscTruth  roworiented = baij->roworiented;
  int         ierr,i,j,row,col;
  int         rstart_orig=baij->rstart_bs;
  int         rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
  int         cend_orig=baij->cend_bs,bs=baij->bs;

  /* Some variables required by the macros */
  Mat         A = baij->A;
  Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data;
  int         *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar   *aa=a->a;

  Mat         B = baij->B;
  Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data;
  int         *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar   *ba=b->a;

  int         *rp,ii,nrow,_i,rmax,N,brow,bcol;
  int         low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar   *ap,*bap;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
    if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      row = im[i] - rstart_orig;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig) {
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
        else if (in[j] >= mat->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");}
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B = baij->B;
              b = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba=b->a;
            } else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_MatScalar"
int MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  MatScalar   *value,*barray=baij->barray;
  PetscTruth  roworiented = baij->roworiented;
  int         ierr,i,j,ii,jj,row,col,rstart=baij->rstart;
  int         rend=baij->rend,cstart=baij->cstart,stepval;
  int         cend=baij->cend,bs=baij->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %d max %d",im[i],baij->Mbs);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If n == 1 (row-oriented) or m == 1 (column-oriented) the input block
           is already contiguous and no copy is required */
        if ((roworiented) && (n == 1)) {
          barray = v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = v + j*bs2;
        } else { /* here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++ = *value++;
            }
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend) {
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
        else if (in[j] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
        else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %d max %d",in[j],baij->Nbs);}
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_BOPT_g)
#if defined (PETSC_USE_CTABLE)
            { int data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
            }
          }
          else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#define HASH_KEY 0.6180339887
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
/* #define HASH(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
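
/*
   HASH() is a multiplicative hash: it keeps the fractional part of
   key*HASH_KEY (HASH_KEY being the golden-ratio conjugate) and scales it by
   the table size.  A worked example with made-up numbers, size = 97 and
   key = 5: tmp = 5*0.6180339887 = 3.0901699..., the fractional part is
   0.0901699..., so h1 = (int)(97*0.0901699...) = 8.  Collisions are resolved
   by the linear-probing loops in the routines below.
*/
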
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_HT_MatScalar"
int MatSetValues_MPIBAIJ_HT_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  PetscTruth  roworiented = baij->roworiented;
  int         ierr,i,j,row,col;
  int         rstart_orig=baij->rstart_bs;
  int         rend_orig=baij->rend_bs,Nbs=baij->Nbs;
  int         h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx;
  PetscReal   tmp;
  MatScalar   **HD = baij->hd,value;
#if defined(PETSC_USE_BOPT_g)
  int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  for (i=0; i<m; i++) {
#if defined(PETSC_USE_BOPT_g)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
#endif
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j=0; j<n; j++) {
        col = in[j];
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        /* look up the (row,col) block in the hash table */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_BOPT_g)
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"(row,col) has no entry in the hash table");
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"(row,col) has no entry in the hash table");
            }
          }
        }
#endif
        /* a hash table entry was found, so insert the value at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_BOPT_g)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT_MatScalar"
int MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  PetscTruth  roworiented = baij->roworiented;
  int         ierr,i,j,ii,jj,row,col;
  int         rstart=baij->rstart;
  int         rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2;
  int         h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  PetscReal   tmp;
  MatScalar   **HD = baij->hd,*baij_a;
  MatScalar   *v_t,*value;
#if defined(PETSC_USE_BOPT_g)
  int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_BOPT_g)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
#endif
    row   = im[i];
    v_t   = v + i*bs2;
    if (row >= rstart && row < rend) {
      for (j=0; j<n; j++) {
        col = in[j];

        /* look up the block in the hash table */
        key = row*Nbs+col+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_BOPT_g)
        total_ct++;
        insert_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"(row,col) has no entry in the hash table");
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"(row,col) has no entry in the hash table");
            }
          }
        }
#endif
        baij_a = HD[idx];
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t  += bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] = *value++;
              }
            }
          }
        } else {
          value = v + j*(stepval+bs)*bs + i*bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] = *value++;
              }
            }
          }
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_BOPT_g)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIBAIJ"
int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  int         bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
  int         bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
        if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          col  = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

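/*
   MatNorm_MPIBAIJ below computes the Frobenius norm

       ||A||_F = sqrt( sum_{i,j} |a_ij|^2 )

   by summing the squares of the locally stored entries of the diagonal (A) and
   off-diagonal (B) parts and combining the per-process partial sums with an
   MPI_Allreduce before taking the square root.
*/
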
#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIBAIJ"
int MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
  int         ierr,i,bs2=baij->bs2;
  PetscReal   sum = 0.0;
  MatScalar   *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz*bs2; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz*bs2; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,norm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
    }
  }
  PetscFunctionReturn(0);
}

/*
  Creates and fills the hash table.
  The table is created only once; if new entries need to be added to the
  matrix then the hash table has to be destroyed and recreated.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
int MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  Mat         A = baij->A,B=baij->B;
  Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
  int         i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
  int         size,bs2=baij->bs2,rstart=baij->rstart,ierr;
  int         cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs;
  int         *HT,key;
  MatScalar   **HD;
  PetscReal   tmp;
#if defined(PETSC_USE_BOPT_g)
  int         ct=0,max=0;
#endif

  PetscFunctionBegin;
  baij->ht_size = (int)(factor*nz);
  size = baij->ht_size;

  if (baij->ht) {
    PetscFunctionReturn(0);
  }

  /* Allocate Memory for the Hash Table */
  ierr     = PetscMalloc((size)*(sizeof(int)+sizeof(MatScalar*))+1,&baij->hd);CHKERRQ(ierr);
  baij->ht = (int*)(baij->hd + size);
  HD       = baij->hd;
  HT       = baij->ht;

  ierr = PetscMemzero(HD,size*(sizeof(int)+sizeof(MatScalar*)));CHKERRQ(ierr);

  /* Loop Over A */
  for (i=0; i<a->mbs; i++) {
    for (j=ai[i]; j<ai[i+1]; j++) {
      row = i+rstart;
      col = aj[j]+cstart;

      key = row*Nbs + col + 1;
      h1  = HASH(size,key,tmp);
      for (k=0; k<size; k++) {
        if (!HT[(h1+k)%size]) {
          HT[(h1+k)%size] = key;
          HD[(h1+k)%size] = a->a + j*bs2;
          break;
#if defined(PETSC_USE_BOPT_g)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_BOPT_g)
      if (k > max) max = k;
#endif
    }
  }
  /* Loop Over B */
  for (i=0; i<b->mbs; i++) {
    for (j=bi[i]; j<bi[i+1]; j++) {
      row = i+rstart;
      col = garray[bj[j]];
      key = row*Nbs + col + 1;
      h1  = HASH(size,key,tmp);
      for (k=0; k<size; k++) {
        if (!HT[(h1+k)%size]) {
          HT[(h1+k)%size] = key;
          HD[(h1+k)%size] = b->a + j*bs2;
          break;
#if defined(PETSC_USE_BOPT_g)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_BOPT_g)
      if (k > max) max = k;
#endif
    }
  }

  /* Print Summary */
#if defined(PETSC_USE_BOPT_g)
  for (i=0,j=0; i<size; i++) {
    if (HT[i]) {j++;}
  }
  PetscLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n",(j == 0) ? 0.0 : ((PetscReal)(ct+j))/j,max);
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  int         ierr,nstash,reallocs;
  InsertMode  addv;

  PetscFunctionBegin;
  if (baij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted values while others added");
  }
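  /* This check works because INSERT_VALUES and ADD_VALUES are distinct
     nonzero bit patterns (1 and 2, assuming the standard InsertMode enum
     ordering with NOT_SET_VALUES = 0), so the bitwise OR across all
     processes equals ADD_VALUES|INSERT_VALUES exactly when some processes
     inserted and others added. */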
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  PetscLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  PetscLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data;
  int         i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2;
  int         *row,*col,other_disassembled;
  PetscTruth  r1,r2,r3;
  MatScalar   *val;
  InsertMode  addv = mat->insertmode;

  PetscFunctionBegin;
  if (!baij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       temporarily set the roworiented flags to PETSC_FALSE, and restore the
       original flags after MatSetValues() */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = b->roworiented;
    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;
    b->roworiented    = PETSC_FALSE;
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    b->roworiented    = r3;
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, so we can skip this step
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_BOPT_g)
  if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    PetscLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  if (baij->rowvalues) {
    ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
    baij->rowvalues = 0;
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
static int MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  int               ierr,bs = baij->bs,size = baij->size,rank = baij->rank;
  PetscTruth        isascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  if (isascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_LONG) {
      MatInfo info;
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
              rank,mat->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
              baij->bs,(int)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr);
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto the first processor */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    int         M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar   *a;

    if (!rank) {
      ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    }
    PetscLogObjectParent(mat,A);

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstart + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstart+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ*)baij->B->data;
    ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstart + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ"
int MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
{
  int        ierr;
  PetscTruth isascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  if (isascii || isdraw || issocket || isbinary) {
    ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(1,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIBAIJ"
int MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  int         ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",mat->M,mat->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(baij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
#else
  if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
#endif
  if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
  if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
  if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
  if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
  if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
  if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
#if defined(PETSC_USE_MAT_SINGLE)
  if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);}
#endif
  ierr = PetscFree(baij);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIBAIJ"
int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
  int         ierr,nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  }
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->m) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
  }
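  /* Overlap communication with computation: begin scattering the needed
     off-process entries of xx into a->lvec, apply the local diagonal block
     while the messages are in flight, then complete the scatter and add in
     the off-diagonal (B) contribution. */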
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIBAIJ"
int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
  int         ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIBAIJ"
int MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
  int         ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually
     inserted in yy until the next line, which is true for my implementation
     but is not perhaps always true. */
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
int MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
  int         ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually
     inserted in zz until the next line, which is true for my implementation
     but is not perhaps always true. */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
int MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
  int         ierr;

  PetscFunctionBegin;
  if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIBAIJ"
int MatScale_MPIBAIJ(PetscScalar *aa,Mat A)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
  int         ierr;

  PetscFunctionBegin;
  ierr = MatScale(aa,a->A);CHKERRQ(ierr);
  ierr = MatScale(aa,a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIBAIJ"
int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
{
  Mat_MPIBAIJ  *mat = (Mat_MPIBAIJ*)matin->data;
  PetscScalar  *vworkA,*vworkB,**pvA,**pvB,*v_p;
  int          bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB;
  int          nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs;
  int          *cmap,*idx_p,cstart = mat->cstart;

  PetscFunctionBegin;
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
    int         max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc(max*bs2*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
    mat->rowindices = (int*)(mat->rowvalues + max*bs2);
  }

  if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows");
  lrow = row - brstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap  = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart)
              idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIBAIJ"
int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;

  PetscFunctionBegin;
  if (baij->getrowactive == PETSC_FALSE) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
  }
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetBlockSize_MPIBAIJ"
int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;

  PetscFunctionBegin;
  *bs = baij->bs;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIBAIJ"
int MatZeroEntries_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data;
  int         ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIBAIJ"
int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data;
  Mat         A = a->A,B = a->B;
  int         ierr;
  PetscReal   isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size     = (PetscReal)a->bs;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else {
    SETERRQ1(1,"Unknown MatInfoType argument %d",flag);
  }
  info->rows_global       = (PetscReal)matin->M;
  info->columns_global    = (PetscReal)matin->N;
  info->rows_local        = (PetscReal)matin->m;
  info->columns_local     = (PetscReal)matin->n;
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIBAIJ"
int MatSetOption_MPIBAIJ(Mat A,MatOption op)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
  int         ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NO_NEW_NONZERO_LOCATIONS:
  case MAT_YES_NEW_NONZERO_LOCATIONS:
  case MAT_COLUMNS_UNSORTED:
  case MAT_COLUMNS_SORTED:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_KEEP_ZEROED_ROWS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = PETSC_TRUE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROWS_SORTED:
  case MAT_ROWS_UNSORTED:
  case MAT_YES_NEW_DIAGONALS:
  case MAT_USE_SINGLE_PRECISION_SOLVES:
    PetscLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
    break;
  case MAT_COLUMN_ORIENTED:
    a->roworiented = PETSC_FALSE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = PETSC_TRUE;
    break;
  case MAT_NO_NEW_DIAGONALS:
    SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
    break;
  case MAT_USE_HASH_TABLE:
    a->ht_flag = PETSC_TRUE;
    break;
  default:
    SETERRQ(PETSC_ERR_SUP,"unknown option");
    break;
  }
  PetscFunctionReturn(0);
}

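/*
   MatTranspose_MPIBAIJ - Forms the transpose by creating a new parallel
   matrix with the row and column sizes exchanged and inserting every stored
   block, one block column at a time, with row and column indices swapped;
   in-place transposition is supported only for square matrices.
*/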
#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPIBAIJ"
int MatTranspose_MPIBAIJ(Mat A,Mat *matout)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data;
  Mat_SeqBAIJ *Aloc;
  Mat         B;
  int         ierr,M=A->M,N=A->N,*ai,*aj,i,*rvals,j,k,col;
  int         bs=baij->bs,mbs=baij->mbs;
  MatScalar   *a;

  PetscFunctionBegin;
  if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
  ierr = MatCreateMPIBAIJ(A->comm,baij->bs,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);

  /* copy over the A part */
  Aloc = (Mat_SeqBAIJ*)baij->A->data;
  ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
  ierr = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr);

  for (i=0; i<mbs; i++) {
    rvals[0] = bs*(baij->rstart + i);
    for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
    for (j=ai[i]; j<ai[i+1]; j++) {
      col = (baij->cstart+aj[j])*bs;
      for (k=0; k<bs; k++) {
        ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
        col++; a += bs;
      }
    }
  }
  /* copy over the B part */
  Aloc = (Mat_SeqBAIJ*)baij->B->data;
  ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
  for (i=0; i<mbs; i++) {
    rvals[0] = bs*(baij->rstart + i);
    for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
    for (j=ai[i]; j<ai[i+1]; j++) {
      col = baij->garray[aj[j]]*bs;
      for (k=0; k<bs; k++) {
        ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
        col++; a += bs;
      }
    }
  }
  ierr = PetscFree(rvals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  if (matout) {
    *matout = B;
  } else {
    ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
int MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  Mat         a = baij->A,b = baij->B;
  int         ierr,s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
  }
  /* scale the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}

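/*
   MatZeroRows_MPIBAIJ - Each process scans the (possibly non-local) row
   indices it was handed, sends every index to the owning process, receives
   the indices it owns, and then zeros those rows in its local A and B
   blocks, optionally inserting a value on the diagonal.
*/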
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIBAIJ"
int MatZeroRows_MPIBAIJ(Mat A,IS is,PetscScalar *diag)
{
  Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
  int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
  int            *procs,*nprocs,j,idx,nsends,*work,row;
  int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int            *rvalues,tag = A->tag,count,base,slen,n,*source;
  int            *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;
  PetscTruth     found;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);

  /* first count number of contributors to each processor */
  ierr  = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
  ierr  = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
  procs = nprocs + size;
  ierr  = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note */
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += procs[i];}

  /* inform other processors of number of messages and max length */
  ierr   = PetscMalloc(2*size*sizeof(int),&work);CHKERRQ(ierr);
  ierr   = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr);
  nmax   = work[rank];
  nrecvs = work[size+rank];
  ierr   = PetscFree(work);CHKERRQ(ierr);

  /* post receives: */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
     the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
  starts[0]  = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for (i=0; i<size; i++) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank]*bs;

  /* wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex]  = recv_status.MPI_SOURCE;
    lens[imdex]    = n;
    slen          += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PetscLogObjectParent(A,istmp);

  /*
        Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

       Contributed by: Matthew Knepley
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B */
  ierr = MatZeroRows_SeqBAIJ(l->B,istmp,0);CHKERRQ(ierr);
  if (diag && (l->A->M == l->A->N)) {
    ierr = MatZeroRows_SeqBAIJ(l->A,istmp,diag);CHKERRQ(ierr);
  } else if (diag) {
    ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr);
    if (((Mat_SeqBAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i=0; i<slen; i++) {
      row  = lrows[i] + rstart_bs;
      ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr);
  }

  ierr = ISDestroy(istmp);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatPrintHelp_MPIBAIJ"
int MatPrintHelp_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*)A->data;
  MPI_Comm    comm = A->comm;
  static int  called = 0;
  int         ierr;

  PetscFunctionBegin;
  if (!a->rank) {
    ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr);
  }
  if (called) {PetscFunctionReturn(0);} else called = 1;
  ierr = (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
  ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
int MatSetUnfactored_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*)A->data;
  int         ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static int MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);

#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPIBAIJ"
int MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag)
{
  Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
  Mat         a,b,c,d;
  PetscTruth  flg;
  int         ierr;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)B,MATMPIBAIJ,&flg);CHKERRQ(ierr);
  if (!flg) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type");
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
  if (flg == PETSC_TRUE) {
    ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
  }
  ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}


#undef __FUNCT__
#define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ"
int MatSetUpPreallocation_MPIBAIJ(Mat A)
{
  int        ierr;

  PetscFunctionBegin;
  ierr = MatMPIBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = {
  MatSetValues_MPIBAIJ,
  MatGetRow_MPIBAIJ,
  MatRestoreRow_MPIBAIJ,
  MatMult_MPIBAIJ,
  MatMultAdd_MPIBAIJ,
  MatMultTranspose_MPIBAIJ,
  MatMultTransposeAdd_MPIBAIJ,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  MatTranspose_MPIBAIJ,
  MatGetInfo_MPIBAIJ,
  MatEqual_MPIBAIJ,
  MatGetDiagonal_MPIBAIJ,
  MatDiagonalScale_MPIBAIJ,
  MatNorm_MPIBAIJ,
  MatAssemblyBegin_MPIBAIJ,
  MatAssemblyEnd_MPIBAIJ,
  0,
  MatSetOption_MPIBAIJ,
  MatZeroEntries_MPIBAIJ,
  MatZeroRows_MPIBAIJ,
  0,
  0,
  0,
  0,
  MatSetUpPreallocation_MPIBAIJ,
  0,
  0,
  0,
  0,
  MatDuplicate_MPIBAIJ,
  0,
  0,
  0,
  0,
  0,
  MatGetSubMatrices_MPIBAIJ,
  MatIncreaseOverlap_MPIBAIJ,
  MatGetValues_MPIBAIJ,
  0,
  MatPrintHelp_MPIBAIJ,
  MatScale_MPIBAIJ,
  0,
  0,
  0,
  MatGetBlockSize_MPIBAIJ,
  0,
  0,
  0,
  0,
  0,
  0,
  MatSetUnfactored_MPIBAIJ,
  0,
  MatSetValuesBlocked_MPIBAIJ,
  0,
  MatDestroy_MPIBAIJ,
  MatView_MPIBAIJ,
  MatGetPetscMaps_Petsc,
  0,
  0,
  0,
  0,
  0,
  0,
  MatGetRowMax_MPIBAIJ};


EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
int MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
{
  PetscFunctionBegin;
  *a      = ((Mat_MPIBAIJ *)A->data)->A;
  *iscopy = PETSC_FALSE;
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIBAIJ"
int MatCreate_MPIBAIJ(Mat B)
{
  Mat_MPIBAIJ  *b;
  int          ierr;
  PetscTruth   flg;

  PetscFunctionBegin;

  ierr = PetscNew(Mat_MPIBAIJ,&b);CHKERRQ(ierr);
  B->data = (void*)b;

  ierr    = PetscMemzero(b,sizeof(Mat_MPIBAIJ));CHKERRQ(ierr);
  ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->mapping    = 0;
  B->factor     = 0;
  B->assembled  = PETSC_FALSE;

  B->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  ierr          = PetscMalloc(3*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
  PetscLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
  b->cowners    = b->rowners + b->size + 2;
  b->rowners_bs = b->cowners + b->size + 2;

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
  b->donotstash  = PETSC_FALSE;
  b->colmap      = PETSC_NULL;
  b->garray      = PETSC_NULL;
  b->roworiented = PETSC_TRUE;

#if defined(PETSC_USE_MAT_SINGLE)
  /* stuff for MatSetValues_XXX in single precision */
  b->setvalueslen     = 0;
  b->setvaluescopy    = PETSC_NULL;
#endif

  /* stuff used in block assembly */
  b->barray       = 0;

  /* stuff used for matrix vector multiply */
  b->lvec         = 0;
  b->Mvctx        = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = 0;
  b->hd           = 0;
  b->ht_size      = 0;
  b->ht_flag      = PETSC_FALSE;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  ierr = PetscOptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
  if (flg) {
    PetscReal fact = 1.39;
    ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
    ierr = PetscOptionsGetReal(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
    if (fact <= 1.0) fact = 1.39;
    ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
    PetscLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact);
  }
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
                                     "MatStoreValues_MPIBAIJ",
                                     MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
                                     "MatRetrieveValues_MPIBAIJ",
                                     MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
                                     "MatGetDiagonalBlock_MPIBAIJ",
                                     MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJSetPreallocation"
/*@C
   MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ
   format (block compressed row).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on Mat

   Input Parameters:
+  A - the matrix
.  bs   - size of block
.  d_nz  - number of block nonzeros per block row in diagonal portion of local
           submatrix  (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           of the diagonal portion of the local submatrix (possibly different for each block
           row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
.  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
           submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or PETSC_NULL.

   Options Database Keys:
.   -mat_no_unroll - uses code that does not unroll the loops in the
                     block calculations (much slower)
.   -mat_block_size - size of the blocks to use

   Notes:
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          -------------------
   row 3  |  o o o d d d o o o o o o
   row 4  |  o o o d d d o o o o o o
   row 5  |  o o o d d d o o o o o o
          -------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
   stored simply in the MATSEQBAIJ format for compressed row storage.

   Now d_nz should indicate the number of block nonzeros per row in the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.
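
   Example usage (a minimal sketch; the communicator comm, the global sizes
   M and N, the block size 2, and the per-block-row estimates 5 and 2 are
   illustrative assumptions, not values this routine requires):
.vb
   Mat A;
   int ierr;
   ierr = MatCreate(comm,PETSC_DECIDE,PETSC_DECIDE,M,N,&A);CHKERRQ(ierr);
   ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
   ierr = MatMPIBAIJSetPreallocation(A,2,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
.ve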

   Level: intermediate

.keywords: matrix, block, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
@*/
int MatMPIBAIJSetPreallocation(Mat B,int bs,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
{
  Mat_MPIBAIJ  *b;
  int          ierr,i;
  PetscTruth   flg2;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)B,MATMPIBAIJ,&flg2);CHKERRQ(ierr);
  if (!flg2) PetscFunctionReturn(0);

  B->preallocated = PETSC_TRUE;
  ierr = PetscOptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);

  if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
  if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
  if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
  if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
  if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
  if (d_nnz) {
    for (i=0; i<B->m/bs; i++) {
      if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
    }
  }
  if (o_nnz) {
    for (i=0; i<B->m/bs; i++) {
      if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
    }
  }

  ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr);
  ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr);
  ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
  ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);

  b = (Mat_MPIBAIJ*)B->data;
  b->bs  = bs;
  b->bs2 = bs*bs;
  b->mbs = B->m/bs;
  b->nbs = B->n/bs;
  b->Mbs = B->M/bs;
  b->Nbs = B->N/bs;

  ierr = MPI_Allgather(&b->mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
  b->rowners[0]    = 0;
  for (i=2; i<=b->size; i++) {
    b->rowners[i] += b->rowners[i-1];
  }
  b->rstart    = b->rowners[b->rank];
  b->rend      = b->rowners[b->rank+1];

  ierr = MPI_Allgather(&b->nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
  b->cowners[0] = 0;
  for (i=2; i<=b->size; i++) {
    b->cowners[i] += b->cowners[i-1];
  }
  b->cstart    = b->cowners[b->rank];
  b->cend      = b->cowners[b->rank+1];

  for (i=0; i<=b->size; i++) {
    b->rowners_bs[i] = b->rowners[i]*bs;
  }
  b->rstart_bs = b->rstart*bs;
  b->rend_bs   = b->rend*bs;
  b->cstart_bs = b->cstart*bs;
  b->cend_bs   = b->cend*bs;

  ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,B->m,B->n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
  PetscLogObjectParent(B,b->A);
  ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,B->m,B->N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
  PetscLogObjectParent(B,b->B);
  ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIBAIJ"
/*@C
   MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
   (block compressed row).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs   - size of block
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
           This value should be the same as the local size used in creating the
           y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
           This value should be the same as the local size used in creating the
           x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz  - number of nonzero blocks per block row in diagonal portion of local
           submatrix  (same for all local rows)
.  d_nnz - array containing the number of nonzero blocks in the various block rows
           of the diagonal portion of the local submatrix (possibly different for each block
           row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
.  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
           submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzero blocks in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or PETSC_NULL.

   Output Parameter:
.  A - the matrix

   Options Database Keys:
.   -mat_no_unroll - uses code that does not unroll the loops in the
                     block calculations (much slower)
.   -mat_block_size - size of the blocks to use

   Notes:
   A nonzero block is any block that has 1 or more nonzeros in it

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          -------------------
   row 3  |  o o o d d d o o o o o o
   row 4  |  o o o d d d o o o o o o
   row 5  |  o o o d d d o o o o o o
          -------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
   stored simply in the MATSEQBAIJ format for compressed row storage.

   Now d_nz should indicate the number of block nonzeros per row in the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.
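
   Example usage (a minimal sketch; the global size 8, the block size 2, the
   estimates 5 and 2, and the single 2x2 block inserted at block row/column 0
   are illustrative assumptions):
.vb
   Mat         A;
   int         ierr,row = 0,col = 0;
   PetscScalar v[4] = {1.0,0.0,0.0,1.0};
   ierr = MatCreateMPIBAIJ(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DECIDE,8,8,
                           5,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
   ierr = MatSetValuesBlocked(A,1,&row,1,&col,v,INSERT_VALUES);CHKERRQ(ierr);
   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
.ve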

   Level: intermediate

.keywords: matrix, block, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation()
@*/
int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
{
  int ierr,size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
    ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIBAIJ"
static int MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat         mat;
  Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
  int         ierr,len=0;

  PetscFunctionBegin;
  *newmat       = 0;
  ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
  ierr = MatSetType(mat,MATMPIBAIJ);CHKERRQ(ierr);
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  a      = (Mat_MPIBAIJ*)mat->data;
  a->bs  = oldmat->bs;
  a->bs2 = oldmat->bs2;
  a->mbs = oldmat->mbs;
  a->nbs = oldmat->nbs;
  a->Mbs = oldmat->Mbs;
  a->Nbs = oldmat->Nbs;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->cstart       = oldmat->cstart;
  a->cend         = oldmat->cend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;
  a->barray       = 0;
  a->rstart_bs    = oldmat->rstart_bs;
  a->rend_bs      = oldmat->rend_bs;
  a->cstart_bs    = oldmat->cstart_bs;
  a->cend_bs      = oldmat->cend_bs;

  /* hash table stuff */
  a->ht           = 0;
  a->hd           = 0;
  a->ht_size      = 0;
  a->ht_flag      = oldmat->ht_flag;
  a->ht_fact      = oldmat->ht_fact;
  a->ht_total_ct  = 0;
  a->ht_insert_ct = 0;

  ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr);
  ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
  ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr);
  if (oldmat->colmap) {
#if defined (PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc((a->Nbs)*sizeof(int),&a->colmap);CHKERRQ(ierr);
    PetscLogObjectMemory(mat,(a->Nbs)*sizeof(int));
    ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;
  if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
    ierr = PetscMalloc(len*sizeof(int),&a->garray);CHKERRQ(ierr);
    PetscLogObjectMemory(mat,len*sizeof(int));
    ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr);
  } else a->garray = 0;

  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->lvec);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);

  PetscLogObjectParent(mat,a->Mvctx);
  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->A);
  ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->B);
  ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}

#include "petscsys.h"

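/*
   MatLoad_MPIBAIJ - Process 0 reads the matrix from the binary viewer and
   ships row lengths, column indices, and numerical values to the owning
   processes; extra rows (with 1.0 on the diagonal) are padded onto the end
   so that the global size is divisible by the block size.
*/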
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIBAIJ"
int MatLoad_MPIBAIJ(PetscViewer viewer,MatType type,Mat *newmat)
{
  Mat          A;
  int          i,nz,ierr,j,rstart,rend,fd;
  PetscScalar  *vals,*buf;
  MPI_Comm     comm = ((PetscObject)viewer)->comm;
  MPI_Status   status;
  int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
  int          *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf;
  int          tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows;
  int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
  int          dcount,kmax,k,nzcount,tmp;

  PetscFunctionBegin;
  ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
    if (header[3] < 0) {
      SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPIBAIJ");
    }
  }

  ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2];

  if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");

  /*
     This code adds extra rows to make sure the number of rows is
     divisible by the blocksize
  */
  Mbs        = M/bs;
  extra_rows = bs - M + bs*(Mbs);
  if (extra_rows == bs) extra_rows = 0;
  else                  Mbs++;
  if (extra_rows && !rank) {
    PetscLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n");
  }

  /* determine ownership of all rows */
  mbs        = Mbs/size + ((Mbs % size) > rank);
  m          = mbs*bs;
  ierr       = PetscMalloc(2*(size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
  browners   = rowners + size + 1;
  ierr       = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
  for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr = PetscMalloc((rend-rstart)*bs*sizeof(int),&locrowlens);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
    ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz     = procsnz[0];
    ierr   = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr);
    mycols = ibuf;
    if (size == 1)  nz -= extra_rows;
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
    if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }

    /* read in everyone else's part (except the last processor's) and ship it off */
    for (i=1; i<size-1; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
    }
    /* read in the stuff for the last proc */
    if (size != 1) {
      nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
      ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += locrowlens[i];
    }
    ierr   = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr);
    mycols = ibuf;
    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries */
  ierr     = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&dlens);CHKERRQ(ierr);
  odlens   = dlens + (rend-rstart);
  ierr     = PetscMalloc(3*Mbs*sizeof(int),&mask);CHKERRQ(ierr);
  ierr     = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr);
  masked1  = mask    + Mbs;
  masked2  = masked1 + Mbs;
  rowcount = 0; nzcount = 0;
  for (i=0; i<mbs; i++) {
    dcount  = 0;
    odcount = 0;
    for (j=0; j<bs; j++) {
      kmax = locrowlens[rowcount];
      for (k=0; k<kmax; k++) {
        tmp = mycols[nzcount++]/bs;
        if (!mask[tmp]) {
          mask[tmp] = 1;
          if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
          else masked1[dcount++] = tmp;
        }
      }
      rowcount++;
    }

    dlens[i]  = dcount;
    odlens[i] = odcount;

    /* zero out the mask elements we set */
    for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
    for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
  }

  /* create our matrix */
  ierr = MatCreateMPIBAIJ(comm,bs,m,m,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);CHKERRQ(ierr);
  A = *newmat;
  ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);

  if (!rank) {
    ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
    /* read in my part of the matrix numerical values */
    nz = procsnz[0];
    vals = buf;
    mycols = ibuf;
    if (size == 1)  nz -= extra_rows;
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
    if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }

    /* insert into matrix */
    jj      = rstart*bs;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
    /* read in other processors' parts (except the last one) and ship them out */
    for (i=1; i<size-1; i++) {
      nz   = procsnz[i];
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    /* the last proc */
    if (size != 1) {
      nz   = procsnz[size-1] - extra_rows;
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
      ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);

    /* receive message of values */
    vals   = buf;
    mycols = ibuf;
    ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart*bs;
    for (i=0; i<m; i++) {
      ierr    = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
  }
  ierr = PetscFree(locrowlens);CHKERRQ(ierr);
  ierr = PetscFree(buf);CHKERRQ(ierr);
  ierr = PetscFree(ibuf);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);
  ierr = PetscFree(dlens);CHKERRQ(ierr);
  ierr = PetscFree(mask);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
/*@
   MatMPIBAIJSetHashTableFactor - Sets the factor used to compute the size of the hash table.

   Input Parameters:
+  mat  - the matrix
-  fact - factor

   Collective on Mat

   Level: advanced

   Notes:
   This can also be set by the command line option: -mat_use_hash_table <fact>
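
   Example usage (a minimal sketch; mat is assumed to already be a MATMPIBAIJ
   matrix, and the factor 1.5 is an illustrative assumption):
.vb
   ierr = MatSetOption(mat,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
   ierr = MatMPIBAIJSetHashTableFactor(mat,1.5);CHKERRQ(ierr);
.ve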

.keywords: matrix, hashtable, factor, HT

.seealso: MatSetOption()
@*/
int MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
{
  Mat_MPIBAIJ *baij;
  int         ierr;
  PetscTruth  flg;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_COOKIE);
  ierr = PetscTypeCompare((PetscObject)mat,MATMPIBAIJ,&flg);CHKERRQ(ierr);
  if (!flg) {
    SETERRQ(PETSC_ERR_ARG_WRONG,"Incorrect matrix type. Use MPIBAIJ only.");
  }
  baij = (Mat_MPIBAIJ*)mat->data;
  baij->ht_fact = fact;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
int MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,int **colmap)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
  PetscFunctionBegin;
  *Ad     = a->A;
  *Ao     = a->B;
  *colmap = a->garray;
  PetscFunctionReturn(0);
}
