xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision a56a16c87eec4e97c4cf0ce1e098241a17b4c30e)
1 /*$Id: mpibaij.c,v 1.234 2001/09/25 22:56:49 balay Exp $*/
2 
3 #include "src/mat/impls/baij/mpi/mpibaij.h"   /*I  "petscmat.h"  I*/
4 #include "src/vec/vecimpl.h"
5 
6 EXTERN int MatSetUpMultiply_MPIBAIJ(Mat);
7 EXTERN int DisAssemble_MPIBAIJ(Mat);
8 EXTERN int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int);
9 EXTERN int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatReuse,Mat **);
10 EXTERN int MatGetValues_SeqBAIJ(Mat,int,int *,int,int *,PetscScalar *);
11 EXTERN int MatSetValues_SeqBAIJ(Mat,int,int *,int,int *,PetscScalar *,InsertMode);
12 EXTERN int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode);
13 EXTERN int MatGetRow_SeqBAIJ(Mat,int,int*,int**,PetscScalar**);
14 EXTERN int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,PetscScalar**);
15 EXTERN int MatPrintHelp_SeqBAIJ(Mat);
16 EXTERN int MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar*);
17 
18 /*  UGLY, ugly, ugly
19    When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does
20    not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and
21    inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ()
22    converts the entries into single precision and then calls ..._MatScalar() to put them
23    into the single precision data structures.
24 */
25 #if defined(PETSC_USE_MAT_SINGLE)
26 EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
27 EXTERN int MatSetValues_MPIBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
28 EXTERN int MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
29 EXTERN int MatSetValues_MPIBAIJ_HT_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
30 EXTERN int MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
31 #else
32 #define MatSetValuesBlocked_SeqBAIJ_MatScalar      MatSetValuesBlocked_SeqBAIJ
33 #define MatSetValues_MPIBAIJ_MatScalar             MatSetValues_MPIBAIJ
34 #define MatSetValuesBlocked_MPIBAIJ_MatScalar      MatSetValuesBlocked_MPIBAIJ
35 #define MatSetValues_MPIBAIJ_HT_MatScalar          MatSetValues_MPIBAIJ_HT
36 #define MatSetValuesBlocked_MPIBAIJ_HT_MatScalar   MatSetValuesBlocked_MPIBAIJ_HT
37 #endif
38 
39 #undef __FUNCT__
40 #define __FUNCT__ "MatGetRowMax_MPIBAIJ"
41 int MatGetRowMax_MPIBAIJ(Mat A,Vec v)
42 {
43   Mat_MPIBAIJ  *a = (Mat_MPIBAIJ*)A->data;
44   int          ierr,i;
45   PetscScalar  *va,*vb;
46   Vec          vtmp;
47 
48   PetscFunctionBegin;
49 
50   ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr);
51   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
52 
53   ierr = VecCreateSeq(PETSC_COMM_SELF,A->m,&vtmp);CHKERRQ(ierr);
54   ierr = MatGetRowMax(a->B,vtmp);CHKERRQ(ierr);
55   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
56 
57   for (i=0; i<A->m; i++){
58     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) va[i] = vb[i];
59   }
60 
61   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
62   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
63   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
64 
65   PetscFunctionReturn(0);
66 }
67 
68 EXTERN_C_BEGIN
69 #undef __FUNCT__
70 #define __FUNCT__ "MatStoreValues_MPIBAIJ"
71 int MatStoreValues_MPIBAIJ(Mat mat)
72 {
73   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
74   int         ierr;
75 
76   PetscFunctionBegin;
77   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
78   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
79   PetscFunctionReturn(0);
80 }
81 EXTERN_C_END
82 
83 EXTERN_C_BEGIN
84 #undef __FUNCT__
85 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
86 int MatRetrieveValues_MPIBAIJ(Mat mat)
87 {
88   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
89   int         ierr;
90 
91   PetscFunctionBegin;
92   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
93   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
94   PetscFunctionReturn(0);
95 }
96 EXTERN_C_END
97 
98 /*
99      Local utility routine that creates a mapping from the global column
100    number to the local number in the off-diagonal part of the local
   storage of the matrix.  This is done in a non-scalable way since the
102    length of colmap equals the global matrix length.
103 */
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIBAIJ_Private"
static int CreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data;
  int         nbs = B->nbs,i,bs=B->bs,ierr;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  /* hash-table variant: keys and values are shifted by +1 so that a
     lookup result of 0 can mean "not present" */
  ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr);
  for (i=0; i<nbs; i++){
    ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
  }
#else
  /* dense variant: one int per global block column (hence non-scalable);
     entries hold the local point offset +1, with 0 meaning "not present" */
  ierr = PetscMalloc((baij->Nbs+1)*sizeof(int),&baij->colmap);CHKERRQ(ierr);
  PetscLogObjectMemory(mat,baij->Nbs*sizeof(int));
  ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));CHKERRQ(ierr);
  for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}
126 
#define CHUNKSIZE  10

/*
   Inserts or adds a single scalar entry at (row,col) -- point indices,
   not block indices -- into the diagonal sequential block.  Relies on the
   caller (MatSetValues_MPIBAIJ_MatScalar) having set up a, aa, ai, aj,
   aimax, ailen, bs, bs2 and the scratch variables used below.  If the
   target block row is full, storage is enlarged by CHUNKSIZE blocks.
*/
#define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      /* binary search narrows the range, then a short linear scan */ \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto a_noinsert; \
        } \
      } \
      if (a->nonew == 1) goto a_noinsert; \
      else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
      if (nrow >= rmax) { \
        /* there is no extra room in row, therefore enlarge */ \
        int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
        MatScalar *new_a; \
 \
        if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
 \
        /* malloc new storage space */ \
        len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \
        ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
        new_j   = (int*)(new_a + bs2*new_nz); \
        new_i   = new_j + new_nz; \
 \
        /* copy over old data into new slots */ \
        for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \
        for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
        ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
        len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
        ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
        /* bugfix: zero the inserted gap using sizeof(MatScalar), not
           sizeof(PetscScalar); new_a holds MatScalar, and with
           PETSC_USE_MAT_SINGLE defined PetscScalar is larger, so the old
           PetscMemzero overran the gap (cf. the B macro below, which
           already uses MatScalar) */ \
        ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
                    aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
        /* free up old matrix storage */ \
        ierr = PetscFree(a->a);CHKERRQ(ierr);  \
        if (!a->singlemalloc) { \
          ierr = PetscFree(a->i);CHKERRQ(ierr); \
          ierr = PetscFree(a->j);CHKERRQ(ierr);\
        } \
        aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
        a->singlemalloc = PETSC_TRUE; \
 \
        rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
        rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
        PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
        a->maxnz += bs2*CHUNKSIZE; \
        a->reallocs++; \
        a->nz++; \
      } \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      a_noinsert:; \
    ailen[brow] = nrow; \
}
205 
/*
   Same as MatSetValues_SeqBAIJ_A_Private() above but operates on the
   off-diagonal sequential block (b, ba, bi, bj, bimax, bilen).  (row,col)
   are point indices; storage grows by CHUNKSIZE blocks when the target
   block row is full.
*/
#define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      /* binary search narrows the range, then a short linear scan */ \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto b_noinsert; \
        } \
      } \
      if (b->nonew == 1) goto b_noinsert; \
      else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
      if (nrow >= rmax) { \
        /* there is no extra room in row, therefore enlarge */ \
        int       new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
        MatScalar *new_a; \
 \
        if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
 \
        /* malloc new storage space */ \
        len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \
        ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
        new_j   = (int*)(new_a + bs2*new_nz); \
        new_i   = new_j + new_nz; \
 \
        /* copy over old data into new slots */ \
        for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \
        for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
        ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
        len  = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
        ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
        ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
                    ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
        /* free up old matrix storage */ \
        ierr = PetscFree(b->a);CHKERRQ(ierr);  \
        if (!b->singlemalloc) { \
          ierr = PetscFree(b->i);CHKERRQ(ierr); \
          ierr = PetscFree(b->j);CHKERRQ(ierr); \
        } \
        ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
        b->singlemalloc = PETSC_TRUE; \
 \
        rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
        rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
        PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
        b->maxnz += bs2*CHUNKSIZE; \
        b->reallocs++; \
        b->nz++; \
      } \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      b_noinsert:; \
    bilen[brow] = nrow; \
}
281 
282 #if defined(PETSC_USE_MAT_SINGLE)
283 #undef __FUNCT__
284 #define __FUNCT__ "MatSetValues_MPIBAIJ"
285 int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
286 {
287   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
288   int         ierr,i,N = m*n;
289   MatScalar   *vsingle;
290 
291   PetscFunctionBegin;
292   if (N > b->setvalueslen) {
293     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
294     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
295     b->setvalueslen  = N;
296   }
297   vsingle = b->setvaluescopy;
298 
299   for (i=0; i<N; i++) {
300     vsingle[i] = v[i];
301   }
302   ierr = MatSetValues_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
303   PetscFunctionReturn(0);
304 }
305 
306 #undef __FUNCT__
307 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
308 int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
309 {
310   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
311   int         ierr,i,N = m*n*b->bs2;
312   MatScalar   *vsingle;
313 
314   PetscFunctionBegin;
315   if (N > b->setvalueslen) {
316     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
317     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
318     b->setvalueslen  = N;
319   }
320   vsingle = b->setvaluescopy;
321   for (i=0; i<N; i++) {
322     vsingle[i] = v[i];
323   }
324   ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
325   PetscFunctionReturn(0);
326 }
327 
328 #undef __FUNCT__
329 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
330 int MatSetValues_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
331 {
332   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
333   int         ierr,i,N = m*n;
334   MatScalar   *vsingle;
335 
336   PetscFunctionBegin;
337   if (N > b->setvalueslen) {
338     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
339     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
340     b->setvalueslen  = N;
341   }
342   vsingle = b->setvaluescopy;
343   for (i=0; i<N; i++) {
344     vsingle[i] = v[i];
345   }
346   ierr = MatSetValues_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
347   PetscFunctionReturn(0);
348 }
349 
350 #undef __FUNCT__
351 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
352 int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
353 {
354   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
355   int         ierr,i,N = m*n*b->bs2;
356   MatScalar   *vsingle;
357 
358   PetscFunctionBegin;
359   if (N > b->setvalueslen) {
360     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
361     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
362     b->setvalueslen  = N;
363   }
364   vsingle = b->setvaluescopy;
365   for (i=0; i<N; i++) {
366     vsingle[i] = v[i];
367   }
368   ierr = MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
369   PetscFunctionReturn(0);
370 }
371 #endif
372 
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ"
/*
   Inserts/adds values (already converted to MatScalar) one entry at a
   time; im[]/in[] are global point indices.  Locally owned entries go
   into the diagonal (A) or off-diagonal (B) sequential block via the
   insertion macros above; entries in rows owned by other processes are
   stashed for communication at assembly time.

   NOTE(review): __FUNCT__ is "MatSetValues_MPIBAIJ" rather than this
   function's source name; when PETSC_USE_MAT_SINGLE is undefined the
   _MatScalar suffix is #defined away (see top of file) and the names
   then agree.
*/
int MatSetValues_MPIBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  MatScalar   value;
  PetscTruth  roworiented = baij->roworiented;
  int         ierr,i,j,row,col;
  int         rstart_orig=baij->rstart_bs;
  int         rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
  int         cend_orig=baij->cend_bs,bs=baij->bs;

  /* Some Variables required in the macro */
  Mat         A = baij->A;
  Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data;
  int         *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar   *aa=a->a;

  Mat         B = baij->B;
  Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data;
  int         *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar   *ba=b->a;

  int         *rp,ii,nrow,_i,rmax,N,brow,bcol;
  int         low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar   *ap,*bap;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;   /* negative row indices are skipped */
#if defined(PETSC_USE_BOPT_g)
    if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      /* row is owned by this process */
      row = im[i] - rstart_orig;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig){
          /* column falls in the diagonal block */
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
        else if (in[j] >= mat->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");}
#endif
        else {
          /* column falls in the off-diagonal block */
          if (mat->was_assembled) {
            /* translate the global block column through colmap (entries
               are shifted by +1; 0 / negative means "not present") */
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              /* column block not yet in B: disassemble back to global
                 numbering; B was rebuilt, so refresh the cached pointers
                 used by the insertion macro */
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B = baij->B;
              b = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba=b->a;
            } else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      /* row owned by another process: stash for assembly-time exchange */
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
456 
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_MatScalar"
/*
   Inserts/adds values one bs x bs block at a time; im[]/in[] are block
   indices (not point indices).  Blocks for locally owned block rows are
   passed to the sequential A or B pieces; others are stashed for
   assembly-time communication.
*/
int MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  MatScalar   *value,*barray=baij->barray;
  PetscTruth  roworiented = baij->roworiented;
  int         ierr,i,j,ii,jj,row,col,rstart=baij->rstart;
  int         rend=baij->rend,cstart=baij->cstart,stepval;
  int         cend=baij->cend,bs=baij->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  /* lazily allocate the persistent one-block scratch buffer */
  if(!barray) {
    ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  /* stride (in scalars) between consecutive rows (row-oriented) or
     columns (column-oriented) of one block inside the caller's array */
  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;   /* negative block rows are skipped */
#if defined(PETSC_USE_BOPT_g)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %d max %d",im[i],baij->Mbs);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = v + i*bs2;
        } else if((!roworiented) && (m == 1)) {
          barray = v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          /* gather the bs x bs block into contiguous scratch storage */
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++  = *value++;
            }
          }
          barray -=bs2;   /* rewind to the start of the copied block */
        }

        if (in[j] >= cstart && in[j] < cend){
          /* block column falls in the diagonal block */
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
        else if (in[j] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
        else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %d max %d",in[j],baij->Nbs);}
#endif
        else {
          /* off-diagonal block: translate through colmap (+1 shifted) */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_BOPT_g)
#if defined (PETSC_USE_CTABLE)
            { int data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined (PETSC_USE_CTABLE)
	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              /* block not yet present in B: fall back to global numbering */
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
            }
          }
          else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      /* block row owned by another process: stash for assembly */
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
557 
#define HASH_KEY 0.6180339887
/* Multiplicative (Fibonacci) hashing: HASH_KEY is the golden-ratio
   conjugate; tmp receives key*HASH_KEY, and the fractional part scaled
   by the table size yields a slot index in [0,size). */
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
/* #define HASH(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_HT_MatScalar"
/*
   Hash-table based insertion of individual (point-indexed) values.  The
   hash table maps a block (row,col) key to the address of that block's
   storage (baij->hd), so no per-row search is needed.  New nonzero
   locations cannot be created here; a missing key is an error.
*/
int MatSetValues_MPIBAIJ_HT_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  PetscTruth  roworiented = baij->roworiented;
  int         ierr,i,j,row,col;
  int         rstart_orig=baij->rstart_bs;
  int         rend_orig=baij->rend_bs,Nbs=baij->Nbs;
  int         h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx;
  PetscReal   tmp;
  MatScalar   **HD = baij->hd,value;
#if defined(PETSC_USE_BOPT_g)
  int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  for (i=0; i<m; i++) {
#if defined(PETSC_USE_BOPT_g)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
#endif
      row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j=0; j<n; j++) {
        col = in[j];
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        /* Look up into the Hash Table */
        key = (row/bs)*Nbs+(col/bs)+1;   /* 1-based key of block (row/bs,col/bs) */
        h1  = HASH(size,key,tmp);


        idx = h1;
#if defined(PETSC_USE_BOPT_g)
        insert_ct++;
        total_ct++;
        /* linear probe forward from h1, wrapping around the table once;
           debug build also counts probes */
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"(row,col) has no entry in the hash table");
            }
          }
        }
#else
        /* linear probe forward from h1, wrapping around the table once */
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"(row,col) has no entry in the hash table");
            }
          }
        }
#endif
        /* A HASH table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else {
      /* row owned by another process: stash for assembly */
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_BOPT_g)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}
639 
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT_MatScalar"
/*
   Hash-table based insertion of whole bs x bs blocks; im[]/in[] are
   block indices.  The table maps a block (row,col) key to the address of
   that block's storage; a missing key is an error (new nonzero locations
   cannot be created here).
*/
int MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  PetscTruth  roworiented = baij->roworiented;
  int         ierr,i,j,ii,jj,row,col;
  int         rstart=baij->rstart ;
  int         rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2;
  int         h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  PetscReal   tmp;
  MatScalar   **HD = baij->hd,*baij_a;
  MatScalar   *v_t,*value;
#if defined(PETSC_USE_BOPT_g)
  int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  /* stride (in scalars) between consecutive rows (row-oriented) or
     columns (column-oriented) of one block inside the caller's array */
  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_BOPT_g)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
#endif
    row   = im[i];
    v_t   = v + i*bs2;
    if (row >= rstart && row < rend) {
      for (j=0; j<n; j++) {
        col = in[j];

        /* Look up into the Hash Table */
        key = row*Nbs+col+1;   /* 1-based key of block (row,col) */
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_BOPT_g)
        total_ct++;
        insert_ct++;
        /* linear probe forward from h1, wrapping around the table once;
           debug build also counts probes */
       if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"(row,col) has no entry in the hash table");
            }
          }
        }
#else
        /* linear probe forward from h1, wrapping around the table once */
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"(row,col) has no entry in the hash table");
            }
          }
        }
#endif
        baij_a = HD[idx];
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t  += bs;
          /* scatter each source row across the block's storage; the
             scalar path above writes at (col%bs)*bs + row%bs, i.e. the
             block is stored column-major, hence the jj += bs stepping */
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj]  += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj]  = *value++;
              }
            }
          }
        } else {
          value = v + j*(stepval+bs)*bs + i*bs;
          /* column-oriented input: copy column by column */
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj]  += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj]  = *value++;
              }
            }
          }
        }
      }
    } else {
      /* block row owned by another process: stash for assembly */
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_BOPT_g)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}
755 
#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIBAIJ"
/*
   Retrieves individual (point-indexed) values; only rows owned by this
   process may be requested.  Off-diagonal columns are located through
   colmap; entries whose block is not stored are returned as 0.0.
*/
int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  int        bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
  int        bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
        if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        if (idxn[j] >= bscstart && idxn[j] < bscend){
          /* column falls in the diagonal block */
          col = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          /* off-diagonal block: translate through colmap (+1 shifted) */
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data --;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          /* absent block (or colmap entry for a different block) reads
             as an explicit zero */
          if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
 PetscFunctionReturn(0);
}
799 
#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIBAIJ"
/*
   Computes a norm of the parallel matrix.  In parallel only NORM_FROBENIUS
   is supported: each process sums the squared moduli of its A (diagonal)
   and B (off-diagonal) entries and the partial sums are reduced across the
   communicator.
*/
int MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
  int        ierr,i,bs2=baij->bs2;
  PetscReal  sum = 0.0;
  MatScalar  *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    /* single process: defer entirely to the sequential implementation */
    ierr =  MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      /* diagonal block: nz blocks of bs2 values each */
      v = amat->a;
      for (i=0; i<amat->nz*bs2; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      /* off-diagonal block */
      v = bmat->a;
      for (i=0; i<bmat->nz*bs2; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      /* global sum of squares, then the square root */
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *nrm = sqrt(*nrm);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
    }
  }
  PetscFunctionReturn(0);
}
839 
840 
841 /*
842   Creates the hash table, and sets the table
843   This table is created only once.
  If new entries need to be added to the matrix
845   then the hash table has to be destroyed and
846   recreated.
847 */
848 #undef __FUNCT__
849 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
850 int MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
851 {
852   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
853   Mat         A = baij->A,B=baij->B;
854   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
855   int         i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
856   int         size,bs2=baij->bs2,rstart=baij->rstart,ierr;
857   int         cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs;
858   int         *HT,key;
859   MatScalar   **HD;
860   PetscReal   tmp;
861 #if defined(PETSC_USE_BOPT_g)
862   int         ct=0,max=0;
863 #endif
864 
865   PetscFunctionBegin;
866   baij->ht_size=(int)(factor*nz);
867   size = baij->ht_size;
868 
869   if (baij->ht) {
870     PetscFunctionReturn(0);
871   }
872 
873   /* Allocate Memory for Hash Table */
874   ierr     = PetscMalloc((size)*(sizeof(int)+sizeof(MatScalar*))+1,&baij->hd);CHKERRQ(ierr);
875   baij->ht = (int*)(baij->hd + size);
876   HD       = baij->hd;
877   HT       = baij->ht;
878 
879 
880   ierr = PetscMemzero(HD,size*(sizeof(int)+sizeof(PetscScalar*)));CHKERRQ(ierr);
881 
882 
883   /* Loop Over A */
884   for (i=0; i<a->mbs; i++) {
885     for (j=ai[i]; j<ai[i+1]; j++) {
886       row = i+rstart;
887       col = aj[j]+cstart;
888 
889       key = row*Nbs + col + 1;
890       h1  = HASH(size,key,tmp);
891       for (k=0; k<size; k++){
892         if (HT[(h1+k)%size] == 0.0) {
893           HT[(h1+k)%size] = key;
894           HD[(h1+k)%size] = a->a + j*bs2;
895           break;
896 #if defined(PETSC_USE_BOPT_g)
897         } else {
898           ct++;
899 #endif
900         }
901       }
902 #if defined(PETSC_USE_BOPT_g)
903       if (k> max) max = k;
904 #endif
905     }
906   }
907   /* Loop Over B */
908   for (i=0; i<b->mbs; i++) {
909     for (j=bi[i]; j<bi[i+1]; j++) {
910       row = i+rstart;
911       col = garray[bj[j]];
912       key = row*Nbs + col + 1;
913       h1  = HASH(size,key,tmp);
914       for (k=0; k<size; k++){
915         if (HT[(h1+k)%size] == 0.0) {
916           HT[(h1+k)%size] = key;
917           HD[(h1+k)%size] = b->a + j*bs2;
918           break;
919 #if defined(PETSC_USE_BOPT_g)
920         } else {
921           ct++;
922 #endif
923         }
924       }
925 #if defined(PETSC_USE_BOPT_g)
926       if (k> max) max = k;
927 #endif
928     }
929   }
930 
931   /* Print Summary */
932 #if defined(PETSC_USE_BOPT_g)
933   for (i=0,j=0; i<size; i++) {
934     if (HT[i]) {j++;}
935   }
936   PetscLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n",(j== 0)? 0.0:((PetscReal)(ct+j))/j,max);
937 #endif
938   PetscFunctionReturn(0);
939 }
940 
941 #undef __FUNCT__
942 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
943 int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
944 {
945   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
946   int         ierr,nstash,reallocs;
947   InsertMode  addv;
948 
949   PetscFunctionBegin;
950   if (baij->donotstash) {
951     PetscFunctionReturn(0);
952   }
953 
954   /* make sure all processors are either in INSERTMODE or ADDMODE */
955   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
956   if (addv == (ADD_VALUES|INSERT_VALUES)) {
957     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
958   }
959   mat->insertmode = addv; /* in case this processor had no cache */
960 
961   ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
962   ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
963   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
964   PetscLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Stash has %d entries,uses %d mallocs.\n",nstash,reallocs);
965   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
966   PetscLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
967   PetscFunctionReturn(0);
968 }
969 
970 EXTERN int MatUseDSCPACK_MPIBAIJ(Mat);
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
/*
   Completes matrix assembly:
     1) drains the point stash and the block stash, inserting received
        off-process values locally,
     2) assembles the local diagonal (A) and off-diagonal (B) blocks,
     3) if any process disassembled its B part, disassembles here too so all
        processes can reassemble consistently,
     4) on the first final assembly builds the scatter for ghost values, and
     5) optionally switches to hash-table based MatSetValues routines.
*/
int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data;
  int         i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2;
  int         *row,*col,other_disassembled;
  PetscTruth  r1,r2,r3;
  MatScalar   *val;
  InsertMode  addv = mat->insertmode;
#if defined(PETSC_HAVE_DSCPACK)
  PetscTruth   flag;
#endif

  PetscFunctionBegin;
  if (!baij->donotstash) {
    /* drain the point (non-blocked) stash, one message at a time */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = b->roworiented;
    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;
    b->roworiented    = PETSC_FALSE;
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* each stashed block holds bs2 values, hence the val+i*bs2 offset */
        ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    b->roworiented    = r3;
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    /* first final assembly: build the ghost-value scatter context */
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_BOPT_g)
  /* report and reset the hash-table search statistics */
  if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
    PetscLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    /* user requested hash-table insertion: build the table and switch the
       MatSetValues function pointers */
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  if (baij->rowvalues) {
    /* free the MatGetRow() work space; it is rebuilt on demand */
    ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
    baij->rowvalues = 0;
  }
#if defined(PETSC_HAVE_DSCPACK)
  ierr = PetscOptionsHasName(PETSC_NULL,"-mat_baij_dscpack",&flag);CHKERRQ(ierr);
  if (flag) { ierr = MatUseDSCPACK_MPIBAIJ(mat);CHKERRQ(ierr); }
#endif
  PetscFunctionReturn(0);
}
1076 
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
/*
   Viewer back end shared by ASCII, draw, socket, and binary viewers.
   Info formats print per-process statistics and return early; otherwise the
   whole matrix is gathered into a temporary parallel matrix living entirely
   on process 0 and viewed there.
*/
static int MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  int               ierr,bs = baij->bs,size = baij->size,rank = baij->rank;
  PetscTruth        isascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  if (isascii) {
    /* info formats: print statistics only, never the matrix entries */
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_LONG) {
      MatInfo info;
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
              rank,mat->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
              baij->bs,(int)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    /* a null draw context means "draw nothing" -- return early */
    PetscDraw       draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    /* single process: view the (complete) local diagonal block directly */
    ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr);
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    int         M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar   *a;

    /* process 0 owns all rows of the gathered matrix; others own none */
    if (!rank) {
      ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    }
    PetscLogObjectParent(mat,A);

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      /* rvals: the bs consecutive global point rows of block row i */
      rvals[0] = bs*(baij->rstart + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstart+aj[j])*bs;
        for (k=0; k<bs; k++) {
          /* insert one column of the bs x bs block at a time */
          ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ*)baij->B->data;
    ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstart + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        /* garray maps local off-diagonal block columns to global block columns */
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1183 
1184 #undef __FUNCT__
1185 #define __FUNCT__ "MatView_MPIBAIJ"
1186 int MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
1187 {
1188   int        ierr;
1189   PetscTruth isascii,isdraw,issocket,isbinary;
1190 
1191   PetscFunctionBegin;
1192   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
1193   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1194   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1195   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1196   if (isascii || isdraw || issocket || isbinary) {
1197     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1198   } else {
1199     SETERRQ1(1,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
1200   }
1201   PetscFunctionReturn(0);
1202 }
1203 
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIBAIJ"
/*
   Destroys the parallel matrix: frees both stashes, the local A and B
   blocks, the column map, the ghost-value scatter, and every auxiliary
   work array that may have been allocated.
*/
int MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  int         ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",mat->M,mat->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(baij->B);CHKERRQ(ierr);
  /* colmap is either a PetscTable or a plain array depending on configuration */
#if defined (PETSC_USE_CTABLE)
  if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
#else
  if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
#endif
  if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
  if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
  if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
  if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
  if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
  /* hd is the single allocation backing both the hash-table data and keys */
  if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
#if defined(PETSC_USE_MAT_SINGLE)
  if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);}
#endif
  ierr = PetscFree(baij);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1237 
1238 #undef __FUNCT__
1239 #define __FUNCT__ "MatMult_MPIBAIJ"
1240 int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1241 {
1242   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1243   int         ierr,nt;
1244 
1245   PetscFunctionBegin;
1246   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1247   if (nt != A->n) {
1248     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1249   }
1250   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1251   if (nt != A->m) {
1252     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
1253   }
1254   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1255   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1256   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1257   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1258   ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1259   PetscFunctionReturn(0);
1260 }
1261 
1262 #undef __FUNCT__
1263 #define __FUNCT__ "MatMultAdd_MPIBAIJ"
1264 int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1265 {
1266   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1267   int        ierr;
1268 
1269   PetscFunctionBegin;
1270   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1271   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1272   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1273   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1274   PetscFunctionReturn(0);
1275 }
1276 
1277 #undef __FUNCT__
1278 #define __FUNCT__ "MatMultTranspose_MPIBAIJ"
1279 int MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1280 {
1281   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1282   int         ierr;
1283 
1284   PetscFunctionBegin;
1285   /* do nondiagonal part */
1286   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1287   /* send it on its way */
1288   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1289   /* do local part */
1290   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1291   /* receive remote parts: note this assumes the values are not actually */
1292   /* inserted in yy until the next line, which is true for my implementation*/
1293   /* but is not perhaps always true. */
1294   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1295   PetscFunctionReturn(0);
1296 }
1297 
1298 #undef __FUNCT__
1299 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
1300 int MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1301 {
1302   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1303   int         ierr;
1304 
1305   PetscFunctionBegin;
1306   /* do nondiagonal part */
1307   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1308   /* send it on its way */
1309   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1310   /* do local part */
1311   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1312   /* receive remote parts: note this assumes the values are not actually */
1313   /* inserted in yy until the next line, which is true for my implementation*/
1314   /* but is not perhaps always true. */
1315   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1316   PetscFunctionReturn(0);
1317 }
1318 
1319 /*
1320   This only works correctly for square matrices where the subblock A->A is the
1321    diagonal block
1322 */
1323 #undef __FUNCT__
1324 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
1325 int MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1326 {
1327   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1328   int         ierr;
1329 
1330   PetscFunctionBegin;
1331   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1332   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1333   PetscFunctionReturn(0);
1334 }
1335 
1336 #undef __FUNCT__
1337 #define __FUNCT__ "MatScale_MPIBAIJ"
1338 int MatScale_MPIBAIJ(PetscScalar *aa,Mat A)
1339 {
1340   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1341   int         ierr;
1342 
1343   PetscFunctionBegin;
1344   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
1345   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
1346   PetscFunctionReturn(0);
1347 }
1348 
#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIBAIJ"
/*
   Returns one (locally owned, point-indexed) row of the matrix, merging the
   entries of the diagonal block A and off-diagonal block B into a single
   list sorted by increasing global column.  Work space for the merged row
   is allocated lazily and sized for the longest local row; it is freed by
   MatAssemblyEnd/MatDestroy.  Must be paired with MatRestoreRow.
*/
int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
{
  Mat_MPIBAIJ  *mat = (Mat_MPIBAIJ*)matin->data;
  PetscScalar  *vworkA,*vworkB,**pvA,**pvB,*v_p;
  int          bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB;
  int          nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs;
  int          *cmap,*idx_p,cstart = mat->cstart;

  PetscFunctionBegin;
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
    int     max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      /* total blocks in block row i across A and B */
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    /* one allocation: values first, then the index array */
    ierr = PetscMalloc(max*bs2*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
    mat->rowindices = (int*)(mat->rowvalues + max*bs2);
  }

  /* NOTE(review): no ';' after SETERRQ -- works because the macro expands to a full statement */
  if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows")
  lrow = row - brstart;

  /* request only the pieces (values and/or indices) the caller asked for */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap  = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      /* imark = number of B entries whose global column precedes the diagonal block */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        /* B entries left of the diagonal block come first */
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        /* then all of A, then the remaining B entries */
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* imark already determined by the values pass above */
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          /* values were not requested: determine imark here */
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart)
              idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
      }
    } else {
      /* empty row */
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1429 
1430 #undef __FUNCT__
1431 #define __FUNCT__ "MatRestoreRow_MPIBAIJ"
1432 int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
1433 {
1434   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1435 
1436   PetscFunctionBegin;
1437   if (baij->getrowactive == PETSC_FALSE) {
1438     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1439   }
1440   baij->getrowactive = PETSC_FALSE;
1441   PetscFunctionReturn(0);
1442 }
1443 
1444 #undef __FUNCT__
1445 #define __FUNCT__ "MatGetBlockSize_MPIBAIJ"
1446 int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs)
1447 {
1448   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1449 
1450   PetscFunctionBegin;
1451   *bs = baij->bs;
1452   PetscFunctionReturn(0);
1453 }
1454 
1455 #undef __FUNCT__
1456 #define __FUNCT__ "MatZeroEntries_MPIBAIJ"
1457 int MatZeroEntries_MPIBAIJ(Mat A)
1458 {
1459   Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data;
1460   int         ierr;
1461 
1462   PetscFunctionBegin;
1463   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1464   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1465   PetscFunctionReturn(0);
1466 }
1467 
1468 #undef __FUNCT__
1469 #define __FUNCT__ "MatGetInfo_MPIBAIJ"
1470 int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1471 {
1472   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data;
1473   Mat         A = a->A,B = a->B;
1474   int         ierr;
1475   PetscReal   isend[5],irecv[5];
1476 
1477   PetscFunctionBegin;
1478   info->block_size     = (PetscReal)a->bs;
1479   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1480   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1481   isend[3] = info->memory;  isend[4] = info->mallocs;
1482   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1483   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1484   isend[3] += info->memory;  isend[4] += info->mallocs;
1485   if (flag == MAT_LOCAL) {
1486     info->nz_used      = isend[0];
1487     info->nz_allocated = isend[1];
1488     info->nz_unneeded  = isend[2];
1489     info->memory       = isend[3];
1490     info->mallocs      = isend[4];
1491   } else if (flag == MAT_GLOBAL_MAX) {
1492     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1493     info->nz_used      = irecv[0];
1494     info->nz_allocated = irecv[1];
1495     info->nz_unneeded  = irecv[2];
1496     info->memory       = irecv[3];
1497     info->mallocs      = irecv[4];
1498   } else if (flag == MAT_GLOBAL_SUM) {
1499     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1500     info->nz_used      = irecv[0];
1501     info->nz_allocated = irecv[1];
1502     info->nz_unneeded  = irecv[2];
1503     info->memory       = irecv[3];
1504     info->mallocs      = irecv[4];
1505   } else {
1506     SETERRQ1(1,"Unknown MatInfoType argument %d",flag);
1507   }
1508   info->rows_global       = (PetscReal)A->M;
1509   info->columns_global    = (PetscReal)A->N;
1510   info->rows_local        = (PetscReal)A->m;
1511   info->columns_local     = (PetscReal)A->N;
1512   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1513   info->fill_ratio_needed = 0;
1514   info->factor_mallocs    = 0;
1515   PetscFunctionReturn(0);
1516 }
1517 
#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIBAIJ"
/*
   Sets an option on the parallel matrix.  Most options are simply forwarded
   to both local blocks; orientation options additionally update the parallel
   flag, and a few options are handled (or rejected) at this level only.
*/
int MatSetOption_MPIBAIJ(Mat A,MatOption op)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
  int         ierr;

  PetscFunctionBegin;
  switch (op) {
  /* options forwarded unchanged to both the A and B blocks (fallthrough intended) */
  case MAT_NO_NEW_NONZERO_LOCATIONS:
  case MAT_YES_NEW_NONZERO_LOCATIONS:
  case MAT_COLUMNS_UNSORTED:
  case MAT_COLUMNS_SORTED:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_KEEP_ZEROED_ROWS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    /* record the orientation here too; MatSetValues paths consult it */
    a->roworiented = PETSC_TRUE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  /* options with no effect on this format (fallthrough intended) */
  case MAT_ROWS_SORTED:
  case MAT_ROWS_UNSORTED:
  case MAT_YES_NEW_DIAGONALS:
  case MAT_USE_SINGLE_PRECISION_SOLVES:
    PetscLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
    break;
  case MAT_COLUMN_ORIENTED:
    a->roworiented = PETSC_FALSE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    /* skip all stashing of off-process values */
    a->donotstash = PETSC_TRUE;
    break;
  case MAT_NO_NEW_DIAGONALS:
    SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
  case MAT_USE_HASH_TABLE:
    /* hash table is actually built at the next final MatAssemblyEnd() */
    a->ht_flag = PETSC_TRUE;
    break;
  default:
    SETERRQ(PETSC_ERR_SUP,"unknown option");
  }
  PetscFunctionReturn(0);
}
1566 
1567 #undef __FUNCT__
1568 #define __FUNCT__ "MatTranspose_MPIBAIJ("
1569 int MatTranspose_MPIBAIJ(Mat A,Mat *matout)
1570 {
1571   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data;
1572   Mat_SeqBAIJ *Aloc;
1573   Mat         B;
1574   int         ierr,M=A->M,N=A->N,*ai,*aj,i,*rvals,j,k,col;
1575   int         bs=baij->bs,mbs=baij->mbs;
1576   MatScalar   *a;
1577 
1578   PetscFunctionBegin;
1579   if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1580   ierr = MatCreateMPIBAIJ(A->comm,baij->bs,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);
1581 
1582   /* copy over the A part */
1583   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1584   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1585   ierr = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr);
1586 
1587   for (i=0; i<mbs; i++) {
1588     rvals[0] = bs*(baij->rstart + i);
1589     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1590     for (j=ai[i]; j<ai[i+1]; j++) {
1591       col = (baij->cstart+aj[j])*bs;
1592       for (k=0; k<bs; k++) {
1593         ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1594         col++; a += bs;
1595       }
1596     }
1597   }
1598   /* copy over the B part */
1599   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1600   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1601   for (i=0; i<mbs; i++) {
1602     rvals[0] = bs*(baij->rstart + i);
1603     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1604     for (j=ai[i]; j<ai[i+1]; j++) {
1605       col = baij->garray[aj[j]]*bs;
1606       for (k=0; k<bs; k++) {
1607         ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1608         col++; a += bs;
1609       }
1610     }
1611   }
1612   ierr = PetscFree(rvals);CHKERRQ(ierr);
1613   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1614   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1615 
1616   if (matout) {
1617     *matout = B;
1618   } else {
1619     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1620   }
1621   PetscFunctionReturn(0);
1622 }
1623 
1624 #undef __FUNCT__
1625 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
1626 int MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1627 {
1628   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1629   Mat         a = baij->A,b = baij->B;
1630   int         ierr,s1,s2,s3;
1631 
1632   PetscFunctionBegin;
1633   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1634   if (rr) {
1635     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1636     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1637     /* Overlap communication with computation. */
1638     ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1639   }
1640   if (ll) {
1641     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1642     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1643     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1644   }
1645   /* scale  the diagonal block */
1646   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1647 
1648   if (rr) {
1649     /* Do a scatter end and then right scale the off-diagonal block */
1650     ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1651     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1652   }
1653 
1654   PetscFunctionReturn(0);
1655 }
1656 
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIBAIJ"
/*
   MatZeroRows_MPIBAIJ - Zeros the global (point-wise indexed) rows of A
   listed in the index set is.  If diag is non-null, the diagonal entry of
   each zeroed row is set to *diag instead of zero.

   Any process may list any global row.  The row indices are first routed
   to their owning processes with point-to-point MPI messages; each process
   then zeros its locally owned rows via the SeqBAIJ kernels.
*/
int MatZeroRows_MPIBAIJ(Mat A,IS is,PetscScalar *diag)
{
  Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
  int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
  int            *procs,*nprocs,j,idx,nsends,*work,row;
  int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int            *rvalues,tag = A->tag,count,base,slen,n,*source;
  int            *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;
  PetscTruth     found;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);

  /*  first count number of contributors to each processor */
  /* nprocs[j] = rows destined for process j; procs[j] = 1 iff any row
     goes to process j; owner[i] = owning process of rows[i] */
  ierr  = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
  ierr  = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
  procs = nprocs + size;
  ierr  = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = PETSC_FALSE;
    for (j=0; j<size; j++) {
      /* owners[] holds block-row boundaries; scale by bs for point rows */
      if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  ierr   = PetscMalloc(2*size*sizeof(int),&work);CHKERRQ(ierr);
  ierr   = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr);
  nmax   = work[rank];        /* longest single message this process will receive */
  nrecvs = work[size+rank];   /* number of messages this process will receive */
  ierr   = PetscFree(work);CHKERRQ(ierr);

  /* post receives:   */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
     the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
  starts[0]  = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  /* recompute starts (the packing loop above advanced them) and send */
  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for (i=0; i<size; i++) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank]*bs;   /* first global point row owned by this process */

  /*  wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex]  = recv_status.MPI_SOURCE;
    lens[imdex]    = n;
    slen          += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  /* translate received global row numbers into local row numbers */
  ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PetscLogObjectParent(A,istmp);

  /*
        Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use seperate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

       Contributed by: Mathew Knepley
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
  ierr = MatZeroRows_SeqBAIJ(l->B,istmp,0);CHKERRQ(ierr);
  if (diag && (l->A->M == l->A->N)) {
    /* square diagonal block: the SeqBAIJ kernel sets the diagonal in place */
    ierr = MatZeroRows_SeqBAIJ(l->A,istmp,diag);CHKERRQ(ierr);
  } else if (diag) {
    /* rectangular diagonal block: fall back to MatSetValues() per row */
    ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr);
    if (((Mat_SeqBAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i=0; i<slen; i++) {
      row  = lrows[i] + rstart_bs;    /* back to a global point row */
      ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr);
  }

  ierr = ISDestroy(istmp);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
1809 
1810 #undef __FUNCT__
1811 #define __FUNCT__ "MatPrintHelp_MPIBAIJ"
1812 int MatPrintHelp_MPIBAIJ(Mat A)
1813 {
1814   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*)A->data;
1815   MPI_Comm    comm = A->comm;
1816   static int  called = 0;
1817   int         ierr;
1818 
1819   PetscFunctionBegin;
1820   if (!a->rank) {
1821     ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr);
1822   }
1823   if (called) {PetscFunctionReturn(0);} else called = 1;
1824   ierr = (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
1825   ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
1826   PetscFunctionReturn(0);
1827 }
1828 
1829 #undef __FUNCT__
1830 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
1831 int MatSetUnfactored_MPIBAIJ(Mat A)
1832 {
1833   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*)A->data;
1834   int         ierr;
1835 
1836   PetscFunctionBegin;
1837   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1838   PetscFunctionReturn(0);
1839 }
1840 
1841 static int MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
1842 
1843 #undef __FUNCT__
1844 #define __FUNCT__ "MatEqual_MPIBAIJ"
1845 int MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag)
1846 {
1847   Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1848   Mat         a,b,c,d;
1849   PetscTruth  flg;
1850   int         ierr;
1851 
1852   PetscFunctionBegin;
1853   ierr = PetscTypeCompare((PetscObject)B,MATMPIBAIJ,&flg);CHKERRQ(ierr);
1854   if (!flg) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type");
1855   a = matA->A; b = matA->B;
1856   c = matB->A; d = matB->B;
1857 
1858   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1859   if (flg == PETSC_TRUE) {
1860     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1861   }
1862   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1863   PetscFunctionReturn(0);
1864 }
1865 
1866 
1867 #undef __FUNCT__
1868 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ"
1869 int MatSetUpPreallocation_MPIBAIJ(Mat A)
1870 {
1871   int        ierr;
1872 
1873   PetscFunctionBegin;
1874   ierr =  MatMPIBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1875   PetscFunctionReturn(0);
1876 }
1877 
1878 /* -------------------------------------------------------------------*/
/*
   Function table for the MATMPIBAIJ type.  The initializers are strictly
   positional against struct _MatOps (declared in the Mat implementation
   headers); a 0 entry means the operation is not implemented for this
   format.  Do not reorder entries.
*/
static struct _MatOps MatOps_Values = {
  MatSetValues_MPIBAIJ,
  MatGetRow_MPIBAIJ,
  MatRestoreRow_MPIBAIJ,
  MatMult_MPIBAIJ,
  MatMultAdd_MPIBAIJ,
  MatMultTranspose_MPIBAIJ,
  MatMultTransposeAdd_MPIBAIJ,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  MatTranspose_MPIBAIJ,
  MatGetInfo_MPIBAIJ,
  MatEqual_MPIBAIJ,
  MatGetDiagonal_MPIBAIJ,
  MatDiagonalScale_MPIBAIJ,
  MatNorm_MPIBAIJ,
  MatAssemblyBegin_MPIBAIJ,
  MatAssemblyEnd_MPIBAIJ,
  0,
  MatSetOption_MPIBAIJ,
  MatZeroEntries_MPIBAIJ,
  MatZeroRows_MPIBAIJ,
  0,
  0,
  0,
  0,
  MatSetUpPreallocation_MPIBAIJ,
  0,
  0,
  0,
  0,
  MatDuplicate_MPIBAIJ,
  0,
  0,
  0,
  0,
  0,
  MatGetSubMatrices_MPIBAIJ,
  MatIncreaseOverlap_MPIBAIJ,
  MatGetValues_MPIBAIJ,
  0,
  MatPrintHelp_MPIBAIJ,
  MatScale_MPIBAIJ,
  0,
  0,
  0,
  MatGetBlockSize_MPIBAIJ,
  0,
  0,
  0,
  0,
  0,
  0,
  MatSetUnfactored_MPIBAIJ,
  0,
  MatSetValuesBlocked_MPIBAIJ,
  0,
  MatDestroy_MPIBAIJ,
  MatView_MPIBAIJ,
  MatGetPetscMaps_Petsc,
  0,
  0,
  0,
  0,
  0,
  0,
  MatGetRowMax_MPIBAIJ};
1951 
1952 
1953 EXTERN_C_BEGIN
1954 #undef __FUNCT__
1955 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
1956 int MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1957 {
1958   PetscFunctionBegin;
1959   *a      = ((Mat_MPIBAIJ *)A->data)->A;
1960   *iscopy = PETSC_FALSE;
1961   PetscFunctionReturn(0);
1962 }
1963 EXTERN_C_END
1964 
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIBAIJ"
/*
   MatCreate_MPIBAIJ - Constructor registered for the MATMPIBAIJ type.
   Allocates and zeroes the Mat_MPIBAIJ data structure, installs the
   function table, builds the ownership arrays and stashes, and composes
   the dynamic Store/Retrieve/GetDiagonalBlock operations.  Matrix sizes
   and preallocation are supplied later via MatMPIBAIJSetPreallocation().
*/
int MatCreate_MPIBAIJ(Mat B)
{
  Mat_MPIBAIJ  *b;
  int          ierr;
  PetscTruth   flg;

  PetscFunctionBegin;

  ierr = PetscNew(Mat_MPIBAIJ,&b);CHKERRQ(ierr);
  B->data = (void*)b;

  /* zero every field up front; the code below only sets the non-trivial ones */
  ierr    = PetscMemzero(b,sizeof(Mat_MPIBAIJ));CHKERRQ(ierr);
  ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->mapping    = 0;
  B->factor     = 0;
  B->assembled  = PETSC_FALSE;

  B->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  /* one allocation holds rowners, cowners and rowners_bs back to back */
  ierr          = PetscMalloc(3*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
  PetscLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
  b->cowners    = b->rowners + b->size + 2;
  b->rowners_bs = b->cowners + b->size + 2;

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
  b->donotstash  = PETSC_FALSE;
  b->colmap      = PETSC_NULL;
  b->garray      = PETSC_NULL;
  b->roworiented = PETSC_TRUE;

#if defined(PETSC_USE_MAT_SINGLE)
  /* stuff for MatSetValues_XXX in single precision */
  b->setvalueslen     = 0;
  b->setvaluescopy    = PETSC_NULL;
#endif

  /* stuff used in block assembly */
  b->barray       = 0;

  /* stuff used for matrix vector multiply */
  b->lvec         = 0;
  b->Mvctx        = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = 0;
  b->hd           = 0;
  b->ht_size      = 0;
  b->ht_flag      = PETSC_FALSE;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  /* optionally enable hash-table based assembly; a user factor <= 1.0
     is replaced by the default 1.39 */
  ierr = PetscOptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
  if (flg) {
    PetscReal fact = 1.39;
    ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
    ierr = PetscOptionsGetReal(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
    if (fact <= 1.0) fact = 1.39;
    ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
    PetscLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact);
  }
  /* string-keyed member functions retrievable via PetscObjectQueryFunction() */
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
                                     "MatStoreValues_MPIBAIJ",
                                     MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
                                     "MatRetrieveValues_MPIBAIJ",
                                     MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
                                     "MatGetDiagonalBlock_MPIBAIJ",
                                     MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
2050 
2051 #undef __FUNCT__
2052 #define __FUNCT__ "MatMPIBAIJSetPreallocation"
2053 /*@C
2054    MatMPIBAIJSetPreallocation - Creates a sparse parallel matrix in block AIJ format
2055    (block compressed row).  For good matrix assembly performance
2056    the user should preallocate the matrix storage by setting the parameters
2057    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2058    performance can be increased by more than a factor of 50.
2059 
2060    Collective on Mat
2061 
2062    Input Parameters:
2063 +  A - the matrix
2064 .  bs   - size of blockk
2065 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2066            submatrix  (same for all local rows)
2067 .  d_nnz - array containing the number of block nonzeros in the various block rows
2068            of the in diagonal portion of the local (possibly different for each block
2069            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
2070 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2071            submatrix (same for all local rows).
2072 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2073            off-diagonal portion of the local submatrix (possibly different for
2074            each block row) or PETSC_NULL.
2075 
2076    Output Parameter:
2077 
2078 
2079    Options Database Keys:
2080 .   -mat_no_unroll - uses code that does not unroll the loops in the
2081                      block calculations (much slower)
2082 .   -mat_block_size - size of the blocks to use
2083 
2084    Notes:
2085    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
2086    than it must be used on all processors that share the object for that argument.
2087 
2088    Storage Information:
2089    For a square global matrix we define each processor's diagonal portion
2090    to be its local rows and the corresponding columns (a square submatrix);
2091    each processor's off-diagonal portion encompasses the remainder of the
2092    local matrix (a rectangular submatrix).
2093 
2094    The user can specify preallocated storage for the diagonal part of
2095    the local submatrix with either d_nz or d_nnz (not both).  Set
2096    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2097    memory allocation.  Likewise, specify preallocated storage for the
2098    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2099 
2100    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2101    the figure below we depict these three local rows and all columns (0-11).
2102 
2103 .vb
2104            0 1 2 3 4 5 6 7 8 9 10 11
2105           -------------------
2106    row 3  |  o o o d d d o o o o o o
2107    row 4  |  o o o d d d o o o o o o
2108    row 5  |  o o o d d d o o o o o o
2109           -------------------
2110 .ve
2111 
2112    Thus, any entries in the d locations are stored in the d (diagonal)
2113    submatrix, and any entries in the o locations are stored in the
2114    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
2115    stored simply in the MATSEQBAIJ format for compressed row storage.
2116 
2117    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2118    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2119    In general, for PDE problems in which most nonzeros are near the diagonal,
2120    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2121    or you will get TERRIBLE performance; see the users' manual chapter on
2122    matrices.
2123 
2124    Level: intermediate
2125 
2126 .keywords: matrix, block, aij, compressed row, sparse, parallel
2127 
2128 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2129 @*/
2130 int MatMPIBAIJSetPreallocation(Mat B,int bs,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
2131 {
2132   Mat_MPIBAIJ  *b;
2133   int          ierr,i;
2134   PetscTruth   flg2;
2135 
2136   PetscFunctionBegin;
2137   ierr = PetscTypeCompare((PetscObject)B,MATMPIBAIJ,&flg2);CHKERRQ(ierr);
2138   if (!flg2) PetscFunctionReturn(0);
2139 
2140   B->preallocated = PETSC_TRUE;
2141   ierr = PetscOptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2142 
2143   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
2144   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2145   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2146   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
2147   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
2148   if (d_nnz) {
2149   for (i=0; i<B->m/bs; i++) {
2150       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %d value %d",i,d_nnz[i]);
2151     }
2152   }
2153   if (o_nnz) {
2154     for (i=0; i<B->m/bs; i++) {
2155       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %d value %d",i,o_nnz[i]);
2156     }
2157   }
2158 
2159   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr);
2160   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr);
2161   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
2162   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
2163 
2164   b = (Mat_MPIBAIJ*)B->data;
2165   b->bs  = bs;
2166   b->bs2 = bs*bs;
2167   b->mbs = B->m/bs;
2168   b->nbs = B->n/bs;
2169   b->Mbs = B->M/bs;
2170   b->Nbs = B->N/bs;
2171 
2172   ierr = MPI_Allgather(&b->mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
2173   b->rowners[0]    = 0;
2174   for (i=2; i<=b->size; i++) {
2175     b->rowners[i] += b->rowners[i-1];
2176   }
2177   b->rstart    = b->rowners[b->rank];
2178   b->rend      = b->rowners[b->rank+1];
2179 
2180   ierr = MPI_Allgather(&b->nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
2181   b->cowners[0] = 0;
2182   for (i=2; i<=b->size; i++) {
2183     b->cowners[i] += b->cowners[i-1];
2184   }
2185   b->cstart    = b->cowners[b->rank];
2186   b->cend      = b->cowners[b->rank+1];
2187 
2188   for (i=0; i<=b->size; i++) {
2189     b->rowners_bs[i] = b->rowners[i]*bs;
2190   }
2191   b->rstart_bs = b->rstart*bs;
2192   b->rend_bs   = b->rend*bs;
2193   b->cstart_bs = b->cstart*bs;
2194   b->cend_bs   = b->cend*bs;
2195 
2196   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,B->m,B->n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
2197   PetscLogObjectParent(B,b->A);
2198   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,B->m,B->N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
2199   PetscLogObjectParent(B,b->B);
2200   ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);
2201 
2202   PetscFunctionReturn(0);
2203 }
2204 
#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIBAIJ"
/*@C
   MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
   (block compressed row).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs   - size of block
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
           This value should be the same as the local size used in creating the
           y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
           This value should be the same as the local size used in creating the
           x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz  - number of nonzero blocks per block row in diagonal portion of local
           submatrix  (same for all local rows)
.  d_nnz - array containing the number of nonzero blocks in the various block rows
           of the in diagonal portion of the local (possibly different for each block
           row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
.  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
           submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzero blocks in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or PETSC_NULL.

   Output Parameter:
.  A - the matrix

   Options Database Keys:
.   -mat_no_unroll - uses code that does not unroll the loops in the
                     block calculations (much slower)
.   -mat_block_size - size of the blocks to use

   Notes:
   A nonzero block is any block that has 1 or more nonzeros in it

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
   than it must be used on all processors that share the object for that argument.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          -------------------
   row 3  |  o o o d d d o o o o o o
   row 4  |  o o o d d d o o o o o o
   row 5  |  o o o d d d o o o o o o
          -------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
   stored simply in the MATSEQBAIJ format for compressed row storage.

   Now d_nz should indicate the number of block nonzeros per row in the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.

   Level: intermediate

.keywords: matrix, block, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
@*/
int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
{
  int ierr,size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  /* on a single process fall back to the sequential BAIJ format */
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
    ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
2313 
#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIBAIJ"
/*
   MatDuplicate_MPIBAIJ - Duplicates an assembled MPIBAIJ matrix, copying
   its layout, ownership tables, colmap/garray caches, scatter context and
   (depending on cpvalues) the numerical values of the two sequential blocks.
*/
static int MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat         mat;
  Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
  int         ierr,len=0;

  PetscFunctionBegin;
  *newmat       = 0;
  ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
  ierr = MatSetType(mat,MATMPIBAIJ);CHKERRQ(ierr);
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  /* copy the scalar size/layout fields */
  a      = (Mat_MPIBAIJ*)mat->data;
  a->bs  = oldmat->bs;
  a->bs2 = oldmat->bs2;
  a->mbs = oldmat->mbs;
  a->nbs = oldmat->nbs;
  a->Mbs = oldmat->Mbs;
  a->Nbs = oldmat->Nbs;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->cstart       = oldmat->cstart;
  a->cend         = oldmat->cend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  /* per-instance caches start out empty in the duplicate */
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;
  a->barray       = 0;
  a->rstart_bs    = oldmat->rstart_bs;
  a->rend_bs      = oldmat->rend_bs;
  a->cstart_bs    = oldmat->cstart_bs;
  a->cend_bs      = oldmat->cend_bs;

  /* hash table stuff */
  /* the table itself is not copied, only the configuration flags */
  a->ht           = 0;
  a->hd           = 0;
  a->ht_size      = 0;
  a->ht_flag      = oldmat->ht_flag;
  a->ht_fact      = oldmat->ht_fact;
  a->ht_total_ct  = 0;
  a->ht_insert_ct = 0;

  /* rowners/cowners/rowners_bs live in one 3*(size+2) allocation */
  ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr);
  ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
  ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr);
  if (oldmat->colmap) {
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
  ierr = PetscMalloc((a->Nbs)*sizeof(int),&a->colmap);CHKERRQ(ierr);
  PetscLogObjectMemory(mat,(a->Nbs)*sizeof(int));
  ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;
  /* garray has one entry per nonzero block column of the off-diagonal part */
  if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
    ierr = PetscMalloc(len*sizeof(int),&a->garray);CHKERRQ(ierr);
    PetscLogObjectMemory(mat,len*sizeof(int));
    ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr);
  } else a->garray = 0;

  ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->lvec);
  ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);

  PetscLogObjectParent(mat,a->Mvctx);
  /* duplicate the two sequential blocks; cpvalues controls value copying */
  ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->A);
  ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->B);
  ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}
2393 
2394 #include "petscsys.h"
2395 
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIBAIJ"
/*
   MatLoad_MPIBAIJ - Loads a square matrix stored in PETSc binary format from a
   viewer and assembles it in parallel as an MPIBAIJ matrix in *newmat.

   Strategy: rank 0 alone reads the file (header, row lengths, column indices,
   numerical values) and ships each remote process its portion with
   point-to-point messages; every rank then inserts its own rows with
   MatSetValues() and the matrix is assembled at the end.

   If the global dimension M is not divisible by the block size bs (settable
   via -matload_block_size), extra rows/columns are appended, each padded with
   a single diagonal entry of 1.0, so the padded matrix is block-divisible.
*/
int MatLoad_MPIBAIJ(PetscViewer viewer,MatType type,Mat *newmat)
{
  Mat          A;
  int          i,nz,ierr,j,rstart,rend,fd;
  PetscScalar  *vals,*buf;
  MPI_Comm     comm = ((PetscObject)viewer)->comm;
  MPI_Status   status;
  int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
  int          *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf;
  int          tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows;
  int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
  int          dcount,kmax,k,nzcount,tmp;

  PetscFunctionBegin;
  /* block size defaults to 1 unless overridden on the command line */
  ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    /* only rank 0 touches the file; header layout: [cookie, M, N, nz-flag] */
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
    if (header[3] < 0) {
      /* negative entry marks an alternative on-disk layout this loader does not understand */
      SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPIBAIJ");
    }
  }

  /* everybody learns M, N and the nz flag; header[0] need not be broadcast */
  ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2];

  if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");

  /*
     This code adds extra rows to make sure the number of rows is
     divisible by the blocksize
  */
  Mbs        = M/bs;
  extra_rows = bs - M + bs*(Mbs);          /* = bs - (M mod bs); equals bs when M divides evenly */
  if (extra_rows == bs) extra_rows = 0;
  else                  Mbs++;             /* one extra (partially padded) block row */
  if (extra_rows &&!rank) {
    PetscLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n");
  }

  /* determine ownership of all rows */
  mbs        = Mbs/size + ((Mbs % size) > rank);   /* local block rows; remainder goes to low ranks */
  m          = mbs*bs;                             /* local point rows */
  ierr       = PetscMalloc(2*(size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
  browners   = rowners + size + 1;                 /* second half of the same allocation */
  ierr       = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) rowners[i] += rowners[i-1];    /* prefix sum: block-row ranges per rank */
  for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;  /* same ranges in point rows */
  rstart = rowners[rank];                                /* NOTE: rstart/rend are in BLOCK rows */
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr = PetscMalloc((rend-rstart)*bs*sizeof(int),&locrowlens);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;   /* each padded row has exactly one entry */
    ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    /* non-root ranks participate in the scatter with NULL send arguments */
    ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices  */
    nz     = procsnz[0];
    ierr   = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr);
    mycols = ibuf;
    /* uniprocessor case: the padded entries are not on disk, so read fewer and
       append the padding columns (M..M+extra_rows-1) by hand */
    if (size == 1)  nz -= extra_rows;
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
    if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }

    /* read in every ones (except the last) and ship off */
    for (i=1; i<size-1; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
    }
    /* read in the stuff for the last proc */
    if (size != 1) {
      nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;  /* padding columns appended for the last rank */
      ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += locrowlens[i];
    }
    ierr   = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr);
    mycols = ibuf;
    /* receive message of column indices*/
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries */
  /* dlens/odlens hold, per local block row, the count of distinct diagonal-part
     and off-diagonal-part block columns -- this becomes the preallocation below */
  ierr     = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&dlens);CHKERRQ(ierr);
  odlens   = dlens + (rend-rstart);
  ierr     = PetscMalloc(3*Mbs*sizeof(int),&mask);CHKERRQ(ierr);
  ierr     = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr);
  masked1  = mask    + Mbs;   /* scratch lists of block columns seen (diagonal part) */
  masked2  = masked1 + Mbs;   /* ... and off-diagonal part, for cheap mask reset */
  rowcount = 0; nzcount = 0;
  for (i=0; i<mbs; i++) {
    dcount  = 0;
    odcount = 0;
    for (j=0; j<bs; j++) {           /* scan all bs point rows of this block row */
      kmax = locrowlens[rowcount];
      for (k=0; k<kmax; k++) {
        tmp = mycols[nzcount++]/bs;  /* point column -> block column */
        if (!mask[tmp]) {            /* count each block column only once per block row */
          mask[tmp] = 1;
          if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
          else masked1[dcount++] = tmp;
        }
      }
      rowcount++;
    }

    dlens[i]  = dcount;
    odlens[i] = odcount;

    /* zero out the mask elements we set */
    for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
    for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
  }

  /* create our matrix */
  ierr = MatCreateMPIBAIJ(comm,bs,m,m,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);CHKERRQ(ierr);
  A = *newmat;
  /* columns arrive sorted from the file, so insertion can take the fast path */
  /* NOTE(review): return value of MatSetOption is ignored here (no ierr/CHKERRQ) */
  MatSetOption(A,MAT_COLUMNS_SORTED);

  if (!rank) {
    ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
    /* read in my part of the matrix numerical values  */
    nz = procsnz[0];
    vals = buf;
    mycols = ibuf;
    if (size == 1)  nz -= extra_rows;
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
    if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }  /* unit diagonal padding */

    /* insert into matrix */
    jj      = rstart*bs;   /* first global point row owned by this rank */
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
    /* read in other processors (except the last one) and ship out */
    for (i=1; i<size-1; i++) {
      nz   = procsnz[i];
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    /* the last proc */
    if (size != 1){
      /* NOTE(review): relies on i == size-1 left over from the loop above
         (also true when size == 2 and the loop body never ran); fragile but correct */
      nz   = procsnz[i] - extra_rows;
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;   /* clobbers i; not used again before free */
      ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);

    /* receive message of values*/
    vals   = buf;
    mycols = ibuf;
    ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart*bs;
    for (i=0; i<m; i++) {
      ierr    = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
  }
  ierr = PetscFree(locrowlens);CHKERRQ(ierr);
  ierr = PetscFree(buf);CHKERRQ(ierr);
  ierr = PetscFree(ibuf);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);
  ierr = PetscFree(dlens);CHKERRQ(ierr);
  ierr = PetscFree(mask);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
2625 
2626 #undef __FUNCT__
2627 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
2628 /*@
2629    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2630 
2631    Input Parameters:
2632 .  mat  - the matrix
2633 .  fact - factor
2634 
2635    Collective on Mat
2636 
2637    Level: advanced
2638 
2639   Notes:
2640    This can also be set by the command line option: -mat_use_hash_table fact
2641 
2642 .keywords: matrix, hashtable, factor, HT
2643 
2644 .seealso: MatSetOption()
2645 @*/
2646 int MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
2647 {
2648   Mat_MPIBAIJ *baij;
2649   int         ierr;
2650   PetscTruth  flg;
2651 
2652   PetscFunctionBegin;
2653   PetscValidHeaderSpecific(mat,MAT_COOKIE);
2654   ierr = PetscTypeCompare((PetscObject)mat,MATMPIBAIJ,&flg);CHKERRQ(ierr);
2655   if (!flg) {
2656     SETERRQ(PETSC_ERR_ARG_WRONG,"Incorrect matrix type. Use MPIBAIJ only.");
2657   }
2658   baij = (Mat_MPIBAIJ*)mat->data;
2659   baij->ht_fact = fact;
2660   PetscFunctionReturn(0);
2661 }
2662 
2663 #undef __FUNCT__
2664 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
2665 int MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,int **colmap)
2666 {
2667   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2668   PetscFunctionBegin;
2669   *Ad     = a->A;
2670   *Ao     = a->B;
2671   *colmap = a->garray;
2672   PetscFunctionReturn(0);
2673 }
2674