/*$Id: mpibaij.c,v 1.234 2001/09/25 22:56:49 balay Exp $*/

#include "src/mat/impls/baij/mpi/mpibaij.h"   /*I  "petscmat.h"  I*/
#include "src/vec/vecimpl.h"

EXTERN int MatSetUpMultiply_MPIBAIJ(Mat);
EXTERN int DisAssemble_MPIBAIJ(Mat);
EXTERN int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int);
EXTERN int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatReuse,Mat **);
EXTERN int MatGetValues_SeqBAIJ(Mat,int,int *,int,int *,PetscScalar *);
EXTERN int MatSetValues_SeqBAIJ(Mat,int,int *,int,int *,PetscScalar *,InsertMode);
EXTERN int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode);
EXTERN int MatGetRow_SeqBAIJ(Mat,int,int*,int**,PetscScalar**);
EXTERN int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,PetscScalar**);
EXTERN int MatPrintHelp_SeqBAIJ(Mat);
EXTERN int MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar*);
/*  UGLY, ugly, ugly
   When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar()
   does not exist; the macros below simply alias the ..._MatScalar() names to the
   ordinary routines. Otherwise the ..._MatScalar() routines take matrix elements
   that are already in single precision and insert them directly into the single
   precision data structures, while MatSetValuesBlocked_MPIBAIJ() first converts
   the entries to single precision and then calls ..._MatScalar() to store them.
*/
#if defined(PETSC_USE_MAT_SINGLE)
EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
EXTERN int MatSetValues_MPIBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
EXTERN int MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
EXTERN int MatSetValues_MPIBAIJ_HT_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
EXTERN int MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
#else
#define MatSetValuesBlocked_SeqBAIJ_MatScalar      MatSetValuesBlocked_SeqBAIJ
#define MatSetValues_MPIBAIJ_MatScalar             MatSetValues_MPIBAIJ
#define MatSetValuesBlocked_MPIBAIJ_MatScalar      MatSetValuesBlocked_MPIBAIJ
#define MatSetValues_MPIBAIJ_HT_MatScalar          MatSetValues_MPIBAIJ_HT
#define MatSetValuesBlocked_MPIBAIJ_HT_MatScalar   MatSetValuesBlocked_MPIBAIJ_HT
#endif

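/* MatGetRowMax for the parallel format: compute the row-wise maxima of the
   diagonal block A and the off-diagonal block B separately, then keep, for
   each local row, whichever of the two entries is larger in absolute value. */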
#undef __FUNCT__
#define __FUNCT__ "MatGetRowMax_MPIBAIJ"
int MatGetRowMax_MPIBAIJ(Mat A,Vec v)
{
  Mat_MPIBAIJ  *a = (Mat_MPIBAIJ*)A->data;
  int          ierr,i;
  PetscScalar  *va,*vb;
  Vec          vtmp;

  PetscFunctionBegin;

  ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->m,&vtmp);CHKERRQ(ierr);
  ierr = MatGetRowMax(a->B,vtmp);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->m; i++) {
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) va[i] = vb[i];
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = VecDestroy(vtmp);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPIBAIJ"
int MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
  int         ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
int MatRetrieveValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
  int         ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*
     Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix. Entries are stored shifted by +1 so that a
   value of 0 means "column not present". This is done in a non-scalable
   way since the length of colmap equals the global number of block columns.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIBAIJ_Private"
static int CreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data;
  int         nbs = B->nbs,i,bs=B->bs,ierr;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr);
  for (i=0; i<nbs; i++) {
    ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((baij->Nbs+1)*sizeof(int),&baij->colmap);CHKERRQ(ierr);
  PetscLogObjectMemory(mat,baij->Nbs*sizeof(int));
  ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));CHKERRQ(ierr);
  for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}
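
/*
   A minimal lookup sketch (illustrative only, non-CTABLE branch) of the
   convention established above, as used by the MatSetValues routines below:

     gbcol = in[j]/bs;                         global block column
     lcol  = baij->colmap[gbcol] - 1;          -1 when gbcol is not off-diagonal
     if (lcol >= 0) lcol += in[j]%bs;          point offset within the block
*/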

#define CHUNKSIZE  10
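/* CHUNKSIZE extra block slots (values plus the matching column indices) are
   allocated for a row whenever an insertion below finds that row already full. */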

#define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto a_noinsert; \
        } \
      } \
      if (a->nonew == 1) goto a_noinsert; \
      else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
      if (nrow >= rmax) { \
        /* there is no extra room in row, therefore enlarge */ \
        int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
        MatScalar *new_a; \
 \
        if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
 \
        /* malloc new storage space */ \
        len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \
        ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
        new_j   = (int*)(new_a + bs2*new_nz); \
        new_i   = new_j + new_nz; \
 \
        /* copy over old data into new slots */ \
        for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \
        for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
        ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
        len  = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
        ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
        ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
                    aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
        /* free up old matrix storage */ \
        ierr = PetscFree(a->a);CHKERRQ(ierr);  \
        if (!a->singlemalloc) { \
          ierr = PetscFree(a->i);CHKERRQ(ierr); \
          ierr = PetscFree(a->j);CHKERRQ(ierr); \
        } \
        aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
        a->singlemalloc = PETSC_TRUE; \
 \
        rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
        rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
        PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
        a->maxnz += bs2*CHUNKSIZE; \
        a->reallocs++; \
        a->nz++; \
      } \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      a_noinsert:; \
    ailen[brow] = nrow; \
}
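
/* The B variant below applies the identical insertion algorithm to the
   off-diagonal part of the matrix; only the arrays (bi,bj,ba,bimax,bilen)
   and the jump labels differ from the A variant above. */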

#define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto b_noinsert; \
        } \
      } \
      if (b->nonew == 1) goto b_noinsert; \
      else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
      if (nrow >= rmax) { \
        /* there is no extra room in row, therefore enlarge */ \
        int       new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
        MatScalar *new_a; \
 \
        if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
 \
        /* malloc new storage space */ \
        len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \
        ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
        new_j   = (int*)(new_a + bs2*new_nz); \
        new_i   = new_j + new_nz; \
 \
        /* copy over old data into new slots */ \
        for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \
        for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
        ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
        len  = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
        ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
        ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
                    ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
        /* free up old matrix storage */ \
        ierr = PetscFree(b->a);CHKERRQ(ierr);  \
        if (!b->singlemalloc) { \
          ierr = PetscFree(b->i);CHKERRQ(ierr); \
          ierr = PetscFree(b->j);CHKERRQ(ierr); \
        } \
        ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
        b->singlemalloc = PETSC_TRUE; \
 \
        rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
        rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
        PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
        b->maxnz += bs2*CHUNKSIZE; \
        b->reallocs++; \
        b->nz++; \
      } \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      b_noinsert:; \
    bilen[brow] = nrow; \
}

#if defined(PETSC_USE_MAT_SINGLE)
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ"
int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
  int         ierr,i,N = m*n;
  MatScalar   *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen = N;
  }
  vsingle = b->setvaluescopy;

  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValues_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
  int         ierr,i,N = m*n*b->bs2;
  MatScalar   *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen = N;
  }
  vsingle = b->setvaluescopy;
  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
int MatSetValues_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
  int         ierr,i,N = m*n;
  MatScalar   *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen = N;
  }
  vsingle = b->setvaluescopy;
  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValues_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
  int         ierr,i,N = m*n*b->bs2;
  MatScalar   *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen = N;
  }
  vsingle = b->setvaluescopy;
  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar"
int MatSetValues_MPIBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  MatScalar   value;
  PetscTruth  roworiented = baij->roworiented;
  int         ierr,i,j,row,col;
  int         rstart_orig=baij->rstart_bs;
  int         rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
  int         cend_orig=baij->cend_bs,bs=baij->bs;

  /* Some variables required in the macro */
  Mat         A = baij->A;
  Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data;
  int         *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar   *aa=a->a;

  Mat         B = baij->B;
  Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data;
  int         *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar   *ba=b->a;

  int         *rp,ii,nrow,_i,rmax,N,brow,bcol;
  int         low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar   *ap,*bap;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
    if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      row = im[i] - rstart_orig;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig) {
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
        else if (in[j] >= mat->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");}
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B = baij->B;
              b = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba=b->a;
            } else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
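
/* Layout reminder for the v indexing above (illustrative): with m=2 rows and
   n=3 columns, row-oriented input stores
       v = {a00,a01,a02, a10,a11,a12}   so  value = v[i*n+j],
   while column-oriented input stores
       v = {a00,a10, a01,a11, a02,a12}  so  value = v[i+j*m]. */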

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_MatScalar"
int MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  MatScalar   *value,*barray=baij->barray;
  PetscTruth  roworiented = baij->roworiented;
  int         ierr,i,j,ii,jj,row,col,rstart=baij->rstart;
  int         rend=baij->rend,cstart=baij->cstart,stepval;
  int         cend=baij->cend,bs=baij->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %d max %d",im[i],baij->Mbs);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If the input is a single row (or column) of blocks, each block is
           already contiguous and no copy is required */
        if ((roworiented) && (n == 1)) {
          barray = v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++ = *value++;
            }
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend) {
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
        else if (in[j] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
        else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %d max %d",in[j],baij->Nbs);}
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_BOPT_g)
#if defined (PETSC_USE_CTABLE)
            { int data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
            }
          }
          else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
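
/* In the routine above v is treated as a dense (m*bs) x (n*bs) array of point
   entries; with row-oriented storage the row length is n*bs = stepval+bs, so
   block (i,j) begins at v + i*(stepval+bs)*bs + j*bs and stepval advances from
   the end of one row of a block to the start of the next. */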

#define HASH_KEY 0.6180339887
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
/* #define HASH(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
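
/*
   A worked example of the multiplicative hash above (illustrative only): for
   a table of size 97 and key 12345,

       tmp = 12345*0.6180339887 = 7629.6296...
       h1  = (int)(97*(tmp - (int)tmp)) = (int)(97*0.6296...) = 61

   i.e. the fractional part of key*HASH_KEY, scaled by the table size, gives
   the starting probe index; collisions are resolved by the linear scans in
   the routines below.
*/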
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_HT_MatScalar"
int MatSetValues_MPIBAIJ_HT_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  PetscTruth  roworiented = baij->roworiented;
  int         ierr,i,j,row,col;
  int         rstart_orig=baij->rstart_bs;
  int         rend_orig=baij->rend_bs,Nbs=baij->Nbs;
  int         h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx;
  PetscReal   tmp;
  MatScalar   **HD = baij->hd,value;
#if defined(PETSC_USE_BOPT_g)
  int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  for (i=0; i<m; i++) {
#if defined(PETSC_USE_BOPT_g)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
#endif
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j=0; j<n; j++) {
        col = in[j];
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        /* Look up the block in the hash table */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_BOPT_g)
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"(row,col) has no entry in the hash table");
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"(row,col) has no entry in the hash table");
            }
          }
        }
#endif
        /* A hash table entry was found, so insert the value at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_BOPT_g)
  baij->ht_total_ct  = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT_MatScalar"
int MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  PetscTruth  roworiented = baij->roworiented;
  int         ierr,i,j,ii,jj,row,col;
  int         rstart=baij->rstart;
  int         rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2;
  int         h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  PetscReal   tmp;
  MatScalar   **HD = baij->hd,*baij_a;
  MatScalar   *v_t,*value;
#if defined(PETSC_USE_BOPT_g)
  int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_BOPT_g)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
#endif
    row = im[i];
    v_t = v + i*bs2;
    if (row >= rstart && row < rend) {
      for (j=0; j<n; j++) {
        col = in[j];

        /* Look up the block in the hash table */
        key = row*Nbs+col+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_BOPT_g)
        total_ct++;
        insert_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"(row,col) has no entry in the hash table");
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"(row,col) has no entry in the hash table");
            }
          }
        }
#endif
        baij_a = HD[idx];
        if (roworiented) {
          /* value = v + i*(stepval+bs)*bs + j*bs; */
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t  += bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] = *value++;
              }
            }
          }
        } else {
          value = v + j*(stepval+bs)*bs + i*bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] = *value++;
              }
            }
          }
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_BOPT_g)
  baij->ht_total_ct  = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIBAIJ"
int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  int         bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
  int         bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
        if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          col  = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIBAIJ"
int MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
  int         ierr,i,bs2=baij->bs2;
  PetscReal   sum = 0.0;
  MatScalar   *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      for (i=0; i<amat->nz*bs2; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      for (i=0; i<bmat->nz*bs2; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *nrm = sqrt(*nrm);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
    }
  }
  PetscFunctionReturn(0);
}

/*
  Creates and fills the hash table; this table is created only once.
  If new entries need to be added to the matrix then the hash table
  has to be destroyed and recreated.
*/
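
/*
   Probing sketch (illustrative): starting from h1 = HASH(size,key,tmp), the
   loops below examine slots (h1+k)%size for k = 0,1,... until an empty slot
   (HT entry 0) is found. Lookups in the MatSetValues_..._HT routines retrace
   this same sequence, which is why the table must be rebuilt, not patched,
   when new nonzero locations appear.
*/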
#undef __FUNCT__
#define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
int MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  Mat         A = baij->A,B=baij->B;
  Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
  int         i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
  int         size,bs2=baij->bs2,rstart=baij->rstart,ierr;
  int         cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs;
  int         *HT,key;
  MatScalar   **HD;
  PetscReal   tmp;
#if defined(PETSC_USE_BOPT_g)
  int         ct=0,max=0;
#endif

  PetscFunctionBegin;
  baij->ht_size = (int)(factor*nz);
  size = baij->ht_size;

  if (baij->ht) {
    PetscFunctionReturn(0);
  }

  /* Allocate memory for the hash table */
  ierr     = PetscMalloc((size)*(sizeof(int)+sizeof(MatScalar*))+1,&baij->hd);CHKERRQ(ierr);
  baij->ht = (int*)(baij->hd + size);
  HD       = baij->hd;
  HT       = baij->ht;

  ierr = PetscMemzero(HD,size*(sizeof(int)+sizeof(MatScalar*)));CHKERRQ(ierr);

  /* Loop over A */
  for (i=0; i<a->mbs; i++) {
    for (j=ai[i]; j<ai[i+1]; j++) {
      row = i+rstart;
      col = aj[j]+cstart;

      key = row*Nbs + col + 1;
      h1  = HASH(size,key,tmp);
      for (k=0; k<size; k++) {
        if (HT[(h1+k)%size] == 0) {
          HT[(h1+k)%size] = key;
          HD[(h1+k)%size] = a->a + j*bs2;
          break;
#if defined(PETSC_USE_BOPT_g)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_BOPT_g)
      if (k > max) max = k;
#endif
    }
  }
  /* Loop over B */
  for (i=0; i<b->mbs; i++) {
    for (j=bi[i]; j<bi[i+1]; j++) {
      row = i+rstart;
      col = garray[bj[j]];
      key = row*Nbs + col + 1;
      h1  = HASH(size,key,tmp);
      for (k=0; k<size; k++) {
        if (HT[(h1+k)%size] == 0) {
          HT[(h1+k)%size] = key;
          HD[(h1+k)%size] = b->a + j*bs2;
          break;
#if defined(PETSC_USE_BOPT_g)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_BOPT_g)
      if (k > max) max = k;
#endif
    }
  }

  /* Print summary */
#if defined(PETSC_USE_BOPT_g)
  for (i=0,j=0; i<size; i++) {
    if (HT[i]) {j++;}
  }
  PetscLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average search = %5.2f, max search = %d\n",(j == 0) ? 0.0 : ((PetscReal)(ct+j))/j,max);
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  int         ierr,nstash,reallocs;
  InsertMode  addv;

  PetscFunctionBegin;
  if (baij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted values while others added");
  }
  mat->insertmode = addv; /* in case this processor had no cached entries */

  ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  PetscLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  PetscLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
  PetscFunctionReturn(0);
}
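
/* Off-processor entries cached in mat->stash / mat->bstash by MatSetValues()
   are sent to their owning processes between MatAssemblyBegin() and
   MatAssemblyEnd(); the receiving side replays them through the local
   MatSetValues kernels in MatAssemblyEnd_MPIBAIJ() below. */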

EXTERN int MatUseDSCPACK_MPIBAIJ(Mat);
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data;
  int         i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2;
  int         *row,*col,other_disassembled;
  PetscTruth  r1,r2,r3;
  MatScalar   *val;
  InsertMode  addv = mat->insertmode;
#if defined(PETSC_HAVE_DSCPACK)
  PetscTruth  flag;
#endif

  PetscFunctionBegin;
  if (!baij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive values belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column-oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = b->roworiented;
    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;
    b->roworiented    = PETSC_FALSE;
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive values belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    b->roworiented    = r3;
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_BOPT_g)
  if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    PetscLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  if (baij->rowvalues) {
    ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
    baij->rowvalues = 0;
  }
#if defined(PETSC_HAVE_DSCPACK)
  ierr = PetscOptionsHasName(mat->prefix,"-mat_baij_dscpack",&flag);CHKERRQ(ierr);
  if (flag) { ierr = MatUseDSCPACK_MPIBAIJ(mat);CHKERRQ(ierr); }
#endif
  PetscFunctionReturn(0);
}

extern int MatMPIBAIJFactorInfo_DSCPACK(Mat,PetscViewer);

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
static int MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  int               ierr,bs = baij->bs,size = baij->size,rank = baij->rank;
  PetscTruth        isascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  if (isascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
              rank,mat->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
              baij->bs,(int)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
#if defined(PETSC_HAVE_DSCPACK) && !defined(PETSC_USE_SINGLE) && !defined(PETSC_USE_COMPLEX)
      ierr = MatMPIBAIJFactorInfo_DSCPACK(mat,viewer);CHKERRQ(ierr);
#endif
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr);
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto the first processor. */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    int         M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar   *a;

    if (!rank) {
      ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    }
    PetscLogObjectParent(mat,A);

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstart + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstart+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ*)baij->B->data;
    ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstart + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ"
int MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
{
  int        ierr;
  PetscTruth isascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  if (isascii || isdraw || issocket || isbinary) {
    ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(1,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIBAIJ"
int MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  int         ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",mat->M,mat->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(baij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
#else
  if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
#endif
  if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
  if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
  if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
  if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
  if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
  if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
#if defined(PETSC_USE_MAT_SINGLE)
  if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);}
#endif
  ierr = PetscFree(baij);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIBAIJ"
int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
  int         ierr,nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  }
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->m) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
  }
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
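
/* Note the communication/computation overlap above: the scatter of the ghost
   values of xx into a->lvec is started, the diagonal block A is applied while
   those messages are in flight, and only then is the scatter completed and
   the off-diagonal block B applied. MatMultAdd below uses the same pattern. */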

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIBAIJ"
int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
  int         ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIBAIJ"
int MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
  int         ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually
     inserted into yy until the next line, which holds for this implementation
     but may not always be true */
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
int MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
  int         ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually
     inserted into zz until the next line, which holds for this implementation
     but may not always be true */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the local submatrix
  baij->A is the diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
int MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
  int         ierr;

  PetscFunctionBegin;
  if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIBAIJ"
int MatScale_MPIBAIJ(PetscScalar *aa,Mat A)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
  int         ierr;

  PetscFunctionBegin;
  ierr = MatScale(aa,a->A);CHKERRQ(ierr);
  ierr = MatScale(aa,a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIBAIJ"
int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
{
  Mat_MPIBAIJ  *mat = (Mat_MPIBAIJ*)matin->data;
  PetscScalar  *vworkA,*vworkB,**pvA,**pvB,*v_p;
  int          bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB;
  int          nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs;
  int          *cmap,*idx_p,cstart = mat->cstart;

  PetscFunctionBegin;
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
    int         max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc(max*bs2*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
    mat->rowindices = (int*)(mat->rowvalues + max*bs2);
  }

  if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows");
  lrow = row - brstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart)
              idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1437 
1438 #undef __FUNCT__
1439 #define __FUNCT__ "MatRestoreRow_MPIBAIJ"
1440 int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
1441 {
1442   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1443 
1444   PetscFunctionBegin;
1445   if (baij->getrowactive == PETSC_FALSE) {
1446     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1447   }
1448   baij->getrowactive = PETSC_FALSE;
1449   PetscFunctionReturn(0);
1450 }
1451 
1452 #undef __FUNCT__
1453 #define __FUNCT__ "MatGetBlockSize_MPIBAIJ"
1454 int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs)
1455 {
1456   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1457 
1458   PetscFunctionBegin;
1459   *bs = baij->bs;
1460   PetscFunctionReturn(0);
1461 }
1462 
1463 #undef __FUNCT__
1464 #define __FUNCT__ "MatZeroEntries_MPIBAIJ"
1465 int MatZeroEntries_MPIBAIJ(Mat A)
1466 {
1467   Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data;
1468   int         ierr;
1469 
1470   PetscFunctionBegin;
1471   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1472   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1473   PetscFunctionReturn(0);
1474 }
1475 
1476 #undef __FUNCT__
1477 #define __FUNCT__ "MatGetInfo_MPIBAIJ"
1478 int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1479 {
1480   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data;
1481   Mat         A = a->A,B = a->B;
1482   int         ierr;
1483   PetscReal   isend[5],irecv[5];
1484 
1485   PetscFunctionBegin;
1486   info->block_size     = (PetscReal)a->bs;
1487   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1488   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1489   isend[3] = info->memory;  isend[4] = info->mallocs;
1490   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1491   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1492   isend[3] += info->memory;  isend[4] += info->mallocs;
1493   if (flag == MAT_LOCAL) {
1494     info->nz_used      = isend[0];
1495     info->nz_allocated = isend[1];
1496     info->nz_unneeded  = isend[2];
1497     info->memory       = isend[3];
1498     info->mallocs      = isend[4];
1499   } else if (flag == MAT_GLOBAL_MAX) {
1500     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1501     info->nz_used      = irecv[0];
1502     info->nz_allocated = irecv[1];
1503     info->nz_unneeded  = irecv[2];
1504     info->memory       = irecv[3];
1505     info->mallocs      = irecv[4];
1506   } else if (flag == MAT_GLOBAL_SUM) {
1507     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1508     info->nz_used      = irecv[0];
1509     info->nz_allocated = irecv[1];
1510     info->nz_unneeded  = irecv[2];
1511     info->memory       = irecv[3];
1512     info->mallocs      = irecv[4];
1513   } else {
1514     SETERRQ1(1,"Unknown MatInfoType argument %d",flag);
1515   }
1516   info->rows_global       = (PetscReal)A->M;
1517   info->columns_global    = (PetscReal)A->N;
1518   info->rows_local        = (PetscReal)A->m;
1519   info->columns_local     = (PetscReal)A->n;
1520   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1521   info->fill_ratio_needed = 0;
1522   info->factor_mallocs    = 0;
1523   PetscFunctionReturn(0);
1524 }
1525 
1526 #undef __FUNCT__
1527 #define __FUNCT__ "MatSetOption_MPIBAIJ"
1528 int MatSetOption_MPIBAIJ(Mat A,MatOption op)
1529 {
1530   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1531   int         ierr;
1532 
1533   PetscFunctionBegin;
1534   switch (op) {
1535   case MAT_NO_NEW_NONZERO_LOCATIONS:
1536   case MAT_YES_NEW_NONZERO_LOCATIONS:
1537   case MAT_COLUMNS_UNSORTED:
1538   case MAT_COLUMNS_SORTED:
1539   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1540   case MAT_KEEP_ZEROED_ROWS:
1541   case MAT_NEW_NONZERO_LOCATION_ERR:
1542     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1543     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1544     break;
1545   case MAT_ROW_ORIENTED:
1546     a->roworiented = PETSC_TRUE;
1547     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1548     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1549     break;
1550   case MAT_ROWS_SORTED:
1551   case MAT_ROWS_UNSORTED:
1552   case MAT_YES_NEW_DIAGONALS:
1553     PetscLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
1554     break;
1555   case MAT_COLUMN_ORIENTED:
1556     a->roworiented = PETSC_FALSE;
1557     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1558     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1559     break;
1560   case MAT_IGNORE_OFF_PROC_ENTRIES:
1561     a->donotstash = PETSC_TRUE;
1562     break;
1563   case MAT_NO_NEW_DIAGONALS:
1564     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS is not supported");
1565   case MAT_USE_HASH_TABLE:
1566     a->ht_flag = PETSC_TRUE;
1567     break;
1568   default:
1569     SETERRQ(PETSC_ERR_SUP,"unknown option");
1570   }
1571   PetscFunctionReturn(0);
1572 }
1573 
1574 #undef __FUNCT__
1575 #define __FUNCT__ "MatTranspose_MPIBAIJ"
1576 int MatTranspose_MPIBAIJ(Mat A,Mat *matout)
1577 {
1578   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data;
1579   Mat_SeqBAIJ *Aloc;
1580   Mat         B;
1581   int         ierr,M=A->M,N=A->N,*ai,*aj,i,*rvals,j,k,col;
1582   int         bs=baij->bs,mbs=baij->mbs;
1583   MatScalar   *a;
1584 
1585   PetscFunctionBegin;
1586   if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1587   ierr = MatCreateMPIBAIJ(A->comm,baij->bs,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);
1588 
1589   /* copy over the A part */
1590   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1591   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1592   ierr = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr);
1593 
1594   for (i=0; i<mbs; i++) {
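    /* rvals lists the bs global point rows of block row i of A; in B (the transpose) these become column indices */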
1595     rvals[0] = bs*(baij->rstart + i);
1596     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1597     for (j=ai[i]; j<ai[i+1]; j++) {
1598       col = (baij->cstart+aj[j])*bs;
1599       for (k=0; k<bs; k++) {
1600         ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1601         col++; a += bs;
1602       }
1603     }
1604   }
1605   /* copy over the B part */
1606   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1607   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1608   for (i=0; i<mbs; i++) {
1609     rvals[0] = bs*(baij->rstart + i);
1610     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1611     for (j=ai[i]; j<ai[i+1]; j++) {
1612       col = baij->garray[aj[j]]*bs;
1613       for (k=0; k<bs; k++) {
1614         ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1615         col++; a += bs;
1616       }
1617     }
1618   }
1619   ierr = PetscFree(rvals);CHKERRQ(ierr);
1620   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1621   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1622 
1623   if (matout) {
1624     *matout = B;
1625   } else {
1626     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1627   }
1628   PetscFunctionReturn(0);
1629 }
1630 
1631 #undef __FUNCT__
1632 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
1633 int MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1634 {
1635   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1636   Mat         a = baij->A,b = baij->B;
1637   int         ierr,s1,s2,s3;
1638 
1639   PetscFunctionBegin;
1640   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1641   if (rr) {
1642     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1643     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1644     /* Overlap communication with computation. */
1645     ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1646   }
1647   if (ll) {
1648     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1649     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1650     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1651   }
1652   /* scale  the diagonal block */
1653   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1654 
1655   if (rr) {
1656     /* Do a scatter end and then right scale the off-diagonal block */
1657     ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1658     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1659   }
1660 
1661   PetscFunctionReturn(0);
1662 }
1663 
1664 #undef __FUNCT__
1665 #define __FUNCT__ "MatZeroRows_MPIBAIJ"
1666 int MatZeroRows_MPIBAIJ(Mat A,IS is,PetscScalar *diag)
1667 {
1668   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1669   int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
1670   int            *nprocs,j,idx,nsends,row;
1671   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
1672   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
1673   int            *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs;
1674   MPI_Comm       comm = A->comm;
1675   MPI_Request    *send_waits,*recv_waits;
1676   MPI_Status     recv_status,*send_status;
1677   IS             istmp;
1678   PetscTruth     found;
1679 
1680   PetscFunctionBegin;
1681   ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
1682   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
1683 
1684   /*  first count number of contributors to each processor */
1685   ierr  = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
1686   ierr  = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
1687   ierr  = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/
1688   for (i=0; i<N; i++) {
1689     idx   = rows[i];
1690     found = PETSC_FALSE;
1691     for (j=0; j<size; j++) {
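      /* owners[] stores block-row ownership ranges, so scale by bs to test the point row idx */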
1692       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
1693         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
1694       }
1695     }
1696     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
1697   }
1698   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
1699 
1700   /* inform other processors of number of messages and max length*/
1701   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
1702 
1703   /* post receives:   */
1704   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
1705   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
1706   for (i=0; i<nrecvs; i++) {
1707     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
1708   }
1709 
1710   /* do sends:
1711      1) starts[i] gives the starting index in svalues for stuff going to
1712      the ith processor
1713   */
1714   ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
1715   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
1716   ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
1717   starts[0]  = 0;
1718   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1719   for (i=0; i<N; i++) {
1720     svalues[starts[owner[i]]++] = rows[i];
1721   }
1722   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
1723 
1724   starts[0] = 0;
1725   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1726   count = 0;
1727   for (i=0; i<size; i++) {
1728     if (nprocs[2*i+1]) {
1729       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
1730     }
1731   }
1732   ierr = PetscFree(starts);CHKERRQ(ierr);
1733 
1734   base = owners[rank]*bs;
1735 
1736   /*  wait on receives */
1737   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
1738   source = lens + nrecvs;
1739   count  = nrecvs; slen = 0;
1740   while (count) {
1741     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
1742     /* unpack receives into our local space */
1743     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
1744     source[imdex]  = recv_status.MPI_SOURCE;
1745     lens[imdex]    = n;
1746     slen          += n;
1747     count--;
1748   }
1749   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1750 
1751   /* move the data into the send scatter */
1752   ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
1753   count = 0;
1754   for (i=0; i<nrecvs; i++) {
1755     values = rvalues + i*nmax;
1756     for (j=0; j<lens[i]; j++) {
1757       lrows[count++] = values[j] - base;
1758     }
1759   }
1760   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1761   ierr = PetscFree(lens);CHKERRQ(ierr);
1762   ierr = PetscFree(owner);CHKERRQ(ierr);
1763   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1764 
1765   /* actually zap the local rows */
1766   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
1767   PetscLogObjectParent(A,istmp);
1768 
1769   /*
1770         Zero the required rows. If the "diagonal block" of the matrix
1771      is square and the user wishes to set the diagonal, we use separate
1772      code so that MatSetValues() is not called for each diagonal entry,
1773      which would allocate new memory each time and slow things down.
1774 
1775        Contributed by: Matthew Knepley
1776   */
1777   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1778   ierr = MatZeroRows_SeqBAIJ(l->B,istmp,0);CHKERRQ(ierr);
1779   if (diag && (l->A->M == l->A->N)) {
1780     ierr = MatZeroRows_SeqBAIJ(l->A,istmp,diag);CHKERRQ(ierr);
1781   } else if (diag) {
1782     ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr);
1783     if (((Mat_SeqBAIJ*)l->A->data)->nonew) {
1784       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1785 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1786     }
1787     for (i=0; i<slen; i++) {
1788       row  = lrows[i] + rstart_bs;
1789       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
1790     }
1791     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1792     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1793   } else {
1794     ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr);
1795   }
1796 
1797   ierr = ISDestroy(istmp);CHKERRQ(ierr);
1798   ierr = PetscFree(lrows);CHKERRQ(ierr);
1799 
1800   /* wait on sends */
1801   if (nsends) {
1802     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1803     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1804     ierr = PetscFree(send_status);CHKERRQ(ierr);
1805   }
1806   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1807   ierr = PetscFree(svalues);CHKERRQ(ierr);
1808 
1809   PetscFunctionReturn(0);
1810 }
1811 
1812 #undef __FUNCT__
1813 #define __FUNCT__ "MatPrintHelp_MPIBAIJ"
1814 int MatPrintHelp_MPIBAIJ(Mat A)
1815 {
1816   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*)A->data;
1817   MPI_Comm    comm = A->comm;
1818   static int  called = 0;
1819   int         ierr;
1820 
1821   PetscFunctionBegin;
1822   if (!a->rank) {
1823     ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr);
1824   }
1825   if (called) {PetscFunctionReturn(0);} else called = 1;
1826   ierr = (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
1827   ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hash table for efficient matrix assembly\n");CHKERRQ(ierr);
1828   PetscFunctionReturn(0);
1829 }
1830 
1831 #undef __FUNCT__
1832 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
1833 int MatSetUnfactored_MPIBAIJ(Mat A)
1834 {
1835   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*)A->data;
1836   int         ierr;
1837 
1838   PetscFunctionBegin;
1839   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1840   PetscFunctionReturn(0);
1841 }
1842 
1843 static int MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
1844 
1845 #undef __FUNCT__
1846 #define __FUNCT__ "MatEqual_MPIBAIJ"
1847 int MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag)
1848 {
1849   Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1850   Mat         a,b,c,d;
1851   PetscTruth  flg;
1852   int         ierr;
1853 
1854   PetscFunctionBegin;
1855   a = matA->A; b = matA->B;
1856   c = matB->A; d = matB->B;
1857 
1858   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1859   if (flg == PETSC_TRUE) {
1860     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1861   }
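  /* combine the local comparisons so every process returns the same global answer */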
1862   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1863   PetscFunctionReturn(0);
1864 }
1865 
1866 
1867 #undef __FUNCT__
1868 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ"
1869 int MatSetUpPreallocation_MPIBAIJ(Mat A)
1870 {
1871   int        ierr;
1872 
1873   PetscFunctionBegin;
1874   ierr =  MatMPIBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1875   PetscFunctionReturn(0);
1876 }
1877 
1878 /* -------------------------------------------------------------------*/
1879 static struct _MatOps MatOps_Values = {
1880   MatSetValues_MPIBAIJ,
1881   MatGetRow_MPIBAIJ,
1882   MatRestoreRow_MPIBAIJ,
1883   MatMult_MPIBAIJ,
1884   MatMultAdd_MPIBAIJ,
1885   MatMultTranspose_MPIBAIJ,
1886   MatMultTransposeAdd_MPIBAIJ,
1887   0,
1888   0,
1889   0,
1890   0,
1891   0,
1892   0,
1893   0,
1894   MatTranspose_MPIBAIJ,
1895   MatGetInfo_MPIBAIJ,
1896   MatEqual_MPIBAIJ,
1897   MatGetDiagonal_MPIBAIJ,
1898   MatDiagonalScale_MPIBAIJ,
1899   MatNorm_MPIBAIJ,
1900   MatAssemblyBegin_MPIBAIJ,
1901   MatAssemblyEnd_MPIBAIJ,
1902   0,
1903   MatSetOption_MPIBAIJ,
1904   MatZeroEntries_MPIBAIJ,
1905   MatZeroRows_MPIBAIJ,
1906   0,
1907   0,
1908   0,
1909   0,
1910   MatSetUpPreallocation_MPIBAIJ,
1911   0,
1912   0,
1913   0,
1914   0,
1915   MatDuplicate_MPIBAIJ,
1916   0,
1917   0,
1918   0,
1919   0,
1920   0,
1921   MatGetSubMatrices_MPIBAIJ,
1922   MatIncreaseOverlap_MPIBAIJ,
1923   MatGetValues_MPIBAIJ,
1924   0,
1925   MatPrintHelp_MPIBAIJ,
1926   MatScale_MPIBAIJ,
1927   0,
1928   0,
1929   0,
1930   MatGetBlockSize_MPIBAIJ,
1931   0,
1932   0,
1933   0,
1934   0,
1935   0,
1936   0,
1937   MatSetUnfactored_MPIBAIJ,
1938   0,
1939   MatSetValuesBlocked_MPIBAIJ,
1940   0,
1941   MatDestroy_MPIBAIJ,
1942   MatView_MPIBAIJ,
1943   MatGetPetscMaps_Petsc,
1944   0,
1945   0,
1946   0,
1947   0,
1948   0,
1949   0,
1950   MatGetRowMax_MPIBAIJ};
1951 
1952 
1953 EXTERN_C_BEGIN
1954 #undef __FUNCT__
1955 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
1956 int MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1957 {
1958   PetscFunctionBegin;
1959   *a      = ((Mat_MPIBAIJ *)A->data)->A;
1960   *iscopy = PETSC_FALSE;
1961   PetscFunctionReturn(0);
1962 }
1963 EXTERN_C_END
1964 
1965 EXTERN_C_BEGIN
1966 #undef __FUNCT__
1967 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
1968 int MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,int bs,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
1969 {
1970   Mat_MPIBAIJ  *b;
1971   int          ierr,i;
1972 
1973   PetscFunctionBegin;
1974   B->preallocated = PETSC_TRUE;
1975   ierr = PetscOptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1976 
1977   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
1978   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
1979   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
1980   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
1981   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
1982   if (d_nnz) {
1983     for (i=0; i<B->m/bs; i++) {
1984       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
1985     }
1986   }
1987   if (o_nnz) {
1988     for (i=0; i<B->m/bs; i++) {
1989       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
1990     }
1991   }
1992 
1993   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr);
1994   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr);
1995   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1996   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
1997 
1998   b = (Mat_MPIBAIJ*)B->data;
1999   b->bs  = bs;
2000   b->bs2 = bs*bs;
2001   b->mbs = B->m/bs;
2002   b->nbs = B->n/bs;
2003   b->Mbs = B->M/bs;
2004   b->Nbs = B->N/bs;
2005 
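  /* gather each process's block-row count, then prefix-sum into global ownership ranges */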
2006   ierr = MPI_Allgather(&b->mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
2007   b->rowners[0]    = 0;
2008   for (i=2; i<=b->size; i++) {
2009     b->rowners[i] += b->rowners[i-1];
2010   }
2011   b->rstart    = b->rowners[b->rank];
2012   b->rend      = b->rowners[b->rank+1];
2013 
2014   ierr = MPI_Allgather(&b->nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
2015   b->cowners[0] = 0;
2016   for (i=2; i<=b->size; i++) {
2017     b->cowners[i] += b->cowners[i-1];
2018   }
2019   b->cstart    = b->cowners[b->rank];
2020   b->cend      = b->cowners[b->rank+1];
2021 
2022   for (i=0; i<=b->size; i++) {
2023     b->rowners_bs[i] = b->rowners[i]*bs;
2024   }
2025   b->rstart_bs = b->rstart*bs;
2026   b->rend_bs   = b->rend*bs;
2027   b->cstart_bs = b->cstart*bs;
2028   b->cend_bs   = b->cend*bs;
2029 
2030   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,B->m,B->n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
2031   PetscLogObjectParent(B,b->A);
2032   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,B->m,B->N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
2033   PetscLogObjectParent(B,b->B);
2034   ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);
2035 
2036   PetscFunctionReturn(0);
2037 }
2038 EXTERN_C_END
2039 
2040 EXTERN_C_BEGIN
2041 EXTERN int MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
2042 EXTERN int MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
2043 EXTERN_C_END
2044 
2045 EXTERN_C_BEGIN
2046 #undef __FUNCT__
2047 #define __FUNCT__ "MatCreate_MPIBAIJ"
2048 int MatCreate_MPIBAIJ(Mat B)
2049 {
2050   Mat_MPIBAIJ  *b;
2051   int          ierr;
2052   PetscTruth   flg;
2053 
2054   PetscFunctionBegin;
2055 
2056   ierr = PetscNew(Mat_MPIBAIJ,&b);CHKERRQ(ierr);
2057   B->data = (void*)b;
2058 
2059   ierr    = PetscMemzero(b,sizeof(Mat_MPIBAIJ));CHKERRQ(ierr);
2060   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2061   B->mapping    = 0;
2062   B->factor     = 0;
2063   B->assembled  = PETSC_FALSE;
2064 
2065   B->insertmode = NOT_SET_VALUES;
2066   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
2067   ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr);
2068 
2069   /* build local table of row and column ownerships */
2070   ierr          = PetscMalloc(3*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
2071   PetscLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
2072   b->cowners    = b->rowners + b->size + 2;
2073   b->rowners_bs = b->cowners + b->size + 2;
2074 
2075   /* build cache for off array entries formed */
2076   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
2077   b->donotstash  = PETSC_FALSE;
2078   b->colmap      = PETSC_NULL;
2079   b->garray      = PETSC_NULL;
2080   b->roworiented = PETSC_TRUE;
2081 
2082 #if defined(PETSC_USE_MAT_SINGLE)
2083   /* stuff for MatSetValues_XXX in single precision */
2084   b->setvalueslen     = 0;
2085   b->setvaluescopy    = PETSC_NULL;
2086 #endif
2087 
2088   /* stuff used in block assembly */
2089   b->barray       = 0;
2090 
2091   /* stuff used for matrix vector multiply */
2092   b->lvec         = 0;
2093   b->Mvctx        = 0;
2094 
2095   /* stuff for MatGetRow() */
2096   b->rowindices   = 0;
2097   b->rowvalues    = 0;
2098   b->getrowactive = PETSC_FALSE;
2099 
2100   /* hash table stuff */
2101   b->ht           = 0;
2102   b->hd           = 0;
2103   b->ht_size      = 0;
2104   b->ht_flag      = PETSC_FALSE;
2105   b->ht_fact      = 0;
2106   b->ht_total_ct  = 0;
2107   b->ht_insert_ct = 0;
2108 
2109   ierr = PetscOptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
2110   if (flg) {
2111     PetscReal fact = 1.39;
2112     ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
2113     ierr = PetscOptionsGetReal(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
2114     if (fact <= 1.0) fact = 1.39;
2115     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
2116     PetscLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact);
2117   }
2118   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2119                                      "MatStoreValues_MPIBAIJ",
2120                                      MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
2121   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2122                                      "MatRetrieveValues_MPIBAIJ",
2123                                      MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
2124   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
2125                                      "MatGetDiagonalBlock_MPIBAIJ",
2126                                      MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
2127   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C",
2128                                      "MatMPIBAIJSetPreallocation_MPIBAIJ",
2129                                      MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
2130   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
2131                                      "MatDiagonalScaleLocal_MPIBAIJ",
2132                                      MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
2133   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C",
2134                                      "MatSetHashTableFactor_MPIBAIJ",
2135                                      MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
2136   PetscFunctionReturn(0);
2137 }
2138 EXTERN_C_END
2139 
2140 #undef __FUNCT__
2141 #define __FUNCT__ "MatMPIBAIJSetPreallocation"
2142 /*@C
2143    MatMPIBAIJSetPreallocation - Creates a sparse parallel matrix in block AIJ format
2144    (block compressed row).  For good matrix assembly performance
2145    the user should preallocate the matrix storage by setting the parameters
2146    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2147    performance can be increased by more than a factor of 50.
2148 
2149    Collective on Mat
2150 
2151    Input Parameters:
2152 +  B - the matrix
2153 .  bs   - size of block
2154 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2155            submatrix  (same for all local rows)
2156 .  d_nnz - array containing the number of block nonzeros in the various block rows
2157            of the in diagonal portion of the local (possibly different for each block
2158            of the diagonal portion of the local submatrix (possibly different for each block
2159 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2160            submatrix (same for all local rows).
2161 -  o_nnz - array containing the number of block nonzeros in the various block rows of the
2162            off-diagonal portion of the local submatrix (possibly different for
2163            each block row) or PETSC_NULL.
2164 
2167 
2168    Options Database Keys:
2169 +   -mat_no_unroll - uses code that does not unroll the loops in the
2170                      block calculations (much slower)
2171 -   -mat_block_size - size of the blocks to use
2172 
2173    Notes:
2174    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
2175    then it must be used on all processors that share the object for that argument.
2176 
2177    Storage Information:
2178    For a square global matrix we define each processor's diagonal portion
2179    to be its local rows and the corresponding columns (a square submatrix);
2180    each processor's off-diagonal portion encompasses the remainder of the
2181    local matrix (a rectangular submatrix).
2182 
2183    The user can specify preallocated storage for the diagonal part of
2184    the local submatrix with either d_nz or d_nnz (not both).  Set
2185    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2186    memory allocation.  Likewise, specify preallocated storage for the
2187    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2188 
2189    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2190    the figure below we depict these three local rows and all columns (0-11).
2191 
2192 .vb
2193            0 1 2 3 4 5 6 7 8 9 10 11
2194           -------------------
2195    row 3  |  o o o d d d o o o o o o
2196    row 4  |  o o o d d d o o o o o o
2197    row 5  |  o o o d d d o o o o o o
2198           -------------------
2199 .ve
2200 
2201    Thus, any entries in the d locations are stored in the d (diagonal)
2202    submatrix, and any entries in the o locations are stored in the
2203    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
2204    stored simply in the MATSEQBAIJ format for compressed row storage.
2205 
2206    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2207    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2208    In general, for PDE problems in which most nonzeros are near the diagonal,
2209    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2210    or you will get TERRIBLE performance; see the users' manual chapter on
2211    matrices.
2212 
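   Sample usage (a minimal sketch; comm, M, and N stand for the user's
   communicator and global sizes, and the preallocation counts are
   illustrative):
.vb
   Mat A;
   ierr = MatCreate(comm,PETSC_DECIDE,PETSC_DECIDE,M,N,&A);CHKERRQ(ierr);
   ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
   ierr = MatMPIBAIJSetPreallocation(A,2,3,PETSC_NULL,1,PETSC_NULL);CHKERRQ(ierr);
.ve
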
2213    Level: intermediate
2214 
2215 .keywords: matrix, block, aij, compressed row, sparse, parallel
2216 
2217 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2218 @*/
2219 int MatMPIBAIJSetPreallocation(Mat B,int bs,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
2220 {
2221   int ierr,(*f)(Mat,int,int,int*,int,int*);
2222 
2223   PetscFunctionBegin;
2224   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2225   if (f) {
2226     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2227   }
2228   PetscFunctionReturn(0);
2229 }
2230 
2231 #undef __FUNCT__
2232 #define __FUNCT__ "MatCreateMPIBAIJ"
2233 /*@C
2234    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
2235    (block compressed row).  For good matrix assembly performance
2236    the user should preallocate the matrix storage by setting the parameters
2237    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2238    performance can be increased by more than a factor of 50.
2239 
2240    Collective on MPI_Comm
2241 
2242    Input Parameters:
2243 +  comm - MPI communicator
2244 .  bs   - size of block
2245 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2246            This value should be the same as the local size used in creating the
2247            y vector for the matrix-vector product y = Ax.
2248 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2249            This value should be the same as the local size used in creating the
2250            x vector for the matrix-vector product y = Ax.
2251 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2252 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2253 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
2254            submatrix  (same for all local rows)
2255 .  d_nnz - array containing the number of nonzero blocks in the various block rows
2256            of the diagonal portion of the local submatrix (possibly different for each block
2257            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
2258 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
2259            submatrix (same for all local rows).
2260 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
2261            off-diagonal portion of the local submatrix (possibly different for
2262            each block row) or PETSC_NULL.
2263 
2264    Output Parameter:
2265 .  A - the matrix
2266 
2267    Options Database Keys:
2268 +   -mat_no_unroll - uses code that does not unroll the loops in the
2269                      block calculations (much slower)
2270 -   -mat_block_size - size of the blocks to use
2271 
2272    Notes:
2273    A nonzero block is any block that has 1 or more nonzeros in it.
2274 
2275    The user MUST specify either the local or global matrix dimensions
2276    (possibly both).
2277 
2278    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
2279    then it must be used on all processors that share the object for that argument.
2280 
2281    Storage Information:
2282    For a square global matrix we define each processor's diagonal portion
2283    to be its local rows and the corresponding columns (a square submatrix);
2284    each processor's off-diagonal portion encompasses the remainder of the
2285    local matrix (a rectangular submatrix).
2286 
2287    The user can specify preallocated storage for the diagonal part of
2288    the local submatrix with either d_nz or d_nnz (not both).  Set
2289    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2290    memory allocation.  Likewise, specify preallocated storage for the
2291    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2292 
2293    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2294    the figure below we depict these three local rows and all columns (0-11).
2295 
2296 .vb
2297            0 1 2 3 4 5 6 7 8 9 10 11
2298           -------------------
2299    row 3  |  o o o d d d o o o o o o
2300    row 4  |  o o o d d d o o o o o o
2301    row 5  |  o o o d d d o o o o o o
2302           -------------------
2303 .ve
2304 
2305    Thus, any entries in the d locations are stored in the d (diagonal)
2306    submatrix, and any entries in the o locations are stored in the
2307    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
2308    stored simply in the MATSEQBAIJ format for compressed row storage.
2309 
2310    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2311    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2312    In general, for PDE problems in which most nonzeros are near the diagonal,
2313    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2314    or you will get TERRIBLE performance; see the users' manual chapter on
2315    matrices.
2316 
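   Sample usage (a minimal sketch; the block size and preallocation counts
   are illustrative):
.vb
   Mat A;
   ierr = MatCreateMPIBAIJ(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DECIDE,M,N,3,PETSC_NULL,1,PETSC_NULL,&A);CHKERRQ(ierr);
.ve
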
2317    Level: intermediate
2318 
2319 .keywords: matrix, block, aij, compressed row, sparse, parallel
2320 
2321 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation()
2322 @*/
2323 int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
2324 {
2325   int ierr,size;
2326 
2327   PetscFunctionBegin;
2328   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2329   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2330   if (size > 1) {
2331     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
2332     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2333   } else {
2334     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
2335     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2336   }
2337   PetscFunctionReturn(0);
2338 }
2339 
2340 #undef __FUNCT__
2341 #define __FUNCT__ "MatDuplicate_MPIBAIJ"
2342 static int MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2343 {
2344   Mat         mat;
2345   Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
2346   int         ierr,len=0;
2347 
2348   PetscFunctionBegin;
2349   *newmat       = 0;
2350   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
2351   ierr = MatSetType(mat,MATMPIBAIJ);CHKERRQ(ierr);
2352   mat->preallocated = PETSC_TRUE;
2353   mat->assembled    = PETSC_TRUE;
2354   a      = (Mat_MPIBAIJ*)mat->data;
2355   a->bs  = oldmat->bs;
2356   a->bs2 = oldmat->bs2;
2357   a->mbs = oldmat->mbs;
2358   a->nbs = oldmat->nbs;
2359   a->Mbs = oldmat->Mbs;
2360   a->Nbs = oldmat->Nbs;
2361 
2362   a->rstart       = oldmat->rstart;
2363   a->rend         = oldmat->rend;
2364   a->cstart       = oldmat->cstart;
2365   a->cend         = oldmat->cend;
2366   a->size         = oldmat->size;
2367   a->rank         = oldmat->rank;
2368   a->donotstash   = oldmat->donotstash;
2369   a->roworiented  = oldmat->roworiented;
2370   a->rowindices   = 0;
2371   a->rowvalues    = 0;
2372   a->getrowactive = PETSC_FALSE;
2373   a->barray       = 0;
2374   a->rstart_bs    = oldmat->rstart_bs;
2375   a->rend_bs      = oldmat->rend_bs;
2376   a->cstart_bs    = oldmat->cstart_bs;
2377   a->cend_bs      = oldmat->cend_bs;
2378 
2379   /* hash table stuff */
2380   a->ht           = 0;
2381   a->hd           = 0;
2382   a->ht_size      = 0;
2383   a->ht_flag      = oldmat->ht_flag;
2384   a->ht_fact      = oldmat->ht_fact;
2385   a->ht_total_ct  = 0;
2386   a->ht_insert_ct = 0;
2387 
2388   ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr);
2389   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
2390   ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr);
2391   if (oldmat->colmap) {
2392 #if defined (PETSC_USE_CTABLE)
2393   ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2394 #else
2395   ierr = PetscMalloc((a->Nbs)*sizeof(int),&a->colmap);CHKERRQ(ierr);
2396   PetscLogObjectMemory(mat,(a->Nbs)*sizeof(int));
2397   ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr);
2398 #endif
2399   } else a->colmap = 0;
2400   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2401     ierr = PetscMalloc(len*sizeof(int),&a->garray);CHKERRQ(ierr);
2402     PetscLogObjectMemory(mat,len*sizeof(int));
2403     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr);
2404   } else a->garray = 0;
2405 
2406   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2407   PetscLogObjectParent(mat,a->lvec);
2408   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2409 
2410   PetscLogObjectParent(mat,a->Mvctx);
2411   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2412   PetscLogObjectParent(mat,a->A);
2413   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2414   PetscLogObjectParent(mat,a->B);
2415   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
2416   *newmat = mat;
2417   PetscFunctionReturn(0);
2418 }
2419 
2420 #include "petscsys.h"
2421 
2422 EXTERN_C_BEGIN
2423 #undef __FUNCT__
2424 #define __FUNCT__ "MatLoad_MPIBAIJ"
2425 int MatLoad_MPIBAIJ(PetscViewer viewer,MatType type,Mat *newmat)
2426 {
2427   Mat          A;
2428   int          i,nz,ierr,j,rstart,rend,fd;
2429   PetscScalar  *vals,*buf;
2430   MPI_Comm     comm = ((PetscObject)viewer)->comm;
2431   MPI_Status   status;
2432   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
2433   int          *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf;
2434   int          tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows;
2435   int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2436   int          dcount,kmax,k,nzcount,tmp;
2437 #if defined(PETSC_HAVE_DSCPACK)
2438   PetscTruth   flag;
2439 #endif
2440 
2441   PetscFunctionBegin;
2442   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2443 
2444   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2445   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2446   if (!rank) {
2447     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2448     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2449     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
2450     if (header[3] < 0) {
2451       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPIBAIJ");
2452     }
2453   }
2454 
2455   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
2456   M = header[1]; N = header[2];
2457 
2458   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2459 
2460   /*
2461      This code adds extra rows to make sure the number of rows is
2462      divisible by the blocksize
2463   */
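  /* e.g., M=10 and bs=4: extra_rows=2 and Mbs is bumped to 3, so the padded matrix has 12 rows */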
2464   Mbs        = M/bs;
2465   extra_rows = bs - M + bs*(Mbs);
2466   if (extra_rows == bs) extra_rows = 0;
2467   else                  Mbs++;
2468   if (extra_rows && !rank) {
2469     PetscLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n");
2470   }
2471 
2472   /* determine ownership of all rows */
2473   mbs        = Mbs/size + ((Mbs % size) > rank);
2474   m          = mbs*bs;
2475   ierr       = PetscMalloc(2*(size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
2476   browners   = rowners + size + 1;
2477   ierr       = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2478   rowners[0] = 0;
2479   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2480   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2481   rstart = rowners[rank];
2482   rend   = rowners[rank+1];
2483 
2484   /* distribute row lengths to all processors */
2485   ierr = PetscMalloc((rend-rstart)*bs*sizeof(int),&locrowlens);CHKERRQ(ierr);
2486   if (!rank) {
2487     ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr);
2488     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2489     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2490     ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
2491     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2492     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2493     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2494   } else {
2495     ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2496   }
2497 
2498   if (!rank) {
2499     /* calculate the number of nonzeros on each processor */
2500     ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
2501     ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
2502     for (i=0; i<size; i++) {
2503       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2504         procsnz[i] += rowlengths[j];
2505       }
2506     }
2507     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2508 
2509     /* determine max buffer needed and allocate it */
2510     maxnz = 0;
2511     for (i=0; i<size; i++) {
2512       maxnz = PetscMax(maxnz,procsnz[i]);
2513     }
2514     ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);
2515 
2516     /* read in my part of the matrix column indices  */
2517     nz     = procsnz[0];
2518     ierr   = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr);
2519     mycols = ibuf;
2520     if (size == 1)  nz -= extra_rows;
2521     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2522     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2523 
2524     /* read in every other processor's part (except the last) and ship it off */
2525     for (i=1; i<size-1; i++) {
2526       nz   = procsnz[i];
2527       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2528       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
2529     }
2530     /* read in the stuff for the last proc */
2531     if (size != 1) {
2532       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2533       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2534       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2535       ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
2536     }
2537     ierr = PetscFree(cols);CHKERRQ(ierr);
2538   } else {
2539     /* determine buffer space needed for message */
2540     nz = 0;
2541     for (i=0; i<m; i++) {
2542       nz += locrowlens[i];
2543     }
2544     ierr   = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr);
2545     mycols = ibuf;
2546     /* receive message of column indices*/
2547     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2548     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2549     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2550   }
2551 
2552   /* loop over local rows, determining number of off diagonal entries */
2553   ierr     = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&dlens);CHKERRQ(ierr);
2554   odlens   = dlens + (rend-rstart);
2555   ierr     = PetscMalloc(3*Mbs*sizeof(int),&mask);CHKERRQ(ierr);
2556   ierr     = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr);
2557   masked1  = mask    + Mbs;
2558   masked2  = masked1 + Mbs;
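  /* mask[] marks block columns already seen in the current block row; masked1[]/masked2[] record the diagonal/off-diagonal hits so the mask can be cleared afterwards */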
2559   rowcount = 0; nzcount = 0;
2560   for (i=0; i<mbs; i++) {
2561     dcount  = 0;
2562     odcount = 0;
2563     for (j=0; j<bs; j++) {
2564       kmax = locrowlens[rowcount];
2565       for (k=0; k<kmax; k++) {
2566         tmp = mycols[nzcount++]/bs;
2567         if (!mask[tmp]) {
2568           mask[tmp] = 1;
2569           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
2570           else masked1[dcount++] = tmp;
2571         }
2572       }
2573       rowcount++;
2574     }
2575 
2576     dlens[i]  = dcount;
2577     odlens[i] = odcount;
2578 
2579     /* zero out the mask elements we set */
2580     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2581     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2582   }
2583 
2584   /* create our matrix */
2585   ierr = MatCreateMPIBAIJ(comm,bs,m,m,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);CHKERRQ(ierr);
2586   A = *newmat;
2587   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2588 
2589   if (!rank) {
2590     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2591     /* read in my part of the matrix numerical values  */
2592     nz = procsnz[0];
2593     vals = buf;
2594     mycols = ibuf;
2595     if (size == 1)  nz -= extra_rows;
2596     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2597     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2598 
2599     /* insert into matrix */
2600     jj      = rstart*bs;
2601     for (i=0; i<m; i++) {
2602       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2603       mycols += locrowlens[i];
2604       vals   += locrowlens[i];
2605       jj++;
2606     }
2607     /* read in other processors (except the last one) and ship out */
2608     for (i=1; i<size-1; i++) {
2609       nz   = procsnz[i];
2610       vals = buf;
2611       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2612       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2613     }
2614     /* the last proc */
2615     if (size != 1){
2616       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2617       vals = buf;
2618       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2619       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2620       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
2621     }
2622     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2623   } else {
2624     /* receive numeric values */
2625     ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2626 
2627     /* receive message of values*/
2628     vals   = buf;
2629     mycols = ibuf;
2630     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2631     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2632     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2633 
2634     /* insert into matrix */
2635     jj      = rstart*bs;
2636     for (i=0; i<m; i++) {
2637       ierr    = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2638       mycols += locrowlens[i];
2639       vals   += locrowlens[i];
2640       jj++;
2641     }
2642   }
2643   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2644   ierr = PetscFree(buf);CHKERRQ(ierr);
2645   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2646   ierr = PetscFree(rowners);CHKERRQ(ierr);
2647   ierr = PetscFree(dlens);CHKERRQ(ierr);
2648   ierr = PetscFree(mask);CHKERRQ(ierr);
2649   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2650   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2651 #if defined(PETSC_HAVE_DSCPACK)
2652   ierr = PetscOptionsHasName(A->prefix,"-mat_baij_dscpack",&flag);CHKERRQ(ierr);
2653   if (flag) { ierr = MatUseDSCPACK_MPIBAIJ(A);CHKERRQ(ierr); }
2654 #endif
2655   PetscFunctionReturn(0);
2656 }
2657 EXTERN_C_END
2658 
2659 #undef __FUNCT__
2660 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
2661 /*@
2662    MatMPIBAIJSetHashTableFactor - Sets the factor that determines the size of the hash table used during matrix assembly.
2663 
2664    Input Parameters:
2665 +  mat  - the matrix
2666 -  fact - factor used to compute the hash table size
2667 
2668    Collective on Mat
2669 
2670    Level: advanced
2671 
2672   Notes:
2673    This can also be set by the command line option: -mat_use_hash_table <fact>
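
   Sample usage (sketch; 1.39 is the default factor this file falls back to):
.vb
   ierr = MatMPIBAIJSetHashTableFactor(A,1.39);CHKERRQ(ierr);
.ve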
2674 
2675 .keywords: matrix, hashtable, factor, HT
2676 
2677 .seealso: MatSetOption()
2678 @*/
2679 int MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
2680 {
2681   int ierr,(*f)(Mat,PetscReal);
2682 
2683   PetscFunctionBegin;
2684   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);CHKERRQ(ierr);
2685   if (f) {
2686     ierr = (*f)(mat,fact);CHKERRQ(ierr);
2687   }
2688   PetscFunctionReturn(0);
2689 }
2690 
2691 #undef __FUNCT__
2692 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
2693 int MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
2694 {
2695   Mat_MPIBAIJ *baij;
2696 
2697   PetscFunctionBegin;
2698   PetscValidHeaderSpecific(mat,MAT_COOKIE);
2699   baij = (Mat_MPIBAIJ*)mat->data;
2700   baij->ht_fact = fact;
2701   PetscFunctionReturn(0);
2702 }
2703 
2704 #undef __FUNCT__
2705 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
2706 int MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,int **colmap)
2707 {
2708   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2709   PetscFunctionBegin;
2710   *Ad     = a->A;
2711   *Ao     = a->B;
2712   *colmap = a->garray;
2713   PetscFunctionReturn(0);
2714 }
2715