1 /*$Id: mpibaij.c,v 1.203 2000/09/28 21:11:32 bsmith Exp bsmith $*/
2 
3 #include "src/mat/impls/baij/mpi/mpibaij.h"   /*I  "petscmat.h"  I*/
4 #include "src/vec/vecimpl.h"
5 
6 EXTERN int MatSetUpMultiply_MPIBAIJ(Mat);
7 EXTERN int DisAssemble_MPIBAIJ(Mat);
8 EXTERN int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int);
9 EXTERN int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatReuse,Mat **);
10 EXTERN int MatGetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *);
11 EXTERN int MatSetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *,InsertMode);
12 EXTERN int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
13 EXTERN int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
14 EXTERN int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
15 EXTERN int MatPrintHelp_SeqBAIJ(Mat);
16 EXTERN int MatZeroRows_SeqBAIJ(Mat,IS,Scalar*);
17 
18 /*  UGLY, ugly, ugly
19    When MatScalar == Scalar the ..._MatScalar() functions do not exist; the names below
20    simply #define to the usual routines. Otherwise ..._MatScalar() takes matrix elements
21    already in single precision and inserts them into the single precision data structures,
22    while MatSetValuesBlocked_MPIBAIJ() converts the incoming Scalar (double) entries to
23    single precision and calls ..._MatScalar() to insert them (sketch after the definitions below).
24 */
25 #if defined(PETSC_USE_MAT_SINGLE)
26 EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
27 EXTERN int MatSetValues_MPIBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
28 EXTERN int MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
29 EXTERN int MatSetValues_MPIBAIJ_HT_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
30 EXTERN int MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
31 #else
32 #define MatSetValuesBlocked_SeqBAIJ_MatScalar      MatSetValuesBlocked_SeqBAIJ
33 #define MatSetValues_MPIBAIJ_MatScalar             MatSetValues_MPIBAIJ
34 #define MatSetValuesBlocked_MPIBAIJ_MatScalar      MatSetValuesBlocked_MPIBAIJ
35 #define MatSetValues_MPIBAIJ_HT_MatScalar          MatSetValues_MPIBAIJ_HT
36 #define MatSetValuesBlocked_MPIBAIJ_HT_MatScalar   MatSetValuesBlocked_MPIBAIJ_HT
37 #endif
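/*
   A minimal standalone sketch of the conversion described above, assuming Scalar is
   double and MatScalar is float (as under PETSC_USE_MAT_SINGLE); the real wrappers
   below reuse a scratch buffer cached in the matrix (setvaluescopy) instead of
   allocating on every call. Names here are illustrative, not PETSc API.
*/
#if 0
#include <stdlib.h>

typedef double Scalar;     /* user-visible precision */
typedef float  MatScalar;  /* storage precision */

static int InsertConverted(int N,const Scalar *v)
{
  MatScalar *vsingle = (MatScalar*)malloc(N*sizeof(MatScalar));
  int       i;

  if (!vsingle) return 1;
  for (i=0; i<N; i++) vsingle[i] = (MatScalar)v[i]; /* double -> float */
  /* ... hand vsingle to the ..._MatScalar() insertion routine here ... */
  free(vsingle);
  return 0;
}
#endif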
38 
39 #undef __FUNC__
40 #define __FUNC__ /*<a name=""></a>*/"MatGetRowMax_MPIBAIJ"
41 int MatGetRowMax_MPIBAIJ(Mat A,Vec v)
42 {
43   Mat_MPIBAIJ  *a = (Mat_MPIBAIJ*)A->data;
44   int          ierr,i;
45   PetscReal    *va,*vb;
46   Vec          vtmp;
47 
48   PetscFunctionBegin;
49 
50   ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr);
51   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
52 
53   ierr = VecCreateSeq(PETSC_COMM_SELF,a->m,&vtmp);CHKERRQ(ierr);
54   ierr = MatGetRowMax(a->B,vtmp);CHKERRQ(ierr);
55   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
56 
57   for (i=0; i<a->m; i++){
58     if (va[i] < vb[i]) va[i] = vb[i];
59   }
60 
61   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
62   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
63   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
64 
65   PetscFunctionReturn(0);
66 }
67 
68 EXTERN_C_BEGIN
69 #undef __FUNC__
70 #define __FUNC__ /*<a name="MatStoreValues_MPIBAIJ"></a>*/"MatStoreValues_MPIBAIJ"
71 int MatStoreValues_MPIBAIJ(Mat mat)
72 {
73   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
74   int         ierr;
75 
76   PetscFunctionBegin;
77   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
78   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
79   PetscFunctionReturn(0);
80 }
81 EXTERN_C_END
82 
83 EXTERN_C_BEGIN
84 #undef __FUNC__
85 #define __FUNC__ /*<a name="MatRetrieveValues_MPIBAIJ"></a>*/"MatRetrieveValues_MPIBAIJ"
86 int MatRetrieveValues_MPIBAIJ(Mat mat)
87 {
88   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
89   int         ierr;
90 
91   PetscFunctionBegin;
92   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
93   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
94   PetscFunctionReturn(0);
95 }
96 EXTERN_C_END
97 
98 /*
99      Local utility routine that creates a mapping from the global column
100    number to the local number in the off-diagonal part of the local
101    storage of the matrix.  This is done in a non-scalable way since the
102    length of colmap equals the global matrix length; a lookup sketch follows the routine.
103 */
104 #undef __FUNC__
105 #define __FUNC__ /*<a name="CreateColmap_MPIBAIJ_Private"></a>*/"CreateColmap_MPIBAIJ_Private"
106 static int CreateColmap_MPIBAIJ_Private(Mat mat)
107 {
108   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
109   Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data;
110   int         nbs = B->nbs,i,bs=B->bs,ierr;
111 
112   PetscFunctionBegin;
113 #if defined (PETSC_USE_CTABLE)
114   ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr);
115   for (i=0; i<nbs; i++){
116     ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
117   }
118 #else
119   baij->colmap = (int*)PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap);
120   PLogObjectMemory(mat,baij->Nbs*sizeof(int));
121   ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));CHKERRQ(ierr);
122   for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
123 #endif
124   PetscFunctionReturn(0);
125 }
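/*
   A small sketch (illustrative data, not PETSc API) of how the non-CTABLE colmap
   built above is consulted later, e.g. in MatSetValues_MPIBAIJ_MatScalar(): entries
   hold the local block index times bs plus one, so zero means "column not present"
   and the +1 bias is removed on lookup.
*/
#if 0
#include <stdio.h>

int main(void)
{
  int garray[] = {3,7,9};                 /* off-diagonal global block columns */
  int colmap[12] = {0};                   /* length = global number of block cols */
  int nbs = 3,bs = 2,i,gcol,col;

  for (i=0; i<nbs; i++) colmap[garray[i]] = i*bs + 1;  /* build, biased by +1 */

  gcol = 15;                              /* a global point column */
  col  = colmap[gcol/bs] - 1 + gcol%bs;   /* local point column, or negative */
  printf("global col %d -> local col %d\n",gcol,col);  /* prints 15 -> 3 */
  return 0;
}
#endif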
126 
127 #define CHUNKSIZE  10
128 
129 #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
130 { \
131  \
132     brow = row/bs;  \
133     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
134     rmax = aimax[brow]; nrow = ailen[brow]; \
135       bcol = col/bs; \
136       ridx = row % bs; cidx = col % bs; \
137       low = 0; high = nrow; \
138       while (high-low > 3) { \
139         t = (low+high)/2; \
140         if (rp[t] > bcol) high = t; \
141         else              low  = t; \
142       } \
143       for (_i=low; _i<high; _i++) { \
144         if (rp[_i] > bcol) break; \
145         if (rp[_i] == bcol) { \
146           bap  = ap +  bs2*_i + bs*cidx + ridx; \
147           if (addv == ADD_VALUES) *bap += value;  \
148           else                    *bap  = value;  \
149           goto a_noinsert; \
150         } \
151       } \
152       if (a->nonew == 1) goto a_noinsert; \
153       else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
154       if (nrow >= rmax) { \
155         /* there is no extra room in row, therefore enlarge */ \
156         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
157         MatScalar *new_a; \
158  \
159         if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
160  \
161         /* malloc new storage space */ \
162         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \
163         new_a   = (MatScalar*)PetscMalloc(len);CHKPTRQ(new_a); \
164         new_j   = (int*)(new_a + bs2*new_nz); \
165         new_i   = new_j + new_nz; \
166  \
167         /* copy over old data into new slots */ \
168         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \
169         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
170         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
171         len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
172         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
173         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
174         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
175         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
176                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
177         /* free up old matrix storage */ \
178         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
179         if (!a->singlemalloc) { \
180           ierr = PetscFree(a->i);CHKERRQ(ierr); \
181           ierr = PetscFree(a->j);CHKERRQ(ierr);\
182         } \
183         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
184         a->singlemalloc = PETSC_TRUE; \
185  \
186         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
187         rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
188         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
189         a->maxnz += bs2*CHUNKSIZE; \
190         a->reallocs++; \
191         a->nz++; \
192       } \
193       N = nrow++ - 1;  \
194       /* shift up all the later entries in this row */ \
195       for (ii=N; ii>=_i; ii--) { \
196         rp[ii+1] = rp[ii]; \
197         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
198       } \
199       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
200       rp[_i]                      = bcol;  \
201       ap[bs2*_i + bs*cidx + ridx] = value;  \
202       a_noinsert:; \
203     ailen[brow] = nrow; \
204 }
205 
206 #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
207 { \
208     brow = row/bs;  \
209     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
210     rmax = bimax[brow]; nrow = bilen[brow]; \
211       bcol = col/bs; \
212       ridx = row % bs; cidx = col % bs; \
213       low = 0; high = nrow; \
214       while (high-low > 3) { \
215         t = (low+high)/2; \
216         if (rp[t] > bcol) high = t; \
217         else              low  = t; \
218       } \
219       for (_i=low; _i<high; _i++) { \
220         if (rp[_i] > bcol) break; \
221         if (rp[_i] == bcol) { \
222           bap  = ap +  bs2*_i + bs*cidx + ridx; \
223           if (addv == ADD_VALUES) *bap += value;  \
224           else                    *bap  = value;  \
225           goto b_noinsert; \
226         } \
227       } \
228       if (b->nonew == 1) goto b_noinsert; \
229       else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
230       if (nrow >= rmax) { \
231         /* there is no extra room in row, therefore enlarge */ \
232         int       new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
233         MatScalar *new_a; \
234  \
235         if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
236  \
237         /* malloc new storage space */ \
238         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \
239         new_a   = (MatScalar*)PetscMalloc(len);CHKPTRQ(new_a); \
240         new_j   = (int*)(new_a + bs2*new_nz); \
241         new_i   = new_j + new_nz; \
242  \
243         /* copy over old data into new slots */ \
244         for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \
245         for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
246         ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
247         len  = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
248         ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
249         ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
250         ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
251         ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
252                     ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
253         /* free up old matrix storage */ \
254         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
255         if (!b->singlemalloc) { \
256           ierr = PetscFree(b->i);CHKERRQ(ierr); \
257           ierr = PetscFree(b->j);CHKERRQ(ierr); \
258         } \
259         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
260         b->singlemalloc = PETSC_TRUE; \
261  \
262         rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
263         rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
264         PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
265         b->maxnz += bs2*CHUNKSIZE; \
266         b->reallocs++; \
267         b->nz++; \
268       } \
269       N = nrow++ - 1;  \
270       /* shift up all the later entries in this row */ \
271       for (ii=N; ii>=_i; ii--) { \
272         rp[ii+1] = rp[ii]; \
273         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
274       } \
275       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
276       rp[_i]                      = bcol;  \
277       ap[bs2*_i + bs*cidx + ridx] = value;  \
278       b_noinsert:; \
279     bilen[brow] = nrow; \
280 }
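/*
   Both macros above locate the target block column the same way: bisect until the
   search window holds at most three entries, then scan linearly (and remember the
   insertion point if the column is absent). The search in isolation, with
   illustrative data:
*/
#if 0
#include <stdio.h>

int main(void)
{
  int rp[] = {1,4,6,9,11,14,20};       /* sorted block columns of one row */
  int nrow = 7,bcol = 9,low = 0,high = nrow,t,_i;

  while (high-low > 3) {               /* bisection phase */
    t = (low+high)/2;
    if (rp[t] > bcol) high = t;
    else              low  = t;
  }
  for (_i=low; _i<high; _i++) {        /* short linear scan */
    if (rp[_i] > bcol) break;          /* absent: would insert at _i */
    if (rp[_i] == bcol) { printf("found at %d\n",_i); return 0; }
  }
  printf("insert at %d\n",_i);
  return 0;
}
#endif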
281 
282 #if defined(PETSC_USE_MAT_SINGLE)
283 #undef __FUNC__
284 #define __FUNC__ /*<a name="MatSetValues_MPIBAIJ"></a>*/"MatSetValues_MPIBAIJ"
285 int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
286 {
287   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
288   int         ierr,i,N = m*n;
289   MatScalar   *vsingle;
290 
291   PetscFunctionBegin;
292   if (N > b->setvalueslen) {
293     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
294     b->setvaluescopy = (MatScalar*)PetscMalloc(N*sizeof(MatScalar));CHKPTRQ(b->setvaluescopy);
295     b->setvalueslen  = N;
296   }
297   vsingle = b->setvaluescopy;
298 
299   for (i=0; i<N; i++) {
300     vsingle[i] = v[i];
301   }
302   ierr = MatSetValues_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
303   PetscFunctionReturn(0);
304 }
305 
306 #undef __FUNC__
307 #define __FUNC__ /*<a name="MatSetValuesBlocked_MPIBAIJ"></a>*/"MatSetValuesBlocked_MPIBAIJ"
308 int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
309 {
310   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
311   int         ierr,i,N = m*n*b->bs2;
312   MatScalar   *vsingle;
313 
314   PetscFunctionBegin;
315   if (N > b->setvalueslen) {
316     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
317     b->setvaluescopy = (MatScalar*)PetscMalloc(N*sizeof(MatScalar));CHKPTRQ(b->setvaluescopy);
318     b->setvalueslen  = N;
319   }
320   vsingle = b->setvaluescopy;
321   for (i=0; i<N; i++) {
322     vsingle[i] = v[i];
323   }
324   ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
325   PetscFunctionReturn(0);
326 }
327 
328 #undef __FUNC__
329 #define __FUNC__ /*<a name="MatSetValues_MPIBAIJ_HT"></a>*/"MatSetValues_MPIBAIJ_HT"
330 int MatSetValues_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
331 {
332   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
333   int         ierr,i,N = m*n;
334   MatScalar   *vsingle;
335 
336   PetscFunctionBegin;
337   if (N > b->setvalueslen) {
338     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
339     b->setvaluescopy = (MatScalar*)PetscMalloc(N*sizeof(MatScalar));CHKPTRQ(b->setvaluescopy);
340     b->setvalueslen  = N;
341   }
342   vsingle = b->setvaluescopy;
343   for (i=0; i<N; i++) {
344     vsingle[i] = v[i];
345   }
346   ierr = MatSetValues_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
347   PetscFunctionReturn(0);
348 }
349 
350 #undef __FUNC__
351 #define __FUNC__ /*<a name="MatSetValuesBlocked_MPIBAIJ_HT"></a>*/"MatSetValuesBlocked_MPIBAIJ_HT"
352 int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
353 {
354   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
355   int         ierr,i,N = m*n*b->bs2;
356   MatScalar   *vsingle;
357 
358   PetscFunctionBegin;
359   if (N > b->setvalueslen) {
360     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
361     b->setvaluescopy = (MatScalar*)PetscMalloc(N*sizeof(MatScalar));CHKPTRQ(b->setvaluescopy);
362     b->setvalueslen  = N;
363   }
364   vsingle = b->setvaluescopy;
365   for (i=0; i<N; i++) {
366     vsingle[i] = v[i];
367   }
368   ierr = MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
369   PetscFunctionReturn(0);
370 }
371 #endif
372 
373 #undef __FUNC__
374 #define __FUNC__ /*<a name="MatSetValues_MPIBAIJ"></a>*/"MatSetValues_MPIBAIJ"
375 int MatSetValues_MPIBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
376 {
377   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
378   MatScalar   value;
379   int         ierr,i,j,row,col;
380   int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs;
381   int         rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
382   int         cend_orig=baij->cend_bs,bs=baij->bs;
383 
384   /* Some Variables required in the macro */
385   Mat         A = baij->A;
386   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data;
387   int         *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
388   MatScalar   *aa=a->a;
389 
390   Mat         B = baij->B;
391   Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data;
392   int         *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
393   MatScalar   *ba=b->a;
394 
395   int         *rp,ii,nrow,_i,rmax,N,brow,bcol;
396   int         low,high,t,ridx,cidx,bs2=a->bs2;
397   MatScalar   *ap,*bap;
398 
399   PetscFunctionBegin;
400   for (i=0; i<m; i++) {
401     if (im[i] < 0) continue;
402 #if defined(PETSC_USE_BOPT_g)
403     if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
404 #endif
405     if (im[i] >= rstart_orig && im[i] < rend_orig) {
406       row = im[i] - rstart_orig;
407       for (j=0; j<n; j++) {
408         if (in[j] >= cstart_orig && in[j] < cend_orig){
409           col = in[j] - cstart_orig;
410           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
411           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
412           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
413         } else if (in[j] < 0) continue;
414 #if defined(PETSC_USE_BOPT_g)
415         else if (in[j] >= baij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Col too large");}
416 #endif
417         else {
418           if (mat->was_assembled) {
419             if (!baij->colmap) {
420               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
421             }
422 #if defined (PETSC_USE_CTABLE)
423             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
424             col  = col - 1 + in[j]%bs;
425 #else
426             col = baij->colmap[in[j]/bs] - 1 + in[j]%bs;
427 #endif
428             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
429               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
430               col =  in[j];
431               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
432               B = baij->B;
433               b = (Mat_SeqBAIJ*)(B)->data;
434               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
435               ba=b->a;
436             }
437           } else col = in[j];
438           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
439           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
440           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
441         }
442       }
443     } else {
444       if (!baij->donotstash) {
445         if (roworiented) {
446           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
447         } else {
448           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
449         }
450       }
451     }
452   }
453   PetscFunctionReturn(0);
454 }
455 
456 #undef __FUNC__
457 #define __FUNC__ /*<a name="MatSetValuesBlocked_MPIBAIJ"></a>*/"MatSetValuesBlocked_MPIBAIJ"
458 int MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
459 {
460   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
461   MatScalar   *value,*barray=baij->barray;
462   int         ierr,i,j,ii,jj,row,col;
463   int         roworiented = baij->roworiented,rstart=baij->rstart;
464   int         rend=baij->rend,cstart=baij->cstart,stepval;
465   int         cend=baij->cend,bs=baij->bs,bs2=baij->bs2;
466 
467   PetscFunctionBegin;
468   if(!barray) {
469     baij->barray = barray = (MatScalar*)PetscMalloc(bs2*sizeof(MatScalar));CHKPTRQ(barray);
470   }
471 
472   if (roworiented) {
473     stepval = (n-1)*bs;
474   } else {
475     stepval = (m-1)*bs;
476   }
477   for (i=0; i<m; i++) {
478     if (im[i] < 0) continue;
479 #if defined(PETSC_USE_BOPT_g)
480     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %d max %d",im[i],baij->Mbs);
481 #endif
482     if (im[i] >= rstart && im[i] < rend) {
483       row = im[i] - rstart;
484       for (j=0; j<n; j++) {
485         /* If n == 1 (row-oriented) or m == 1 (column-oriented) the block is already contiguous, so no copy is required */
486         if ((roworiented) && (n == 1)) {
487           barray = v + i*bs2;
488         } else if((!roworiented) && (m == 1)) {
489           barray = v + j*bs2;
490         } else { /* Here a copy is required */
491           if (roworiented) {
492             value = v + i*(stepval+bs)*bs + j*bs;
493           } else {
494             value = v + j*(stepval+bs)*bs + i*bs;
495           }
496           for (ii=0; ii<bs; ii++,value+=stepval) {
497             for (jj=0; jj<bs; jj++) {
498               *barray++  = *value++;
499             }
500           }
501           barray -=bs2;
502         }
503 
504         if (in[j] >= cstart && in[j] < cend){
505           col  = in[j] - cstart;
506           ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
507         }
508         else if (in[j] < 0) continue;
509 #if defined(PETSC_USE_BOPT_g)
510         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %d max %d",in[j],baij->Nbs);}
511 #endif
512         else {
513           if (mat->was_assembled) {
514             if (!baij->colmap) {
515               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
516             }
517 
518 #if defined(PETSC_USE_BOPT_g)
519 #if defined (PETSC_USE_CTABLE)
520             { int data;
521               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
522               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
523             }
524 #else
525             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
526 #endif
527 #endif
528 #if defined (PETSC_USE_CTABLE)
529 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
530             col  = (col - 1)/bs;
531 #else
532             col = (baij->colmap[in[j]] - 1)/bs;
533 #endif
534             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
535               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
536               col =  in[j];
537             }
538           }
539           else col = in[j];
540           ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
541         }
542       }
543     } else {
544       if (!baij->donotstash) {
545         if (roworiented) {
546           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
547         } else {
548           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
549         }
550       }
551     }
552   }
553   PetscFunctionReturn(0);
554 }
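/*
   The stepval copy loop above extracts one bs-by-bs block from the caller's
   (m*bs)-by-(n*bs) value array and packs it contiguously (same orientation) for the
   sequential insertion routines. A standalone sketch of the row-oriented case, with
   illustrative sizes:
*/
#if 0
#include <stdio.h>

int main(void)
{
  /* row-oriented: m = 1 block row, n = 2 block columns, bs = 2 */
  double v[] = {1,2, 5,6,
                3,4, 7,8};
  int    n = 2,bs = 2,bs2 = bs*bs,i = 0,j = 1,ii,jj;
  int    stepval = (n-1)*bs;  /* entries to skip to reach the same block
                                 in the next point row */
  double barray[4],*value,*p = barray;

  value = v + i*(stepval+bs)*bs + j*bs;   /* top-left entry of block (i,j) */
  for (ii=0; ii<bs; ii++,value+=stepval) {
    for (jj=0; jj<bs; jj++) *p++ = *value++;
  }
  for (ii=0; ii<bs2; ii++) printf("%g ",barray[ii]);  /* prints 5 6 7 8 */
  printf("\n");
  return 0;
}
#endif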
555 
556 #define HASH_KEY 0.6180339887
557 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
558 /* #define HASH(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
559 /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
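/*
   HASH() is multiplicative hashing with the golden-ratio constant: scale the key,
   keep the fractional part, and spread it over the table slots. Standalone, with an
   illustrative table size:
*/
#if 0
#include <stdio.h>

#define HASH_KEY 0.6180339887
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))

int main(void)
{
  double tmp;
  int    size = 11,key;

  for (key=1; key<=5; key++) printf("key %d -> slot %d\n",key,HASH(size,key,tmp));
  return 0;
}
#endif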
560 #undef __FUNC__
561 #define __FUNC__ /*<a name="MatSetValues_MPIBAIJ_HT_MatScalar"></a>*/"MatSetValues_MPIBAIJ_HT_MatScalar"
562 int MatSetValues_MPIBAIJ_HT_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
563 {
564   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
565   int         ierr,i,j,row,col;
566   int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
567   int         rend_orig=baij->rend_bs,Nbs=baij->Nbs;
568   int         h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx;
569   PetscReal   tmp;
570   MatScalar   **HD = baij->hd,value;
571 #if defined(PETSC_USE_BOPT_g)
572   int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
573 #endif
574 
575   PetscFunctionBegin;
576 
577   for (i=0; i<m; i++) {
578 #if defined(PETSC_USE_BOPT_g)
579     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
580     if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
581 #endif
582     row = im[i];
583     if (row >= rstart_orig && row < rend_orig) {
584       for (j=0; j<n; j++) {
585         col = in[j];
586         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
587         /* Look up the entry in the hash table */
588         key = (row/bs)*Nbs+(col/bs)+1;
589         h1  = HASH(size,key,tmp);
590 
591 
592         idx = h1;
593 #if defined(PETSC_USE_BOPT_g)
594         insert_ct++;
595         total_ct++;
596         if (HT[idx] != key) {
597           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
598           if (idx == size) {
599             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
600             if (idx == h1) {
601               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"(row,col) has no entry in the hash table");
602             }
603           }
604         }
605 #else
606         if (HT[idx] != key) {
607           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
608           if (idx == size) {
609             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
610             if (idx == h1) {
611               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"(row,col) has no entry in the hash table");
612             }
613           }
614         }
615 #endif
616         /* A HASH table entry is found, so insert the values at the correct address */
617         if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
618         else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
619       }
620     } else {
621       if (!baij->donotstash) {
622         if (roworiented) {
623           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
624         } else {
625           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
626         }
627       }
628     }
629   }
630 #if defined(PETSC_USE_BOPT_g)
631   baij->ht_total_ct = total_ct;
632   baij->ht_insert_ct = insert_ct;
633 #endif
634   PetscFunctionReturn(0);
635 }
636 
637 #undef __FUNC__
638 #define __FUNC__ /*<a name=""></a>*/"MatSetValuesBlocked_MPIBAIJ_HT_MatScalar"
639 int MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
640 {
641   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
642   int         ierr,i,j,ii,jj,row,col;
643   int         roworiented = baij->roworiented,rstart=baij->rstart ;
644   int         rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2;
645   int         h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
646   PetscReal   tmp;
647   MatScalar   **HD = baij->hd,*baij_a;
648   MatScalar   *v_t,*value;
649 #if defined(PETSC_USE_BOPT_g)
650   int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
651 #endif
652 
653   PetscFunctionBegin;
654 
655   if (roworiented) {
656     stepval = (n-1)*bs;
657   } else {
658     stepval = (m-1)*bs;
659   }
660   for (i=0; i<m; i++) {
661 #if defined(PETSC_USE_BOPT_g)
662     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
663     if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
664 #endif
665     row   = im[i];
666     v_t   = v + i*bs2;
667     if (row >= rstart && row < rend) {
668       for (j=0; j<n; j++) {
669         col = in[j];
670 
671         /* Look up the entry in the hash table */
672         key = row*Nbs+col+1;
673         h1  = HASH(size,key,tmp);
674 
675         idx = h1;
676 #if defined(PETSC_USE_BOPT_g)
677         total_ct++;
678         insert_ct++;
679         if (HT[idx] != key) {
680           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
681           if (idx == size) {
682             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
683             if (idx == h1) {
684               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"(row,col) has no entry in the hash table");
685             }
686           }
687         }
688 #else
689         if (HT[idx] != key) {
690           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
691           if (idx == size) {
692             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
693             if (idx == h1) {
694               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"(row,col) has no entry in the hash table");
695             }
696           }
697         }
698 #endif
699         baij_a = HD[idx];
700         if (roworiented) {
701           /*value = v + i*(stepval+bs)*bs + j*bs;*/
702           /* value = v + (i*(stepval+bs)+j)*bs; */
703           value = v_t;
704           v_t  += bs;
705           if (addv == ADD_VALUES) {
706             for (ii=0; ii<bs; ii++,value+=stepval) {
707               for (jj=ii; jj<bs2; jj+=bs) {
708                 baij_a[jj]  += *value++;
709               }
710             }
711           } else {
712             for (ii=0; ii<bs; ii++,value+=stepval) {
713               for (jj=ii; jj<bs2; jj+=bs) {
714                 baij_a[jj]  = *value++;
715               }
716             }
717           }
718         } else {
719           value = v + j*(stepval+bs)*bs + i*bs;
720           if (addv == ADD_VALUES) {
721             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
722               for (jj=0; jj<bs; jj++) {
723                 baij_a[jj]  += *value++;
724               }
725             }
726           } else {
727             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
728               for (jj=0; jj<bs; jj++) {
729                 baij_a[jj]  = *value++;
730               }
731             }
732           }
733         }
734       }
735     } else {
736       if (!baij->donotstash) {
737         if (roworiented) {
738           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
739         } else {
740           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
741         }
742       }
743     }
744   }
745 #if defined(PETSC_USE_BOPT_g)
746   baij->ht_total_ct = total_ct;
747   baij->ht_insert_ct = insert_ct;
748 #endif
749   PetscFunctionReturn(0);
750 }
751 
752 #undef __FUNC__
753 #define __FUNC__ /*<a name=""></a>*/"MatGetValues_MPIBAIJ"
754 int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
755 {
756   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
757   int        bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
758   int        bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;
759 
760   PetscFunctionBegin;
761   for (i=0; i<m; i++) {
762     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
763     if (idxm[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
764     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
765       row = idxm[i] - bsrstart;
766       for (j=0; j<n; j++) {
767         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
768         if (idxn[j] >= baij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
769         if (idxn[j] >= bscstart && idxn[j] < bscend){
770           col = idxn[j] - bscstart;
771           ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
772         } else {
773           if (!baij->colmap) {
774             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
775           }
776 #if defined (PETSC_USE_CTABLE)
777           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
778           data --;
779 #else
780           data = baij->colmap[idxn[j]/bs]-1;
781 #endif
782           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
783           else {
784             col  = data + idxn[j]%bs;
785             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
786           }
787         }
788       }
789     } else {
790       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
791     }
792   }
793   PetscFunctionReturn(0);
794 }
795 
796 #undef __FUNC__
797 #define __FUNC__ /*<a name=""></a>*/"MatNorm_MPIBAIJ"
798 int MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *norm)
799 {
800   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
801   Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
802   int        ierr,i,bs2=baij->bs2;
803   PetscReal  sum = 0.0;
804   MatScalar  *v;
805 
806   PetscFunctionBegin;
807   if (baij->size == 1) {
808     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
809   } else {
810     if (type == NORM_FROBENIUS) {
811       v = amat->a;
812       for (i=0; i<amat->nz*bs2; i++) {
813 #if defined(PETSC_USE_COMPLEX)
814         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
815 #else
816         sum += (*v)*(*v); v++;
817 #endif
818       }
819       v = bmat->a;
820       for (i=0; i<bmat->nz*bs2; i++) {
821 #if defined(PETSC_USE_COMPLEX)
822         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
823 #else
824         sum += (*v)*(*v); v++;
825 #endif
826       }
827       ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
828       *norm = sqrt(*norm);
829     } else {
830       SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
831     }
832   }
833   PetscFunctionReturn(0);
834 }
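/*
   The Frobenius branch above accumulates |a_ij|^2 over the local A and B parts,
   sums the partial results across the communicator, and takes one square root at
   the end. The same computation on a plain array, as a minimal serial sketch:
*/
#if 0
#include <stdio.h>
#include <math.h>

int main(void)
{
  double v[] = {1.0,-2.0,3.0};  /* stand-in for the local nonzero values */
  double sum = 0.0,norm;
  int    i,nz = 3;

  for (i=0; i<nz; i++) sum += v[i]*v[i];
  /* in parallel: MPI_Allreduce(&sum,&norm,1,MPI_DOUBLE,MPI_SUM,comm); */
  norm = sqrt(sum);
  printf("||A||_F = %g\n",norm);  /* sqrt(14) */
  return 0;
}
#endif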
835 
836 
837 /*
838   Creates the hash table and fills it in.
839   This table is created only once.
840   If new entries need to be added to the matrix
841   then the hash table has to be destroyed and
842   recreated.
843 */
844 #undef __FUNC__
845 #define __FUNC__ /*<a name=""></a>*/"MatCreateHashTable_MPIBAIJ_Private"
846 int MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
847 {
848   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
849   Mat         A = baij->A,B=baij->B;
850   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
851   int         i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
852   int         size,bs2=baij->bs2,rstart=baij->rstart,ierr;
853   int         cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs;
854   int         *HT,key;
855   MatScalar   **HD;
856   PetscReal   tmp;
857 #if defined(PETSC_USE_BOPT_g)
858   int         ct=0,max=0;
859 #endif
860 
861   PetscFunctionBegin;
862   baij->ht_size=(int)(factor*nz);
863   size = baij->ht_size;
864 
865   if (baij->ht) {
866     PetscFunctionReturn(0);
867   }
868 
869   /* Allocate Memory for Hash Table */
870   baij->hd = (MatScalar**)PetscMalloc((size)*(sizeof(int)+sizeof(MatScalar*))+1);CHKPTRQ(baij->hd);
871   baij->ht = (int*)(baij->hd + size);
872   HD = baij->hd;
873   HT = baij->ht;
874 
875 
876   ierr = PetscMemzero(HD,size*(sizeof(int)+sizeof(MatScalar*)));CHKERRQ(ierr);
877 
878 
879   /* Loop Over A */
880   for (i=0; i<a->mbs; i++) {
881     for (j=ai[i]; j<ai[i+1]; j++) {
882       row = i+rstart;
883       col = aj[j]+cstart;
884 
885       key = row*Nbs + col + 1;
886       h1  = HASH(size,key,tmp);
887       for (k=0; k<size; k++){
888         if (HT[(h1+k)%size] == 0) {
889           HT[(h1+k)%size] = key;
890           HD[(h1+k)%size] = a->a + j*bs2;
891           break;
892 #if defined(PETSC_USE_BOPT_g)
893         } else {
894           ct++;
895 #endif
896         }
897       }
898 #if defined(PETSC_USE_BOPT_g)
899       if (k> max) max = k;
900 #endif
901     }
902   }
903   /* Loop Over B */
904   for (i=0; i<b->mbs; i++) {
905     for (j=bi[i]; j<bi[i+1]; j++) {
906       row = i+rstart;
907       col = garray[bj[j]];
908       key = row*Nbs + col + 1;
909       h1  = HASH(size,key,tmp);
910       for (k=0; k<size; k++){
911         if (HT[(h1+k)%size] == 0) {
912           HT[(h1+k)%size] = key;
913           HD[(h1+k)%size] = b->a + j*bs2;
914           break;
915 #if defined(PETSC_USE_BOPT_g)
916         } else {
917           ct++;
918 #endif
919         }
920       }
921 #if defined(PETSC_USE_BOPT_g)
922       if (k> max) max = k;
923 #endif
924     }
925   }
926 
927   /* Print Summary */
928 #if defined(PETSC_USE_BOPT_g)
929   for (i=0,j=0; i<size; i++) {
930     if (HT[i]) {j++;}
931   }
932   PLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f, max search = %d\n",(j == 0) ? 0.0 : ((PetscReal)(ct+j))/j,max);
933 #endif
934   PetscFunctionReturn(0);
935 }
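/*
   The table above resolves collisions by linear probing: start at HASH(key) and walk
   forward (wrapping with %size) to the first empty slot on insertion, or to the
   matching key on lookup. A self-contained miniature with illustrative keys (10 and
   44 hash to the same slot), omitting the MatScalar* payload:
*/
#if 0
#include <stdio.h>

#define HASH_KEY 0.6180339887
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))

int main(void)
{
  int    HT[7] = {0},size = 7,keys[] = {10,44,17},i,k,h1,idx;
  double tmp;

  for (i=0; i<3; i++) {                 /* insert: probe to first free slot */
    h1 = HASH(size,keys[i],tmp);
    for (k=0; k<size; k++) {
      idx = (h1+k)%size;
      if (!HT[idx]) { HT[idx] = keys[i]; break; }
    }
  }
  h1 = HASH(size,44,tmp);               /* lookup key 44 */
  for (k=0; k<size; k++) {
    idx = (h1+k)%size;
    if (HT[idx] == 44) { printf("44 found at slot %d\n",idx); break; }
  }
  return 0;
}
#endif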
936 
937 #undef __FUNC__
938 #define __FUNC__ /*<a name=""></a>*/"MatAssemblyBegin_MPIBAIJ"
939 int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
940 {
941   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
942   int         ierr,nstash,reallocs;
943   InsertMode  addv;
944 
945   PetscFunctionBegin;
946   if (baij->donotstash) {
947     PetscFunctionReturn(0);
948   }
949 
950   /* make sure all processors are either in INSERTMODE or ADDMODE */
951   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
952   if (addv == (ADD_VALUES|INSERT_VALUES)) {
953     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
954   }
955   mat->insertmode = addv; /* in case this processor had no cache */
956 
957   ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
958   ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
959   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
960   PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
961   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
962   PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
963   PetscFunctionReturn(0);
964 }
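/*
   The MPI_BOR reduction above is a cheap consistency check: assuming INSERT_VALUES
   and ADD_VALUES occupy distinct bits (taken here to be 1 and 2, as in this
   version's InsertMode enum), the OR over all ranks equals ADD_VALUES|INSERT_VALUES
   exactly when some ranks inserted while others added. The arithmetic, reduced to a
   serial sketch:
*/
#if 0
#include <stdio.h>

int main(void)
{
  int modes[] = {1,2,1};  /* per-rank insert modes: INSERT, ADD, INSERT */
  int nranks = 3,i,bor = 0;

  for (i=0; i<nranks; i++) bor |= modes[i];  /* what MPI_BOR computes */
  if (bor == (1|2)) printf("Some processors inserted others added\n");
  else              printf("insert mode consistent: %d\n",bor);
  return 0;
}
#endif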
965 
966 #undef __FUNC__
967 #define __FUNC__ /*<a name=""></a>*/"MatAssemblyEnd_MPIBAIJ"
968 int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
969 {
970   Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data;
971   Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data;
972   int         i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2;
973   int         *row,*col,other_disassembled;
974   PetscTruth  r1,r2,r3;
975   MatScalar   *val;
976   InsertMode  addv = mat->insertmode;
977 
978   PetscFunctionBegin;
979   if (!baij->donotstash) {
980     while (1) {
981       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
982       if (!flg) break;
983 
984       for (i=0; i<n;) {
985         /* Now identify the consecutive vals belonging to the same row */
986         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
987         if (j < n) ncols = j-i;
988         else       ncols = n-i;
989         /* Now assemble all these values with a single function call */
990         ierr = MatSetValues_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
991         i = j;
992       }
993     }
994     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
995     /* Now process the block-stash. Since the values are stashed column-oriented,
996        temporarily clear the roworiented flags, and after MatSetValues()
997        restore the original flags */
998     r1 = baij->roworiented;
999     r2 = a->roworiented;
1000     r3 = b->roworiented;
1001     baij->roworiented = PETSC_FALSE;
1002     a->roworiented    = PETSC_FALSE;
1003     b->roworiented    = PETSC_FALSE;
1004     while (1) {
1005       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
1006       if (!flg) break;
1007 
1008       for (i=0; i<n;) {
1009         /* Now identify the consecutive vals belonging to the same row */
1010         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
1011         if (j < n) ncols = j-i;
1012         else       ncols = n-i;
1013         ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
1014         i = j;
1015       }
1016     }
1017     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
1018     baij->roworiented = r1;
1019     a->roworiented    = r2;
1020     b->roworiented    = r3;
1021   }
1022 
1023   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
1024   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
1025 
1026   /* determine if any processor has disassembled, if so we must
1027      also disassemble ourselves, in order that we may reassemble. */
1028   /*
1029      if nonzero structure of submatrix B cannot change then we know that
1030      no processor disassembled thus we can skip this stuff
1031   */
1032   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
1033     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
1034     if (mat->was_assembled && !other_disassembled) {
1035       ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
1036     }
1037   }
1038 
1039   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
1040     ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
1041   }
1042   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
1043   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
1044 
1045 #if defined(PETSC_USE_BOPT_g)
1046   if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
1047     PLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((double)baij->ht_total_ct)/baij->ht_insert_ct);
1048     baij->ht_total_ct  = 0;
1049     baij->ht_insert_ct = 0;
1050   }
1051 #endif
1052   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
1053     ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
1054     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
1055     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
1056   }
1057 
1058   if (baij->rowvalues) {
1059     ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
1060     baij->rowvalues = 0;
1061   }
1062   PetscFunctionReturn(0);
1063 }
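/*
   Note the dispatch swap above: once the hash table exists, mat->ops->setvalues is
   repointed so that later MatSetValues() calls take the hash-table path without any
   per-call test. The idiom in isolation (illustrative names, not PETSc API):
*/
#if 0
#include <stdio.h>

typedef struct { int (*setvalues)(void); } MatOps;

static int SetValues_Standard(void) { printf("search/insert path\n"); return 0; }
static int SetValues_HT(void)       { printf("hash-table path\n");    return 0; }

int main(void)
{
  MatOps ops = {SetValues_Standard};

  ops.setvalues();                 /* before final assembly */
  ops.setvalues = SetValues_HT;    /* the swap done at MAT_FINAL_ASSEMBLY */
  ops.setvalues();                 /* all later insertions */
  return 0;
}
#endif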
1064 
1065 #undef __FUNC__
1066 #define __FUNC__ /*<a name=""></a>*/"MatView_MPIBAIJ_ASCIIorDraworSocket"
1067 static int MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer)
1068 {
1069   Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ*)mat->data;
1070   int          ierr,format,bs = baij->bs,size = baij->size,rank = baij->rank;
1071   PetscTruth   isascii,isdraw;
1072   Viewer       sviewer;
1073 
1074   PetscFunctionBegin;
1075   ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
1076   ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
1077   if (isascii) {
1078     ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1079     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
1080       MatInfo info;
1081       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
1082       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1083       ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
1084               rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
1085               baij->bs,(int)info.memory);CHKERRQ(ierr);
1086       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1087       ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
1088       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1089       ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
1090       ierr = ViewerFlush(viewer);CHKERRQ(ierr);
1091       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
1092       PetscFunctionReturn(0);
1093     } else if (format == VIEWER_FORMAT_ASCII_INFO) {
1094       ierr = ViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
1095       PetscFunctionReturn(0);
1096     }
1097   }
1098 
1099   if (isdraw) {
1100     Draw       draw;
1101     PetscTruth isnull;
1102     ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1103     ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1104   }
1105 
1106   if (size == 1) {
1107     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
1108   } else {
1109     /* assemble the entire matrix onto first processor. */
1110     Mat         A;
1111     Mat_SeqBAIJ *Aloc;
1112     int         M = baij->M,N = baij->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
1113     MatScalar   *a;
1114 
1115     if (!rank) {
1116       ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
1117     } else {
1118       ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
1119     }
1120     PLogObjectParent(mat,A);
1121 
1122     /* copy over the A part */
1123     Aloc  = (Mat_SeqBAIJ*)baij->A->data;
1124     ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
1125     rvals = (int*)PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals);
1126 
1127     for (i=0; i<mbs; i++) {
1128       rvals[0] = bs*(baij->rstart + i);
1129       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1130       for (j=ai[i]; j<ai[i+1]; j++) {
1131         col = (baij->cstart+aj[j])*bs;
1132         for (k=0; k<bs; k++) {
1133           ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1134           col++; a += bs;
1135         }
1136       }
1137     }
1138     /* copy over the B part */
1139     Aloc = (Mat_SeqBAIJ*)baij->B->data;
1140     ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1141     for (i=0; i<mbs; i++) {
1142       rvals[0] = bs*(baij->rstart + i);
1143       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1144       for (j=ai[i]; j<ai[i+1]; j++) {
1145         col = baij->garray[aj[j]]*bs;
1146         for (k=0; k<bs; k++) {
1147           ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1148           col++; a += bs;
1149         }
1150       }
1151     }
1152     ierr = PetscFree(rvals);CHKERRQ(ierr);
1153     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1154     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1155     /*
1156        Everyone has to call to draw the matrix since the graphics waits are
1157        synchronized across all processors that share the Draw object
1158     */
1159     ierr = ViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1160     if (!rank) {
1161       ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1162     }
1163     ierr = ViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1164     ierr = MatDestroy(A);CHKERRQ(ierr);
1165   }
1166   PetscFunctionReturn(0);
1167 }
1168 
1169 #undef __FUNC__
1170 #define __FUNC__ /*<a name=""></a>*/"MatView_MPIBAIJ"
1171 int MatView_MPIBAIJ(Mat mat,Viewer viewer)
1172 {
1173   int        ierr;
1174   PetscTruth isascii,isdraw,issocket,isbinary;
1175 
1176   PetscFunctionBegin;
1177   ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
1178   ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
1179   ierr = PetscTypeCompare((PetscObject)viewer,SOCKET_VIEWER,&issocket);CHKERRQ(ierr);
1180   ierr = PetscTypeCompare((PetscObject)viewer,BINARY_VIEWER,&isbinary);CHKERRQ(ierr);
1181   if (isascii || isdraw || issocket || isbinary) {
1182     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1183   } else {
1184     SETERRQ1(1,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
1185   }
1186   PetscFunctionReturn(0);
1187 }
1188 
1189 #undef __FUNC__
1190 #define __FUNC__ /*<a name=""></a>*/"MatDestroy_MPIBAIJ"
1191 int MatDestroy_MPIBAIJ(Mat mat)
1192 {
1193   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1194   int         ierr;
1195 
1196   PetscFunctionBegin;
1197 
1198   if (mat->mapping) {
1199     ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr);
1200   }
1201   if (mat->bmapping) {
1202     ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr);
1203   }
1204   if (mat->rmap) {
1205     ierr = MapDestroy(mat->rmap);CHKERRQ(ierr);
1206   }
1207   if (mat->cmap) {
1208     ierr = MapDestroy(mat->cmap);CHKERRQ(ierr);
1209   }
1210 #if defined(PETSC_USE_LOG)
1211   PLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",baij->M,baij->N);
1212 #endif
1213 
1214   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1215   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
1216 
1217   ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
1218   ierr = MatDestroy(baij->A);CHKERRQ(ierr);
1219   ierr = MatDestroy(baij->B);CHKERRQ(ierr);
1220 #if defined (PETSC_USE_CTABLE)
1221   if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
1222 #else
1223   if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
1224 #endif
1225   if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
1226   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
1227   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
1228   if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
1229   if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
1230   if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
1231 #if defined(PETSC_USE_MAT_SINGLE)
1232   if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);}
1233 #endif
1234   ierr = PetscFree(baij);CHKERRQ(ierr);
1235   PLogObjectDestroy(mat);
1236   PetscHeaderDestroy(mat);
1237   PetscFunctionReturn(0);
1238 }
1239 
1240 #undef __FUNC__
1241 #define __FUNC__ /*<a name=""></a>*/"MatMult_MPIBAIJ"
1242 int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1243 {
1244   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1245   int         ierr,nt;
1246 
1247   PetscFunctionBegin;
1248   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1249   if (nt != a->n) {
1250     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1251   }
1252   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1253   if (nt != a->m) {
1254     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
1255   }
1256   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1257   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1258   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1259   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1260   ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1261   PetscFunctionReturn(0);
1262 }
1263 
1264 #undef __FUNC__
1265 #define __FUNC__ /*<a name=""></a>*/"MatMultAdd_MPIBAIJ"
1266 int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1267 {
1268   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1269   int        ierr;
1270 
1271   PetscFunctionBegin;
1272   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1273   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1274   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1275   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1276   PetscFunctionReturn(0);
1277 }
1278 
1279 #undef __FUNC__
1280 #define __FUNC__ /*<a name=""></a>*/"MatMultTranspose_MPIBAIJ"
1281 int MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1282 {
1283   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1284   int         ierr;
1285 
1286   PetscFunctionBegin;
1287   /* do nondiagonal part */
1288   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1289   /* send it on its way */
1290   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1291   /* do local part */
1292   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1293   /* receive remote parts: note this assumes the values are not actually */
1294   /* inserted in yy until the next line, which is true for my implementation*/
1295   /* but may not always be true. */
1296   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1297   PetscFunctionReturn(0);
1298 }
1299 
1300 #undef __FUNC__
1301 #define __FUNC__ /*<a name=""></a>*/"MatMultTransposeAdd_MPIBAIJ"
1302 int MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1303 {
1304   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1305   int         ierr;
1306 
1307   PetscFunctionBegin;
1308   /* do nondiagonal part */
1309   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1310   /* send it on its way */
1311   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1312   /* do local part */
1313   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1314   /* receive remote parts: note this assumes the values are not actually */
1315   /* inserted in zz until the next line, which is true for my implementation*/
1316   /* but may not always be true. */
1317   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1318   PetscFunctionReturn(0);
1319 }
1320 
1321 /*
1322   This only works correctly for square matrices where the subblock A->A is the
1323    diagonal block
1324 */
1325 #undef __FUNC__
1326 #define __FUNC__ /*<a name=""></a>*/"MatGetDiagonal_MPIBAIJ"
1327 int MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1328 {
1329   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1330   int         ierr;
1331 
1332   PetscFunctionBegin;
1333   if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1334   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1335   PetscFunctionReturn(0);
1336 }
1337 
1338 #undef __FUNC__
1339 #define __FUNC__ /*<a name=""></a>*/"MatScale_MPIBAIJ"
1340 int MatScale_MPIBAIJ(Scalar *aa,Mat A)
1341 {
1342   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1343   int         ierr;
1344 
1345   PetscFunctionBegin;
1346   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
1347   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
1348   PetscFunctionReturn(0);
1349 }
1350 
1351 #undef __FUNC__
1352 #define __FUNC__ /*<a name=""></a>*/"MatGetOwnershipRange_MPIBAIJ"
1353 int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n)
1354 {
1355   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data;
1356 
1357   PetscFunctionBegin;
1358   if (m) *m = mat->rstart*mat->bs;
1359   if (n) *n = mat->rend*mat->bs;
1360   PetscFunctionReturn(0);
1361 }
1362 
1363 #undef __FUNC__
1364 #define __FUNC__ /*<a name=""></a>*/"MatGetRow_MPIBAIJ"
1365 int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
1366 {
1367   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data;
1368   Scalar     *vworkA,*vworkB,**pvA,**pvB,*v_p;
1369   int        bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB;
1370   int        nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs;
1371   int        *cmap,*idx_p,cstart = mat->cstart;
1372 
1373   PetscFunctionBegin;
1374   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1375   mat->getrowactive = PETSC_TRUE;
1376 
1377   if (!mat->rowvalues && (idx || v)) {
1378     /*
1379         allocate enough space to hold information from the longest row.
1380     */
1381     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1382     int     max = 1,mbs = mat->mbs,tmp;
1383     for (i=0; i<mbs; i++) {
1384       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1385       if (max < tmp) { max = tmp; }
1386     }
1387     mat->rowvalues = (Scalar*)PetscMalloc(max*bs2*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues);
1388     mat->rowindices = (int*)(mat->rowvalues + max*bs2);
1389   }
1390 
1391   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows");
1392   lrow = row - brstart;
1393 
1394   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1395   if (!v)   {pvA = 0; pvB = 0;}
1396   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1397   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1398   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1399   nztot = nzA + nzB;
1400 
1401   cmap  = mat->garray;
1402   if (v  || idx) {
1403     if (nztot) {
1404       /* Sort by increasing column numbers, assuming A and B already sorted */
1405       int imark = -1;
1406       if (v) {
1407         *v = v_p = mat->rowvalues;
1408         for (i=0; i<nzB; i++) {
1409           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1410           else break;
1411         }
1412         imark = i;
1413         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1414         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1415       }
1416       if (idx) {
1417         *idx = idx_p = mat->rowindices;
1418         if (imark > -1) {
1419           for (i=0; i<imark; i++) {
1420             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1421           }
1422         } else {
1423           for (i=0; i<nzB; i++) {
1424             if (cmap[cworkB[i]/bs] < cstart)
              idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1426             else break;
1427           }
1428           imark = i;
1429         }
1430         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1432       }
1433     } else {
1434       if (idx) *idx = 0;
1435       if (v)   *v   = 0;
1436     }
1437   }
1438   *nz = nztot;
1439   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1440   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1441   PetscFunctionReturn(0);
1442 }
1443 
1444 #undef __FUNC__
1445 #define __FUNC__ /*<a name=""></a>*/"MatRestoreRow_MPIBAIJ"
1446 int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1447 {
1448   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1449 
1450   PetscFunctionBegin;
1451   if (baij->getrowactive == PETSC_FALSE) {
1452     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1453   }
1454   baij->getrowactive = PETSC_FALSE;
1455   PetscFunctionReturn(0);
1456 }
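
/*
   Access-pattern sketch (names are illustrative).  Rows may only be queried
   on the processor that owns them, and each MatGetRow() must be paired with
   a MatRestoreRow() before the next row is requested:

     int    nz,*cols;
     Scalar *vals;
     ierr = MatGetRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
     (examine cols[0..nz-1] and vals[0..nz-1] here)
     ierr = MatRestoreRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
*/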
1457 
1458 #undef __FUNC__
1459 #define __FUNC__ /*<a name=""></a>*/"MatGetBlockSize_MPIBAIJ"
1460 int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs)
1461 {
1462   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1463 
1464   PetscFunctionBegin;
1465   *bs = baij->bs;
1466   PetscFunctionReturn(0);
1467 }
1468 
1469 #undef __FUNC__
1470 #define __FUNC__ /*<a name=""></a>*/"MatZeroEntries_MPIBAIJ"
1471 int MatZeroEntries_MPIBAIJ(Mat A)
1472 {
1473   Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data;
1474   int         ierr;
1475 
1476   PetscFunctionBegin;
1477   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1478   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1479   PetscFunctionReturn(0);
1480 }
1481 
1482 #undef __FUNC__
1483 #define __FUNC__ /*<a name=""></a>*/"MatGetInfo_MPIBAIJ"
1484 int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1485 {
1486   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data;
1487   Mat         A = a->A,B = a->B;
1488   int         ierr;
1489   PetscReal   isend[5],irecv[5];
1490 
1491   PetscFunctionBegin;
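  /* Accumulate the five additive statistics (nz_used, nz_allocated,
     nz_unneeded, memory, mallocs) over the diagonal (A) and off-diagonal (B)
     blocks, then either report the local sums or reduce them across the
     communicator with MPI_MAX or MPI_SUM. */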
1492   info->block_size     = (double)a->bs;
1493   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1494   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1495   isend[3] = info->memory;  isend[4] = info->mallocs;
1496   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1497   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1498   isend[3] += info->memory;  isend[4] += info->mallocs;
1499   if (flag == MAT_LOCAL) {
1500     info->nz_used      = isend[0];
1501     info->nz_allocated = isend[1];
1502     info->nz_unneeded  = isend[2];
1503     info->memory       = isend[3];
1504     info->mallocs      = isend[4];
1505   } else if (flag == MAT_GLOBAL_MAX) {
1506     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr);
1507     info->nz_used      = irecv[0];
1508     info->nz_allocated = irecv[1];
1509     info->nz_unneeded  = irecv[2];
1510     info->memory       = irecv[3];
1511     info->mallocs      = irecv[4];
1512   } else if (flag == MAT_GLOBAL_SUM) {
1513     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr);
1514     info->nz_used      = irecv[0];
1515     info->nz_allocated = irecv[1];
1516     info->nz_unneeded  = irecv[2];
1517     info->memory       = irecv[3];
1518     info->mallocs      = irecv[4];
1519   } else {
1520     SETERRQ1(1,"Unknown MatInfoType argument %d",flag);
1521   }
1522   info->rows_global       = (double)a->M;
1523   info->columns_global    = (double)a->N;
1524   info->rows_local        = (double)a->m;
  info->columns_local     = (double)a->n;
1526   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1527   info->fill_ratio_needed = 0;
1528   info->factor_mallocs    = 0;
1529   PetscFunctionReturn(0);
1530 }
1531 
1532 #undef __FUNC__
1533 #define __FUNC__ /*<a name=""></a>*/"MatSetOption_MPIBAIJ"
1534 int MatSetOption_MPIBAIJ(Mat A,MatOption op)
1535 {
1536   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1537   int         ierr;
1538 
1539   PetscFunctionBegin;
1540   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
1541       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
1542       op == MAT_COLUMNS_UNSORTED ||
1543       op == MAT_COLUMNS_SORTED ||
1544       op == MAT_NEW_NONZERO_ALLOCATION_ERR ||
1545       op == MAT_KEEP_ZEROED_ROWS ||
1546       op == MAT_NEW_NONZERO_LOCATION_ERR) {
1547         ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1548         ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1549   } else if (op == MAT_ROW_ORIENTED) {
1550         a->roworiented = PETSC_TRUE;
1551         ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1552         ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1553   } else if (op == MAT_ROWS_SORTED ||
1554              op == MAT_ROWS_UNSORTED ||
1555              op == MAT_SYMMETRIC ||
1556              op == MAT_STRUCTURALLY_SYMMETRIC ||
             op == MAT_YES_NEW_DIAGONALS) {
1559     PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
1560   } else if (op == MAT_COLUMN_ORIENTED) {
1561     a->roworiented = PETSC_FALSE;
1562     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1563     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1564   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
1565     a->donotstash = PETSC_TRUE;
1566   } else if (op == MAT_NO_NEW_DIAGONALS) {
1567     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1568   } else if (op == MAT_USE_HASH_TABLE) {
1569     a->ht_flag = PETSC_TRUE;
1570   } else {
1571     SETERRQ(PETSC_ERR_SUP,"unknown option");
1572   }
1573   PetscFunctionReturn(0);
1574 }
1575 
1576 #undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatTranspose_MPIBAIJ"
1578 int MatTranspose_MPIBAIJ(Mat A,Mat *matout)
1579 {
1580   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data;
1581   Mat_SeqBAIJ *Aloc;
1582   Mat         B;
1583   int         ierr,M=baij->M,N=baij->N,*ai,*aj,i,*rvals,j,k,col;
1584   int         bs=baij->bs,mbs=baij->mbs;
1585   MatScalar   *a;
1586 
1587   PetscFunctionBegin;
1588   if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1589   ierr = MatCreateMPIBAIJ(A->comm,baij->bs,baij->n,baij->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);
1590 
1591   /* copy over the A part */
1592   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1593   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1594   rvals = (int*)PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals);
1595 
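  /* For block row i, rvals[] holds its bs global (point) row indices.  The
     bs-by-bs blocks are stored column major, so passing MatSetValues one
     block column at a time (a advances by bs) as a single row of B inserts
     each block transposed. */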
1596   for (i=0; i<mbs; i++) {
1597     rvals[0] = bs*(baij->rstart + i);
1598     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1599     for (j=ai[i]; j<ai[i+1]; j++) {
1600       col = (baij->cstart+aj[j])*bs;
1601       for (k=0; k<bs; k++) {
1602         ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1603         col++; a += bs;
1604       }
1605     }
1606   }
1607   /* copy over the B part */
1608   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1609   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1610   for (i=0; i<mbs; i++) {
1611     rvals[0] = bs*(baij->rstart + i);
1612     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1613     for (j=ai[i]; j<ai[i+1]; j++) {
1614       col = baij->garray[aj[j]]*bs;
1615       for (k=0; k<bs; k++) {
1616         ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1617         col++; a += bs;
1618       }
1619     }
1620   }
1621   ierr = PetscFree(rvals);CHKERRQ(ierr);
1622   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1623   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1624 
1625   if (matout) {
1626     *matout = B;
1627   } else {
1628     PetscOps *Abops;
1629     MatOps   Aops;
1630 
1631     /* This isn't really an in-place transpose .... but free data structures from baij */
1632     ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
1633     ierr = MatDestroy(baij->A);CHKERRQ(ierr);
1634     ierr = MatDestroy(baij->B);CHKERRQ(ierr);
1635 #if defined (PETSC_USE_CTABLE)
1636     if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
1637 #else
1638     if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
1639 #endif
1640     if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
1641     if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
1642     if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
1643     ierr = PetscFree(baij);CHKERRQ(ierr);
1644 
1645     /*
1646        This is horrible, horrible code. We need to keep the
1647       A pointers for the bops and ops but copy everything
1648       else from C.
1649     */
1650     Abops   = A->bops;
1651     Aops    = A->ops;
1652     ierr    = PetscMemcpy(A,B,sizeof(struct _p_Mat));CHKERRQ(ierr);
1653     A->bops = Abops;
1654     A->ops  = Aops;
1655 
1656     PetscHeaderDestroy(B);
1657   }
1658   PetscFunctionReturn(0);
1659 }
1660 
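/*
   Computes mat = diag(ll)*mat*diag(rr).  The left scaling touches only
   locally owned rows, so it is applied directly to both the diagonal block
   (A) and the off-diagonal block (B); the right scaling of B needs the
   ghosted copy of rr (baij->lvec), so the scatter filling lvec is started
   first and B is column-scaled only after the scatter completes.
*/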
1661 #undef __FUNC__
1662 #define __FUNC__ /*<a name=""></a>*/"MatDiagonalScale_MPIBAIJ"
1663 int MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1664 {
1665   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1666   Mat         a = baij->A,b = baij->B;
1667   int         ierr,s1,s2,s3;
1668 
1669   PetscFunctionBegin;
1670   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1671   if (rr) {
1672     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1673     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1674     /* Overlap communication with computation. */
1675     ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1676   }
1677   if (ll) {
1678     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1679     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1680     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1681   }
1682   /* scale  the diagonal block */
1683   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1684 
1685   if (rr) {
1686     /* Do a scatter end and then right scale the off-diagonal block */
1687     ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1688     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1689   }
1690 
1691   PetscFunctionReturn(0);
1692 }
1693 
1694 #undef __FUNC__
1695 #define __FUNC__ /*<a name=""></a>*/"MatZeroRows_MPIBAIJ"
1696 int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag)
1697 {
1698   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1699   int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
1700   int            *procs,*nprocs,j,found,idx,nsends,*work,row;
1701   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
1702   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
1703   int            *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs;
1704   MPI_Comm       comm = A->comm;
1705   MPI_Request    *send_waits,*recv_waits;
1706   MPI_Status     recv_status,*send_status;
1707   IS             istmp;
1708 
1709   PetscFunctionBegin;
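  /* The rows listed in 'is' may be owned by other processors, so first each
     processor counts how many of its requested rows fall in every owner's
     range, the counts are combined with an MPI_Allreduce (PetscMaxSum_Op),
     and the global row indices are exchanged with nonblocking point-to-point
     messages; only then are the locally owned rows actually zeroed below. */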
1710   ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
1711   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
1712 
1713   /*  first count number of contributors to each processor */
1714   nprocs = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(nprocs);
1715   ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
1716   procs  = nprocs + size;
1717   owner  = (int*)PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note*/
1718   for (i=0; i<N; i++) {
1719     idx   = rows[i];
1720     found = 0;
1721     for (j=0; j<size; j++) {
1722       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
1723         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
1724       }
1725     }
1726     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
1727   }
1728   nsends = 0;  for (i=0; i<size; i++) { nsends += procs[i];}
1729 
1730   /* inform other processors of number of messages and max length*/
1731   work   = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(work);
1732   ierr   = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr);
1733   nmax   = work[rank];
1734   nrecvs = work[size+rank];
1735   ierr = PetscFree(work);CHKERRQ(ierr);
1736 
1737   /* post receives:   */
1738   rvalues = (int*)PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
1739   recv_waits = (MPI_Request*)PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
1740   for (i=0; i<nrecvs; i++) {
1741     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
1742   }
1743 
1744   /* do sends:
1745      1) starts[i] gives the starting index in svalues for stuff going to
1746      the ith processor
1747   */
1748   svalues    = (int*)PetscMalloc((N+1)*sizeof(int));CHKPTRQ(svalues);
1749   send_waits = (MPI_Request*)PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
1750   starts     = (int*)PetscMalloc((size+1)*sizeof(int));CHKPTRQ(starts);
1751   starts[0]  = 0;
1752   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
1753   for (i=0; i<N; i++) {
1754     svalues[starts[owner[i]]++] = rows[i];
1755   }
1756   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
1757 
1758   starts[0] = 0;
1759   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
1760   count = 0;
1761   for (i=0; i<size; i++) {
1762     if (procs[i]) {
1763       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
1764     }
1765   }
1766   ierr = PetscFree(starts);CHKERRQ(ierr);
1767 
1768   base = owners[rank]*bs;
1769 
1770   /*  wait on receives */
1771   lens   = (int*)PetscMalloc(2*(nrecvs+1)*sizeof(int));CHKPTRQ(lens);
1772   source = lens + nrecvs;
1773   count  = nrecvs; slen = 0;
1774   while (count) {
1775     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
1776     /* unpack receives into our local space */
1777     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
1778     source[imdex]  = recv_status.MPI_SOURCE;
1779     lens[imdex]    = n;
1780     slen          += n;
1781     count--;
1782   }
1783   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1784 
1785   /* move the data into the send scatter */
1786   lrows = (int*)PetscMalloc((slen+1)*sizeof(int));CHKPTRQ(lrows);
1787   count = 0;
1788   for (i=0; i<nrecvs; i++) {
1789     values = rvalues + i*nmax;
1790     for (j=0; j<lens[i]; j++) {
1791       lrows[count++] = values[j] - base;
1792     }
1793   }
1794   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1795   ierr = PetscFree(lens);CHKERRQ(ierr);
1796   ierr = PetscFree(owner);CHKERRQ(ierr);
1797   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1798 
1799   /* actually zap the local rows */
1800   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
1801   PLogObjectParent(A,istmp);
1802 
1803   /*
        Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal, we use separate
     code so that MatSetValues() is not called once per diagonal entry,
     which would allocate new memory repeatedly and slow things down.
1808 
1809        Contributed by: Mathew Knepley
1810   */
1811   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1812   ierr = MatZeroRows_SeqBAIJ(l->B,istmp,0);CHKERRQ(ierr);
1813   if (diag && (l->A->M == l->A->N)) {
1814     ierr = MatZeroRows_SeqBAIJ(l->A,istmp,diag);CHKERRQ(ierr);
1815   } else if (diag) {
1816     ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr);
1817     if (((Mat_SeqBAIJ*)l->A->data)->nonew) {
1818       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1819 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1820     }
1821     for (i=0; i<slen; i++) {
1822       row  = lrows[i] + rstart_bs;
1823       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
1824     }
1825     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1826     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1827   } else {
1828     ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr);
1829   }
1830 
1831   ierr = ISDestroy(istmp);CHKERRQ(ierr);
1832   ierr = PetscFree(lrows);CHKERRQ(ierr);
1833 
1834   /* wait on sends */
1835   if (nsends) {
1836     send_status = (MPI_Status*)PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
1837     ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1838     ierr        = PetscFree(send_status);CHKERRQ(ierr);
1839   }
1840   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1841   ierr = PetscFree(svalues);CHKERRQ(ierr);
1842 
1843   PetscFunctionReturn(0);
1844 }
1845 
1846 #undef __FUNC__
1847 #define __FUNC__ /*<a name=""></a>*/"MatPrintHelp_MPIBAIJ"
1848 int MatPrintHelp_MPIBAIJ(Mat A)
1849 {
1850   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*)A->data;
1851   MPI_Comm    comm = A->comm;
1852   static int  called = 0;
1853   int         ierr;
1854 
1855   PetscFunctionBegin;
1856   if (!a->rank) {
1857     ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr);
1858   }
1859   if (called) {PetscFunctionReturn(0);} else called = 1;
1860   ierr = (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
1861   ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
1862   PetscFunctionReturn(0);
1863 }
1864 
1865 #undef __FUNC__
1866 #define __FUNC__ /*<a name=""></a>*/"MatSetUnfactored_MPIBAIJ"
1867 int MatSetUnfactored_MPIBAIJ(Mat A)
1868 {
1869   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*)A->data;
1870   int         ierr;
1871 
1872   PetscFunctionBegin;
1873   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1874   PetscFunctionReturn(0);
1875 }
1876 
1877 static int MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
1878 
1879 #undef __FUNC__
1880 #define __FUNC__ /*<a name=""></a>*/"MatEqual_MPIBAIJ"
1881 int MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag)
1882 {
1883   Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1884   Mat         a,b,c,d;
1885   PetscTruth  flg;
1886   int         ierr;
1887 
1888   PetscFunctionBegin;
1889   if (B->type != MATMPIBAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type");
1890   a = matA->A; b = matA->B;
1891   c = matB->A; d = matB->B;
1892 
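  /* Equality must hold on every processor, so the local comparisons of the
     diagonal and off-diagonal blocks are combined with a logical-AND
     reduction over the communicator. */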
1893   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1894   if (flg == PETSC_TRUE) {
1895     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1896   }
1897   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1898   PetscFunctionReturn(0);
1899 }
1900 
1901 /* -------------------------------------------------------------------*/
1902 static struct _MatOps MatOps_Values = {
1903   MatSetValues_MPIBAIJ,
1904   MatGetRow_MPIBAIJ,
1905   MatRestoreRow_MPIBAIJ,
1906   MatMult_MPIBAIJ,
1907   MatMultAdd_MPIBAIJ,
1908   MatMultTranspose_MPIBAIJ,
1909   MatMultTransposeAdd_MPIBAIJ,
1910   0,
1911   0,
1912   0,
1913   0,
1914   0,
1915   0,
1916   0,
1917   MatTranspose_MPIBAIJ,
1918   MatGetInfo_MPIBAIJ,
1919   MatEqual_MPIBAIJ,
1920   MatGetDiagonal_MPIBAIJ,
1921   MatDiagonalScale_MPIBAIJ,
1922   MatNorm_MPIBAIJ,
1923   MatAssemblyBegin_MPIBAIJ,
1924   MatAssemblyEnd_MPIBAIJ,
1925   0,
1926   MatSetOption_MPIBAIJ,
1927   MatZeroEntries_MPIBAIJ,
1928   MatZeroRows_MPIBAIJ,
1929   0,
1930   0,
1931   0,
1932   0,
1933   MatGetOwnershipRange_MPIBAIJ,
1934   0,
1935   0,
1936   0,
1937   0,
1938   MatDuplicate_MPIBAIJ,
1939   0,
1940   0,
1941   0,
1942   0,
1943   0,
1944   MatGetSubMatrices_MPIBAIJ,
1945   MatIncreaseOverlap_MPIBAIJ,
1946   MatGetValues_MPIBAIJ,
1947   0,
1948   MatPrintHelp_MPIBAIJ,
1949   MatScale_MPIBAIJ,
1950   0,
1951   0,
1952   0,
1953   MatGetBlockSize_MPIBAIJ,
1954   0,
1955   0,
1956   0,
1957   0,
1958   0,
1959   0,
1960   MatSetUnfactored_MPIBAIJ,
1961   0,
1962   MatSetValuesBlocked_MPIBAIJ,
1963   0,
1964   MatDestroy_MPIBAIJ,
1965   MatView_MPIBAIJ,
1966   MatGetMaps_Petsc,
1967   0,
1968   0,
1969   0,
1970   0,
1971   0,
1972   0,
1973   MatGetRowMax_MPIBAIJ};
1974 
1975 
1976 EXTERN_C_BEGIN
1977 #undef __FUNC__
1978 #define __FUNC__ /*<a name=""></a>*/"MatGetDiagonalBlock_MPIBAIJ"
1979 int MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1980 {
1981   PetscFunctionBegin;
1982   *a      = ((Mat_MPIBAIJ *)A->data)->A;
1983   *iscopy = PETSC_FALSE;
1984   PetscFunctionReturn(0);
1985 }
1986 EXTERN_C_END
1987 
1988 #undef __FUNC__
1989 #define __FUNC__ /*<a name=""></a>*/"MatCreateMPIBAIJ"
1990 /*@C
1991    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
1992    (block compressed row).  For good matrix assembly performance
1993    the user should preallocate the matrix storage by setting the parameters
1994    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1995    performance can be increased by more than a factor of 50.
1996 
1997    Collective on MPI_Comm
1998 
1999    Input Parameters:
2000 +  comm - MPI communicator
.  bs   - size of block
2002 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2003            This value should be the same as the local size used in creating the
2004            y vector for the matrix-vector product y = Ax.
2005 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2006            This value should be the same as the local size used in creating the
2007            x vector for the matrix-vector product y = Ax.
2008 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2009 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2010 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2011            submatrix  (same for all local rows)
2012 .  d_nnz - array containing the number of block nonzeros in the various block rows
           in the diagonal portion of the local submatrix (possibly different for each block
2014            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
2015 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2016            submatrix (same for all local rows).
2017 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2018            off-diagonal portion of the local submatrix (possibly different for
2019            each block row) or PETSC_NULL.
2020 
2021    Output Parameter:
2022 .  A - the matrix
2023 
2024    Options Database Keys:
+   -mat_no_unroll - uses code that does not unroll the loops in the
                     block calculations (much slower)
.   -mat_block_size - size of the blocks to use
-   -mat_mpi - use the parallel matrix data structures even on one processor
               (defaults to using SeqBAIJ format on one processor)
2030 
2031    Notes:
2032    The user MUST specify either the local or global matrix dimensions
2033    (possibly both).
2034 
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
2037 
2038    Storage Information:
2039    For a square global matrix we define each processor's diagonal portion
2040    to be its local rows and the corresponding columns (a square submatrix);
2041    each processor's off-diagonal portion encompasses the remainder of the
2042    local matrix (a rectangular submatrix).
2043 
2044    The user can specify preallocated storage for the diagonal part of
2045    the local submatrix with either d_nz or d_nnz (not both).  Set
2046    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2047    memory allocation.  Likewise, specify preallocated storage for the
2048    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2049 
2050    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2051    the figure below we depict these three local rows and all columns (0-11).
2052 
2053 .vb
2054            0 1 2 3 4 5 6 7 8 9 10 11
2055           -------------------
2056    row 3  |  o o o d d d o o o o o o
2057    row 4  |  o o o d d d o o o o o o
2058    row 5  |  o o o d d d o o o o o o
2059           -------------------
2060 .ve
2061 
2062    Thus, any entries in the d locations are stored in the d (diagonal)
2063    submatrix, and any entries in the o locations are stored in the
2064    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
2065    stored simply in the MATSEQBAIJ format for compressed row storage.
2066 
2067    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2068    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2069    In general, for PDE problems in which most nonzeros are near the diagonal,
2070    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2071    or you will get TERRIBLE performance; see the users' manual chapter on
2072    matrices.
2073 
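   Example:
   A minimal sketch creating a matrix with 2x2 blocks, letting PETSc choose
   the local sizes and using uniform preallocation (the counts 3 and 1 are
   purely illustrative):

.vb
   Mat A;
   ierr = MatCreateMPIBAIJ(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DECIDE,M,N,
                           3,PETSC_NULL,1,PETSC_NULL,&A);CHKERRQ(ierr);
.ve
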
2074    Level: intermediate
2075 
2076 .keywords: matrix, block, aij, compressed row, sparse, parallel
2077 
2078 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2079 @*/
2080 int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
2081 {
2082   Mat          B;
2083   Mat_MPIBAIJ  *b;
2084   int          ierr,i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size;
2085   PetscTruth   flag1,flag2,flg;
2086 
2087   PetscFunctionBegin;
2088   ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2089 
2090   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
  if (d_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than -2: value %d",d_nz);
  if (o_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than -2: value %d",o_nz);
2093   if (d_nnz) {
2094     for (i=0; i<m/bs; i++) {
      if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
2096     }
2097   }
2098   if (o_nnz) {
2099     for (i=0; i<m/bs; i++) {
      if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
2101     }
2102   }
2103 
2104   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2105   ierr = OptionsHasName(PETSC_NULL,"-mat_mpibaij",&flag1);CHKERRQ(ierr);
2106   ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2);CHKERRQ(ierr);
2107   if (!flag1 && !flag2 && size == 1) {
2108     if (M == PETSC_DECIDE) M = m;
2109     if (N == PETSC_DECIDE) N = n;
2110     ierr = MatCreateSeqBAIJ(comm,bs,M,N,d_nz,d_nnz,A);CHKERRQ(ierr);
2111     PetscFunctionReturn(0);
2112   }
2113 
2114   *A = 0;
2115   PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",comm,MatDestroy,MatView);
2116   PLogObjectCreate(B);
2117   B->data = (void*)(b = PetscNew(Mat_MPIBAIJ));CHKPTRQ(b);
2118   ierr    = PetscMemzero(b,sizeof(Mat_MPIBAIJ));CHKERRQ(ierr);
2119   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2120   B->mapping    = 0;
2121   B->factor     = 0;
2122   B->assembled  = PETSC_FALSE;
2123 
2124   B->insertmode = NOT_SET_VALUES;
2125   ierr = MPI_Comm_rank(comm,&b->rank);CHKERRQ(ierr);
2126   ierr = MPI_Comm_size(comm,&b->size);CHKERRQ(ierr);
2127 
2128   if (m == PETSC_DECIDE && (d_nnz || o_nnz)) {
2129     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
2130   }
2131   if (M == PETSC_DECIDE && m == PETSC_DECIDE) {
2132     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"either M or m should be specified");
2133   }
2134   if (N == PETSC_DECIDE && n == PETSC_DECIDE) {
2135     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"either N or n should be specified");
2136   }
2137   if (M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE;
2138   if (N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE;
2139 
2140   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
2141     work[0] = m; work[1] = n;
2142     mbs = m/bs; nbs = n/bs;
2143     ierr = MPI_Allreduce(work,sum,2,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
2144     if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;}
2145     if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;}
2146   }
2147   if (m == PETSC_DECIDE) {
2148     Mbs = M/bs;
2149     if (Mbs*bs != M) SETERRQ(PETSC_ERR_ARG_SIZ,"No of global rows must be divisible by blocksize");
2150     mbs = Mbs/b->size + ((Mbs % b->size) > b->rank);
2151     m   = mbs*bs;
2152   }
2153   if (n == PETSC_DECIDE) {
2154     Nbs = N/bs;
2155     if (Nbs*bs != N) SETERRQ(PETSC_ERR_ARG_SIZ,"No of global cols must be divisible by blocksize");
2156     nbs = Nbs/b->size + ((Nbs % b->size) > b->rank);
2157     n   = nbs*bs;
2158   }
2159   if (mbs*bs != m || nbs*bs != n) {
2160     SETERRQ(PETSC_ERR_ARG_SIZ,"No of local rows, cols must be divisible by blocksize");
2161   }
2162 
2163   b->m = m; B->m = m;
2164   b->n = n; B->n = n;
2165   b->N = N; B->N = N;
2166   b->M = M; B->M = M;
2167   b->bs  = bs;
2168   b->bs2 = bs*bs;
2169   b->mbs = mbs;
2170   b->nbs = nbs;
2171   b->Mbs = Mbs;
2172   b->Nbs = Nbs;
2173 
2174   /* the information in the maps duplicates the information computed below, eventually
2175      we should remove the duplicate information that is not contained in the maps */
2176   ierr = MapCreateMPI(B->comm,m,M,&B->rmap);CHKERRQ(ierr);
2177   ierr = MapCreateMPI(B->comm,n,N,&B->cmap);CHKERRQ(ierr);
2178 
2179   /* build local table of row and column ownerships */
2180   b->rowners = (int*)PetscMalloc(3*(b->size+2)*sizeof(int));CHKPTRQ(b->rowners);
2181   PLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
2182   b->cowners    = b->rowners + b->size + 2;
2183   b->rowners_bs = b->cowners + b->size + 2;
2184   ierr = MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2185   b->rowners[0]    = 0;
2186   for (i=2; i<=b->size; i++) {
2187     b->rowners[i] += b->rowners[i-1];
2188   }
2189   for (i=0; i<=b->size; i++) {
2190     b->rowners_bs[i] = b->rowners[i]*bs;
2191   }
2192   b->rstart    = b->rowners[b->rank];
2193   b->rend      = b->rowners[b->rank+1];
2194   b->rstart_bs = b->rstart * bs;
2195   b->rend_bs   = b->rend * bs;
2196 
2197   ierr = MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2198   b->cowners[0] = 0;
2199   for (i=2; i<=b->size; i++) {
2200     b->cowners[i] += b->cowners[i-1];
2201   }
2202   b->cstart    = b->cowners[b->rank];
2203   b->cend      = b->cowners[b->rank+1];
2204   b->cstart_bs = b->cstart * bs;
2205   b->cend_bs   = b->cend * bs;
2206 
2207 
2208   if (d_nz == PETSC_DEFAULT) d_nz = 5;
2209   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
2210   PLogObjectParent(B,b->A);
2211   if (o_nz == PETSC_DEFAULT) o_nz = 0;
2212   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
2213   PLogObjectParent(B,b->B);
2214 
2215   /* build cache for off array entries formed */
2216   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
2217   ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);
2218   b->donotstash  = PETSC_FALSE;
2219   b->colmap      = PETSC_NULL;
2220   b->garray      = PETSC_NULL;
2221   b->roworiented = PETSC_TRUE;
2222 
#if defined(PETSC_USE_MAT_SINGLE)
2224   /* stuff for MatSetValues_XXX in single precision */
2225   b->lensetvalues     = 0;
2226   b->setvaluescopy    = PETSC_NULL;
2227 #endif
2228 
2229   /* stuff used in block assembly */
2230   b->barray       = 0;
2231 
2232   /* stuff used for matrix vector multiply */
2233   b->lvec         = 0;
2234   b->Mvctx        = 0;
2235 
2236   /* stuff for MatGetRow() */
2237   b->rowindices   = 0;
2238   b->rowvalues    = 0;
2239   b->getrowactive = PETSC_FALSE;
2240 
2241   /* hash table stuff */
2242   b->ht           = 0;
2243   b->hd           = 0;
2244   b->ht_size      = 0;
2245   b->ht_flag      = PETSC_FALSE;
2246   b->ht_fact      = 0;
2247   b->ht_total_ct  = 0;
2248   b->ht_insert_ct = 0;
2249 
2250   *A = B;
2251   ierr = OptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
2252   if (flg) {
2253     double fact = 1.39;
2254     ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
2255     ierr = OptionsGetDouble(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
2256     if (fact <= 1.0) fact = 1.39;
2257     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
2258     PLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact);
2259   }
2260   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2261                                      "MatStoreValues_MPIBAIJ",
2262                                      MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
2263   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2264                                      "MatRetrieveValues_MPIBAIJ",
2265                                      MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
2266   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
2267                                      "MatGetDiagonalBlock_MPIBAIJ",
2268                                      MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
2269   PetscFunctionReturn(0);
2270 }
2271 
2272 #undef __FUNC__
2273 #define __FUNC__ /*<a name=""></a>*/"MatDuplicate_MPIBAIJ"
2274 static int MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2275 {
2276   Mat         mat;
2277   Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
2278   int         ierr,len=0;
2279   PetscTruth  flg;
2280 
2281   PetscFunctionBegin;
2282   *newmat       = 0;
2283   PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",matin->comm,MatDestroy,MatView);
2284   PLogObjectCreate(mat);
2285   mat->data         = (void*)(a = PetscNew(Mat_MPIBAIJ));CHKPTRQ(a);
2286   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2287   mat->factor       = matin->factor;
2288   mat->assembled    = PETSC_TRUE;
2289   mat->insertmode   = NOT_SET_VALUES;
2290 
2291   a->m = mat->m   = oldmat->m;
2292   a->n = mat->n   = oldmat->n;
2293   a->M = mat->M   = oldmat->M;
2294   a->N = mat->N   = oldmat->N;
2295 
2296   a->bs  = oldmat->bs;
2297   a->bs2 = oldmat->bs2;
2298   a->mbs = oldmat->mbs;
2299   a->nbs = oldmat->nbs;
2300   a->Mbs = oldmat->Mbs;
2301   a->Nbs = oldmat->Nbs;
2302 
2303   a->rstart       = oldmat->rstart;
2304   a->rend         = oldmat->rend;
2305   a->cstart       = oldmat->cstart;
2306   a->cend         = oldmat->cend;
2307   a->size         = oldmat->size;
2308   a->rank         = oldmat->rank;
2309   a->donotstash   = oldmat->donotstash;
2310   a->roworiented  = oldmat->roworiented;
2311   a->rowindices   = 0;
2312   a->rowvalues    = 0;
2313   a->getrowactive = PETSC_FALSE;
2314   a->barray       = 0;
2315   a->rstart_bs    = oldmat->rstart_bs;
2316   a->rend_bs      = oldmat->rend_bs;
2317   a->cstart_bs    = oldmat->cstart_bs;
2318   a->cend_bs      = oldmat->cend_bs;
2319 
2320   /* hash table stuff */
2321   a->ht           = 0;
2322   a->hd           = 0;
2323   a->ht_size      = 0;
2324   a->ht_flag      = oldmat->ht_flag;
2325   a->ht_fact      = oldmat->ht_fact;
2326   a->ht_total_ct  = 0;
2327   a->ht_insert_ct = 0;
2328 
2329 
2330   a->rowners = (int*)PetscMalloc(3*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners);
2331   PLogObjectMemory(mat,3*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
2332   a->cowners    = a->rowners + a->size + 2;
2333   a->rowners_bs = a->cowners + a->size + 2;
2334   ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr);
2335   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
2336   ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr);
2337   if (oldmat->colmap) {
2338 #if defined (PETSC_USE_CTABLE)
2339   ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2340 #else
2341     a->colmap = (int*)PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap);
2342     PLogObjectMemory(mat,(a->Nbs)*sizeof(int));
2343     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr);
2344 #endif
2345   } else a->colmap = 0;
2346   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2347     a->garray = (int*)PetscMalloc(len*sizeof(int));CHKPTRQ(a->garray);
2348     PLogObjectMemory(mat,len*sizeof(int));
2349     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr);
2350   } else a->garray = 0;
2351 
2352   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2353   PLogObjectParent(mat,a->lvec);
2354   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2355 
2356   PLogObjectParent(mat,a->Mvctx);
2357   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2358   PLogObjectParent(mat,a->A);
2359   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2360   PLogObjectParent(mat,a->B);
2361   ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr);
2362   ierr = FListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
2363   if (flg) {
2364     ierr = MatPrintHelp(mat);CHKERRQ(ierr);
2365   }
2366   *newmat = mat;
2367   PetscFunctionReturn(0);
2368 }
2369 
2370 #include "petscsys.h"
2371 
2372 #undef __FUNC__
2373 #define __FUNC__ /*<a name=""></a>*/"MatLoad_MPIBAIJ"
2374 int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat)
2375 {
2376   Mat          A;
2377   int          i,nz,ierr,j,rstart,rend,fd;
2378   Scalar       *vals,*buf;
2379   MPI_Comm     comm = ((PetscObject)viewer)->comm;
2380   MPI_Status   status;
2381   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
2382   int          *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf;
2383   int          tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows;
2384   int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2385   int          dcount,kmax,k,nzcount,tmp;
2386 
2387   PetscFunctionBegin;
2388   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2389 
2390   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2391   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2392   if (!rank) {
2393     ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2394     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2395     if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2396     if (header[3] < 0) {
2397       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPIBAIJ");
2398     }
2399   }
2400 
2401   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
2402   M = header[1]; N = header[2];
2403 
2404   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2405 
2406   /*
2407      This code adds extra rows to make sure the number of rows is
2408      divisible by the blocksize
2409   */
2410   Mbs        = M/bs;
2411   extra_rows = bs - M + bs*(Mbs);
2412   if (extra_rows == bs) extra_rows = 0;
2413   else                  Mbs++;
  if (extra_rows && !rank) {
2415     PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n");
2416   }
2417 
2418   /* determine ownership of all rows */
2419   mbs = Mbs/size + ((Mbs % size) > rank);
2420   m   = mbs * bs;
2421   rowners = (int*)PetscMalloc(2*(size+2)*sizeof(int));CHKPTRQ(rowners);
2422   browners = rowners + size + 1;
2423   ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2424   rowners[0] = 0;
2425   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2426   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2427   rstart = rowners[rank];
2428   rend   = rowners[rank+1];
2429 
2430   /* distribute row lengths to all processors */
2431   locrowlens = (int*)PetscMalloc((rend-rstart)*bs*sizeof(int));CHKPTRQ(locrowlens);
2432   if (!rank) {
2433     rowlengths = (int*)PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths);
2434     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2435     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2436     sndcounts = (int*)PetscMalloc(size*sizeof(int));CHKPTRQ(sndcounts);
2437     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2438     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2439     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2440   } else {
2441     ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2442   }
2443 
2444   if (!rank) {
2445     /* calculate the number of nonzeros on each processor */
2446     procsnz = (int*)PetscMalloc(size*sizeof(int));CHKPTRQ(procsnz);
2447     ierr    = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
2448     for (i=0; i<size; i++) {
2449       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2450         procsnz[i] += rowlengths[j];
2451       }
2452     }
2453     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2454 
2455     /* determine max buffer needed and allocate it */
2456     maxnz = 0;
2457     for (i=0; i<size; i++) {
2458       maxnz = PetscMax(maxnz,procsnz[i]);
2459     }
2460     cols = (int*)PetscMalloc(maxnz*sizeof(int));CHKPTRQ(cols);
2461 
2462     /* read in my part of the matrix column indices  */
2463     nz = procsnz[0];
2464     ibuf = (int*)PetscMalloc(nz*sizeof(int));CHKPTRQ(ibuf);
2465     mycols = ibuf;
2466     if (size == 1)  nz -= extra_rows;
2467     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2468     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2469 
    /* read in the parts for all other processors (except the last one) and ship them off */
2471     for (i=1; i<size-1; i++) {
2472       nz   = procsnz[i];
2473       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2474       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
2475     }
2476     /* read in the stuff for the last proc */
2477     if (size != 1) {
2478       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2479       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2480       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2481       ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
2482     }
2483     ierr = PetscFree(cols);CHKERRQ(ierr);
2484   } else {
2485     /* determine buffer space needed for message */
2486     nz = 0;
2487     for (i=0; i<m; i++) {
2488       nz += locrowlens[i];
2489     }
2490     ibuf   = (int*)PetscMalloc(nz*sizeof(int));CHKPTRQ(ibuf);
2491     mycols = ibuf;
2492     /* receive message of column indices*/
2493     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2494     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2496   }
2497 
2498   /* loop over local rows, determining number of off diagonal entries */
2499   dlens  = (int*)PetscMalloc(2*(rend-rstart+1)*sizeof(int));CHKPTRQ(dlens);
2500   odlens = dlens + (rend-rstart);
2501   mask   = (int*)PetscMalloc(3*Mbs*sizeof(int));CHKPTRQ(mask);
2502   ierr   = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr);
2503   masked1 = mask    + Mbs;
2504   masked2 = masked1 + Mbs;
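  /* mask[] flags block columns already counted in the current block row;
     masked1[] records the diagonal-block columns (block ownership range
     [rstart,rend)) and masked2[] the off-diagonal ones, so that only the
     flags actually set need to be cleared after each block row. */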
2505   rowcount = 0; nzcount = 0;
2506   for (i=0; i<mbs; i++) {
2507     dcount  = 0;
2508     odcount = 0;
2509     for (j=0; j<bs; j++) {
2510       kmax = locrowlens[rowcount];
2511       for (k=0; k<kmax; k++) {
2512         tmp = mycols[nzcount++]/bs;
2513         if (!mask[tmp]) {
2514           mask[tmp] = 1;
2515           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
2516           else masked1[dcount++] = tmp;
2517         }
2518       }
2519       rowcount++;
2520     }
2521 
2522     dlens[i]  = dcount;
2523     odlens[i] = odcount;
2524 
2525     /* zero out the mask elements we set */
2526     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2527     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2528   }
2529 
2530   /* create our matrix */
2531   ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);CHKERRQ(ierr);
2532   A = *newmat;
  ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2534 
2535   if (!rank) {
2536     buf = (Scalar*)PetscMalloc(maxnz*sizeof(Scalar));CHKPTRQ(buf);
2537     /* read in my part of the matrix numerical values  */
2538     nz = procsnz[0];
2539     vals = buf;
2540     mycols = ibuf;
2541     if (size == 1)  nz -= extra_rows;
2542     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2543     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2544 
2545     /* insert into matrix */
2546     jj      = rstart*bs;
2547     for (i=0; i<m; i++) {
2548       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2549       mycols += locrowlens[i];
2550       vals   += locrowlens[i];
2551       jj++;
2552     }
2553     /* read in other processors (except the last one) and ship out */
2554     for (i=1; i<size-1; i++) {
2555       nz   = procsnz[i];
2556       vals = buf;
2557       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2558       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2559     }
2560     /* the last proc */
    if (size != 1) {
      nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2563       vals = buf;
2564       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2565       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2566       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
2567     }
2568     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2569   } else {
2570     /* receive numeric values */
2571     buf = (Scalar*)PetscMalloc(nz*sizeof(Scalar));CHKPTRQ(buf);
2572 
2573     /* receive message of values*/
2574     vals   = buf;
2575     mycols = ibuf;
2576     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2577     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2579 
2580     /* insert into matrix */
2581     jj      = rstart*bs;
2582     for (i=0; i<m; i++) {
2583       ierr    = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2584       mycols += locrowlens[i];
2585       vals   += locrowlens[i];
2586       jj++;
2587     }
2588   }
2589   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2590   ierr = PetscFree(buf);CHKERRQ(ierr);
2591   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2592   ierr = PetscFree(rowners);CHKERRQ(ierr);
2593   ierr = PetscFree(dlens);CHKERRQ(ierr);
2594   ierr = PetscFree(mask);CHKERRQ(ierr);
2595   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2596   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2597   PetscFunctionReturn(0);
2598 }
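
/*
   Driver-side sketch (the file name is illustrative; MatLoad() dispatches
   here when the MATMPIBAIJ type is requested):

     Viewer viewer;
     Mat    A;
     ierr = ViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",BINARY_RDONLY,&viewer);CHKERRQ(ierr);
     ierr = MatLoad(viewer,MATMPIBAIJ,&A);CHKERRQ(ierr);
     ierr = ViewerDestroy(viewer);CHKERRQ(ierr);
*/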
2599 
2600 #undef __FUNC__
2601 #define __FUNC__ /*<a name=""></a>*/"MatMPIBAIJSetHashTableFactor"
2602 /*@
   MatMPIBAIJSetHashTableFactor - Sets the factor used to compute the size of the hash table.
2604 
2605    Input Parameters:
+  mat  - the matrix
-  fact - factor
2608 
2609    Collective on Mat
2610 
2611    Level: advanced
2612 
2613   Notes:
2614    This can also be set by the command line option: -mat_use_hash_table fact
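
   Example (a sketch; A is a MATMPIBAIJ matrix created elsewhere):
.vb
   ierr = MatSetOption(A,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
   ierr = MatMPIBAIJSetHashTableFactor(A,1.39);CHKERRQ(ierr);
.ve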
2615 
2616 .keywords: matrix, hashtable, factor, HT
2617 
2618 .seealso: MatSetOption()
2619 @*/
2620 int MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
2621 {
2622   Mat_MPIBAIJ *baij;
2623 
2624   PetscFunctionBegin;
2625   PetscValidHeaderSpecific(mat,MAT_COOKIE);
2626   if (mat->type != MATMPIBAIJ) {
2627     SETERRQ(PETSC_ERR_ARG_WRONG,"Incorrect matrix type. Use MPIBAIJ only.");
2628   }
2629   baij = (Mat_MPIBAIJ*)mat->data;
2630   baij->ht_fact = fact;
2631   PetscFunctionReturn(0);
2632 }
2633