xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 35d8aa7f675fa4ee5c80aa031e65d3ef307a4891)
1 /*$Id: mpibaij.c,v 1.206 2000/11/10 21:17:14 bsmith Exp bsmith $*/
2 
3 #include "src/mat/impls/baij/mpi/mpibaij.h"   /*I  "petscmat.h"  I*/
4 #include "src/vec/vecimpl.h"
5 
6 EXTERN int MatSetUpMultiply_MPIBAIJ(Mat);
7 EXTERN int DisAssemble_MPIBAIJ(Mat);
8 EXTERN int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int);
9 EXTERN int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatReuse,Mat **);
10 EXTERN int MatGetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *);
11 EXTERN int MatSetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *,InsertMode);
12 EXTERN int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
13 EXTERN int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
14 EXTERN int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
15 EXTERN int MatPrintHelp_SeqBAIJ(Mat);
16 EXTERN int MatZeroRows_SeqBAIJ(Mat,IS,Scalar*);
17 
/*  UGLY, ugly, ugly
   When MatScalar == Scalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does
   not exist as a separate routine; the ..._MatScalar() names are simply #defined to
   the corresponding regular routines below. Otherwise ..._MatScalar() takes matrix
   elements in single precision and inserts them into the single-precision data
   structure, and the function MatSetValuesBlocked_MPIBAIJ() converts the entries
   into single precision and then calls ..._MatScalar() to put them into the
   single-precision data structures.
*/
25 #if defined(PETSC_USE_MAT_SINGLE)
26 EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
27 EXTERN int MatSetValues_MPIBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
28 EXTERN int MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
29 EXTERN int MatSetValues_MPIBAIJ_HT_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
30 EXTERN int MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
31 #else
32 #define MatSetValuesBlocked_SeqBAIJ_MatScalar      MatSetValuesBlocked_SeqBAIJ
33 #define MatSetValues_MPIBAIJ_MatScalar             MatSetValues_MPIBAIJ
34 #define MatSetValuesBlocked_MPIBAIJ_MatScalar      MatSetValuesBlocked_MPIBAIJ
35 #define MatSetValues_MPIBAIJ_HT_MatScalar          MatSetValues_MPIBAIJ_HT
36 #define MatSetValuesBlocked_MPIBAIJ_HT_MatScalar   MatSetValuesBlocked_MPIBAIJ_HT
37 #endif
38 
39 #undef __FUNC__
40 #define __FUNC__ /*<a name=""></a>*/"MatGetRowMax_MPIBAIJ"
41 int MatGetRowMax_MPIBAIJ(Mat A,Vec v)
42 {
43   Mat_MPIBAIJ  *a = (Mat_MPIBAIJ*)A->data;
44   int          ierr,i;
45   Scalar       *va,*vb;
46   Vec          vtmp;
47 
48   PetscFunctionBegin;
49 
50   ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr);
51   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
52 
53   ierr = VecCreateSeq(PETSC_COMM_SELF,A->m,&vtmp);CHKERRA(ierr);
54   ierr = MatGetRowMax(a->B,vtmp);CHKERRQ(ierr);
55   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
56 
57   for (i=0; i<A->m; i++){
58     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) va[i] = vb[i];
59   }
60 
61   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
62   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
63   ierr = VecDestroy(vtmp);CHKERRA(ierr);
64 
65   PetscFunctionReturn(0);
66 }
67 
68 EXTERN_C_BEGIN
69 #undef __FUNC__
70 #define __FUNC__ /*<a name="MatStoreValues_MPIBAIJ"></a>*/"MatStoreValues_MPIBAIJ"
71 int MatStoreValues_MPIBAIJ(Mat mat)
72 {
73   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
74   int         ierr;
75 
76   PetscFunctionBegin;
77   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
78   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
79   PetscFunctionReturn(0);
80 }
81 EXTERN_C_END
82 
83 EXTERN_C_BEGIN
84 #undef __FUNC__
85 #define __FUNC__ /*<a name="MatRetrieveValues_MPIBAIJ"></a>*/"MatRetrieveValues_MPIBAIJ"
86 int MatRetrieveValues_MPIBAIJ(Mat mat)
87 {
88   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
89   int         ierr;
90 
91   PetscFunctionBegin;
92   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
93   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
94   PetscFunctionReturn(0);
95 }
96 EXTERN_C_END
97 
/*
     Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  This is done in a non-scalable way since the
   length of colmap equals the global matrix length.
*/
#undef __FUNC__
#define __FUNC__ /*<a name="CreateColmap_MPIBAIJ_Private"></a>*/"CreateColmap_MPIBAIJ_Private"
/*
   CreateColmap_MPIBAIJ_Private - Builds baij->colmap, mapping a global block
   column to the local (point) column offset within the off-diagonal block B.
   Entries are stored shifted by +1 so that 0 (hash-table miss / zeroed slot)
   means "block column not present in B".
*/
static int CreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data;  /* off-diagonal block */
  int         nbs = B->nbs,i,bs=B->bs,ierr;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  /* sparse variant: hash table keyed by global block column + 1 */
  ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr);
  for (i=0; i<nbs; i++){
    ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
  }
#else
  /* dense variant: one int per global block column (memory is O(global size)) */
  baij->colmap = (int*)PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap);
  PLogObjectMemory(mat,baij->Nbs*sizeof(int));
  ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));CHKERRQ(ierr);
  for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}
126 
127 #define CHUNKSIZE  10
128 
/*
   MatSetValues_SeqBAIJ_A_Private - Inlined (macro) insertion of one scalar value
   into the diagonal SeqBAIJ block A.  Relies on work variables declared by the
   enclosing function (rp,ap,bap,nrow,rmax,low,high,t,_i,ii,N,brow,bcol,ridx,cidx,
   bs,bs2,ai,aj,aa,aimax,ailen,a,A,ierr).  Searches the block row for the block
   column (binary narrowing + linear scan); if absent and insertion is permitted,
   shifts later entries up, reallocating the whole matrix storage (enlarged by
   CHUNKSIZE block entries) when the row is full.

   Fix: the zero-fill of the newly allocated chunk uses sizeof(MatScalar) -- the
   element type of the storage actually allocated -- instead of sizeof(Scalar).
   When PETSC_USE_MAT_SINGLE is defined, Scalar is wider than MatScalar, and the
   original memzero overran the fresh buffer.  (The parallel B macro below
   already used sizeof(MatScalar).)
*/
#define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto a_noinsert; \
        } \
      } \
      if (a->nonew == 1) goto a_noinsert; \
      else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
      if (nrow >= rmax) { \
        /* there is no extra room in row, therefore enlarge */ \
        int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
        MatScalar *new_a; \
 \
        if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
 \
        /* malloc new storage space */ \
        len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \
        new_a   = (MatScalar*)PetscMalloc(len);CHKPTRQ(new_a); \
        new_j   = (int*)(new_a + bs2*new_nz); \
        new_i   = new_j + new_nz; \
 \
        /* copy over old data into new slots */ \
        for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \
        for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
        ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
        len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
        ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
        ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
                    aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
        /* free up old matrix storage */ \
        ierr = PetscFree(a->a);CHKERRQ(ierr);  \
        if (!a->singlemalloc) { \
          ierr = PetscFree(a->i);CHKERRQ(ierr); \
          ierr = PetscFree(a->j);CHKERRQ(ierr);\
        } \
        aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
        a->singlemalloc = PETSC_TRUE; \
 \
        rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
        rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
        PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
        a->maxnz += bs2*CHUNKSIZE; \
        a->reallocs++; \
        a->nz++; \
      } \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      a_noinsert:; \
    ailen[brow] = nrow; \
}
205 
/*
   MatSetValues_SeqBAIJ_B_Private - Inlined (macro) insertion of one scalar value
   into the off-diagonal SeqBAIJ block B.  Mirror image of
   MatSetValues_SeqBAIJ_A_Private() using the b* work variables (bimax,bi,bilen,
   bj,ba,b,B) supplied by the enclosing function: binary narrowing + linear scan
   for the block column, then insertion with shift-up, reallocating the whole
   matrix storage enlarged by CHUNKSIZE block entries when the row is full.
*/
#define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto b_noinsert; \
        } \
      } \
      if (b->nonew == 1) goto b_noinsert; \
      else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
      if (nrow >= rmax) { \
        /* there is no extra room in row, therefore enlarge */ \
        int       new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
        MatScalar *new_a; \
 \
        if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
 \
        /* malloc new storage space */ \
        len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \
        new_a   = (MatScalar*)PetscMalloc(len);CHKPTRQ(new_a); \
        new_j   = (int*)(new_a + bs2*new_nz); \
        new_i   = new_j + new_nz; \
 \
        /* copy over old data into new slots */ \
        for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \
        for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
        ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
        len  = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
        ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
        ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
                    ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
        /* free up old matrix storage */ \
        ierr = PetscFree(b->a);CHKERRQ(ierr);  \
        if (!b->singlemalloc) { \
          ierr = PetscFree(b->i);CHKERRQ(ierr); \
          ierr = PetscFree(b->j);CHKERRQ(ierr); \
        } \
        ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
        b->singlemalloc = PETSC_TRUE; \
 \
        rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
        rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
        PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
        b->maxnz += bs2*CHUNKSIZE; \
        b->reallocs++; \
        b->nz++; \
      } \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      b_noinsert:; \
    bilen[brow] = nrow; \
}
281 
282 #if defined(PETSC_USE_MAT_SINGLE)
283 #undef __FUNC__
284 #define __FUNC__ /*<a name="MatSetValues_MPIBAIJ"></a>*/"MatSetValues_MPIBAIJ"
285 int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
286 {
287   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
288   int         ierr,i,N = m*n;
289   MatScalar   *vsingle;
290 
291   PetscFunctionBegin;
292   if (N > b->setvalueslen) {
293     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
294     b->setvaluescopy = (MatScalar*)PetscMalloc(N*sizeof(MatScalar));CHKPTRQ(b->setvaluescopy);
295     b->setvalueslen  = N;
296   }
297   vsingle = b->setvaluescopy;
298 
299   for (i=0; i<N; i++) {
300     vsingle[i] = v[i];
301   }
302   ierr = MatSetValues_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
303   PetscFunctionReturn(0);
304 }
305 
306 #undef __FUNC__
307 #define __FUNC__ /*<a name="MatSetValuesBlocked_MPIBAIJ"></a>*/"MatSetValuesBlocked_MPIBAIJ"
308 int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
309 {
310   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
311   int         ierr,i,N = m*n*b->bs2;
312   MatScalar   *vsingle;
313 
314   PetscFunctionBegin;
315   if (N > b->setvalueslen) {
316     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
317     b->setvaluescopy = (MatScalar*)PetscMalloc(N*sizeof(MatScalar));CHKPTRQ(b->setvaluescopy);
318     b->setvalueslen  = N;
319   }
320   vsingle = b->setvaluescopy;
321   for (i=0; i<N; i++) {
322     vsingle[i] = v[i];
323   }
324   ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
325   PetscFunctionReturn(0);
326 }
327 
328 #undef __FUNC__
329 #define __FUNC__ /*<a name="MatSetValues_MPIBAIJ_HT"></a>*/"MatSetValues_MPIBAIJ_HT"
330 int MatSetValues_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
331 {
332   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
333   int         ierr,i,N = m*n;
334   MatScalar   *vsingle;
335 
336   PetscFunctionBegin;
337   if (N > b->setvalueslen) {
338     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
339     b->setvaluescopy = (MatScalar*)PetscMalloc(N*sizeof(MatScalar));CHKPTRQ(b->setvaluescopy);
340     b->setvalueslen  = N;
341   }
342   vsingle = b->setvaluescopy;
343   for (i=0; i<N; i++) {
344     vsingle[i] = v[i];
345   }
346   ierr = MatSetValues_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
347   PetscFunctionReturn(0);
348 }
349 
350 #undef __FUNC__
351 #define __FUNC__ /*<a name="MatSetValuesBlocked_MPIBAIJ_HT"></a>*/"MatSetValuesBlocked_MPIBAIJ_HT"
352 int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
353 {
354   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
355   int         ierr,i,N = m*n*b->bs2;
356   MatScalar   *vsingle;
357 
358   PetscFunctionBegin;
359   if (N > b->setvalueslen) {
360     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
361     b->setvaluescopy = (MatScalar*)PetscMalloc(N*sizeof(MatScalar));CHKPTRQ(b->setvaluescopy);
362     b->setvalueslen  = N;
363   }
364   vsingle = b->setvaluescopy;
365   for (i=0; i<N; i++) {
366     vsingle[i] = v[i];
367   }
368   ierr = MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
369   PetscFunctionReturn(0);
370 }
371 #endif
372 
#undef __FUNC__
#define __FUNC__ /*<a name="MatSetValues_MPIBAIJ"></a>*/"MatSetValues_MPIBAIJ"
/*
   MatSetValues_MPIBAIJ_MatScalar - Inserts/adds a logically dense block of
   values (already in MatScalar precision), addressed by global POINT (not
   block) row/column indices.

   Rows owned by this process are written directly into the local diagonal
   block (A) or off-diagonal block (B) via the inlined macros
   MatSetValues_SeqBAIJ_A_Private()/..._B_Private(); rows owned by other
   processes are stashed for communication at assembly time.  Negative
   row/column indices are silently ignored (standard MatSetValues convention).
*/
int MatSetValues_MPIBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  MatScalar   value;
  PetscTruth  roworiented = baij->roworiented;
  int         ierr,i,j,row,col;
  int         rstart_orig=baij->rstart_bs;
  int         rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
  int         cend_orig=baij->cend_bs,bs=baij->bs;

  /* Some Variables required in the macro */
  Mat         A = baij->A;
  Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data;
  int         *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar   *aa=a->a;

  Mat         B = baij->B;
  Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data;
  int         *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar   *ba=b->a;

  int         *rp,ii,nrow,_i,rmax,N,brow,bcol;
  int         low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar   *ap,*bap;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;  /* negative rows are ignored */
#if defined(PETSC_USE_BOPT_g)
    if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      /* locally owned row */
      row = im[i] - rstart_orig;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig){
          /* column falls in the diagonal block A */
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_BOPT_g)
        else if (in[j] >= mat->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");}
#endif
        else {
          /* column falls in the off-diagonal block B */
          if (mat->was_assembled) {
            /* translate global column to B's local numbering via colmap */
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1 + in[j]%bs;  /* colmap entries are stored shifted by +1 */
#else
            col = baij->colmap[in[j]/bs] - 1 + in[j]%bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              /* block column not yet present in B: disassemble, then use the
                 global index directly */
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B = baij->B;
              b = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba=b->a;
            }
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      /* row owned by another process: stash for assembly-time communication */
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
456 
#undef __FUNC__
#define __FUNC__ /*<a name="MatSetValuesBlocked_MPIBAIJ"></a>*/"MatSetValuesBlocked_MPIBAIJ"
/*
   MatSetValuesBlocked_MPIBAIJ_MatScalar - Inserts/adds m x n blocks of bs x bs
   values (in MatScalar precision), addressed by global BLOCK row/column
   indices.  Each block is packed into the contiguous scratch buffer
   baij->barray when the input layout is not already contiguous, then routed to
   the diagonal (A) or off-diagonal (B) sequential block; off-process rows are
   stashed in mat->bstash for assembly-time communication.
*/
int MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  MatScalar   *value,*barray=baij->barray;
  PetscTruth  roworiented = baij->roworiented;
  int         ierr,i,j,ii,jj,row,col,rstart=baij->rstart;
  int         rend=baij->rend,cstart=baij->cstart,stepval;
  int         cend=baij->cend,bs=baij->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  /* lazily allocate the one-block scratch buffer */
  if(!barray) {
    baij->barray = barray = (MatScalar*)PetscMalloc(bs2*sizeof(MatScalar));CHKPTRQ(barray);
  }

  /* distance (in MatScalars) between consecutive rows of one block in v */
  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;  /* negative block rows are ignored */
#if defined(PETSC_USE_BOPT_g)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %d max %d",im[i],baij->Mbs);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = v + i*bs2;
        } else if((!roworiented) && (m == 1)) {
          barray = v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          /* gather the bs x bs block into contiguous storage */
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++  = *value++;
            }
          }
          barray -=bs2;  /* rewind to the start of the packed block */
        }

        if (in[j] >= cstart && in[j] < cend){
          /* block column falls in the diagonal block A */
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
        else if (in[j] < 0) continue;  /* negative block columns are ignored */
#if defined(PETSC_USE_BOPT_g)
        else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %d max %d",in[j],baij->Nbs);}
#endif
        else {
          /* block column falls in the off-diagonal block B */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_BOPT_g)
            /* sanity check: colmap entries for block columns must be block aligned */
#if defined (PETSC_USE_CTABLE)
            { int data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
            /* colmap entries are stored shifted by +1 and in point numbering */
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              /* block column not yet present in B: disassemble, then use the
                 global block index directly */
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
            }
          }
          else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      /* block row owned by another process: stash for assembly-time communication */
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
556 
/*
   Multiplicative hash: HASH_KEY is the fractional part of the golden ratio;
   HASH(size,key,tmp) scales the fractional part of key*HASH_KEY into
   [0,size).  NOTE: tmp is an output parameter (a PetscReal lvalue supplied
   by the caller) and key is evaluated once.
*/
#define HASH_KEY 0.6180339887
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
/* #define HASH(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
#undef __FUNC__
#define __FUNC__ /*<a name="MatSetValues_MPIBAIJ_HT_MatScalar"></a>*/"MatSetValues_MPIBAIJ_HT_MatScalar"
/*
   MatSetValues_MPIBAIJ_HT_MatScalar - Hash-table based insertion of individual
   (point-indexed) values.  The table baij->ht maps the key
   (blockrow*Nbs + blockcol + 1) to a slot whose entry in baij->hd points at the
   start of the corresponding bs x bs block; collisions are resolved by linear
   probing, wrapping around to the start of the table once.  Only blocks already
   present in the table can be written -- a missing entry is an error.
   Off-process rows are stashed for assembly-time communication.
*/
int MatSetValues_MPIBAIJ_HT_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  PetscTruth  roworiented = baij->roworiented;
  int         ierr,i,j,row,col;
  int         rstart_orig=baij->rstart_bs;
  int         rend_orig=baij->rend_bs,Nbs=baij->Nbs;
  int         h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx;
  PetscReal   tmp;
  MatScalar   **HD = baij->hd,value;
#if defined(PETSC_USE_BOPT_g)
  int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;  /* probe statistics */
#endif

  PetscFunctionBegin;

  for (i=0; i<m; i++) {
#if defined(PETSC_USE_BOPT_g)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
#endif
      row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      /* locally owned row */
      for (j=0; j<n; j++) {
        col = in[j];
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        /* Look up into the Hash Table */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);


        idx = h1;
#if defined(PETSC_USE_BOPT_g)
        /* same probe loop as below, but counting probes for diagnostics */
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"(row,col) has no entry in the hash table");
            }
          }
        }
#else
        /* linear probe forward, then wrap around once; full cycle => missing */
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"(row,col) has no entry in the hash table");
            }
          }
        }
#endif
        /* A HASH table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else {
      /* row owned by another process: stash for assembly-time communication */
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_BOPT_g)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}
638 
#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatSetValuesBlocked_MPIBAIJ_HT_MatScalar"
/*
   MatSetValuesBlocked_MPIBAIJ_HT_MatScalar - Hash-table based insertion of
   whole bs x bs blocks, addressed by global BLOCK row/column indices.  The
   table baij->ht maps the key (blockrow*Nbs + blockcol + 1) to a slot whose
   entry in baij->hd points at the block's storage; collisions are resolved by
   linear probing, wrapping around to the start of the table once.  Only blocks
   already present in the table can be written -- a missing entry is an error.
   Off-process block rows are stashed in mat->bstash.
*/
int MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  PetscTruth  roworiented = baij->roworiented;
  int         ierr,i,j,ii,jj,row,col;
  int         rstart=baij->rstart ;
  int         rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2;
  int         h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  PetscReal   tmp;
  MatScalar   **HD = baij->hd,*baij_a;
  MatScalar   *v_t,*value;
#if defined(PETSC_USE_BOPT_g)
  int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;  /* probe statistics */
#endif

  PetscFunctionBegin;

  /* distance (in MatScalars) between consecutive rows of one block in v */
  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_BOPT_g)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
#endif
    row   = im[i];
    v_t   = v + i*bs2;
    if (row >= rstart && row < rend) {
      /* locally owned block row */
      for (j=0; j<n; j++) {
        col = in[j];

        /* Look up into the Hash Table */
        key = row*Nbs+col+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_BOPT_g)
        /* same probe loop as below, but counting probes for diagnostics */
        total_ct++;
        insert_ct++;
       if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"(row,col) has no entry in the hash table");
            }
          }
        }
#else
        /* linear probe forward, then wrap around once; full cycle => missing */
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"(row,col) has no entry in the hash table");
            }
          }
        }
#endif
        baij_a = HD[idx];
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t  += bs;
          if (addv == ADD_VALUES) {
            /* input is row-major; storage is column-major within the block */
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj]  += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj]  = *value++;
              }
            }
          }
        } else {
          value = v + j*(stepval+bs)*bs + i*bs;
          if (addv == ADD_VALUES) {
            /* input is already column-major within the block */
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj]  += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj]  = *value++;
              }
            }
          }
        }
      }
    } else {
      /* block row owned by another process: stash for assembly-time communication */
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_BOPT_g)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}
754 
#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatGetValues_MPIBAIJ"
/*
   MatGetValues_MPIBAIJ - Retrieves values addressed by global POINT indices.
   Only rows owned by this process are supported; columns in the diagonal range
   are read from A, all others are looked up in the off-diagonal block B via
   colmap (entries stored shifted by +1; 0 means "not present", in which case
   the value returned is 0.0).
*/
int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  int        bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
  int        bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
        if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        if (idxn[j] >= bscstart && idxn[j] < bscend){
          /* column in the diagonal block A */
          col = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          /* column in the off-diagonal block B: translate via colmap */
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data --;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          /* block column absent from B => the entry is an (unstored) zero */
          if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
 PetscFunctionReturn(0);
}
798 
799 #undef __FUNC__
800 #define __FUNC__ /*<a name=""></a>*/"MatNorm_MPIBAIJ"
/*
   MatNorm_MPIBAIJ - Computes a matrix norm.  Only NORM_FROBENIUS is
   implemented for the parallel (size > 1) case: each process sums the
   squares of its stored entries in both blocks, the partial sums are
   added with MPI_Allreduce, and the square root is taken.
   NOTE(review): the reduction uses MPI_DOUBLE, which assumes PetscReal
   is double -- confirm if single-precision builds are supported here.
*/
int MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
  int        ierr,i,bs2=baij->bs2;
  PetscReal  sum = 0.0;
  MatScalar  *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    /* uniprocessor case: defer entirely to the sequential norm */
    ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      /* sum |a_ij|^2 over the diagonal block (nz blocks of bs2 entries) */
      v = amat->a;
      for (i=0; i<amat->nz*bs2; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      /* and over the off-diagonal block */
      v = bmat->a;
      for (i=0; i<bmat->nz*bs2; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
    }
  }
  PetscFunctionReturn(0);
}
838 
839 
840 /*
841   Creates the hash table, and sets the table
842   This table is created only once.
843   If new entried need to be added to the matrix
844   then the hash table has to be destroyed and
845   recreated.
846 */
847 #undef __FUNC__
848 #define __FUNC__ /*<a name=""></a>*/"MatCreateHashTable_MPIBAIJ_Private"
849 int MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
850 {
851   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
852   Mat         A = baij->A,B=baij->B;
853   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
854   int         i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
855   int         size,bs2=baij->bs2,rstart=baij->rstart,ierr;
856   int         cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs;
857   int         *HT,key;
858   MatScalar   **HD;
859   PetscReal   tmp;
860 #if defined(PETSC_USE_BOPT_g)
861   int         ct=0,max=0;
862 #endif
863 
864   PetscFunctionBegin;
865   baij->ht_size=(int)(factor*nz);
866   size = baij->ht_size;
867 
868   if (baij->ht) {
869     PetscFunctionReturn(0);
870   }
871 
872   /* Allocate Memory for Hash Table */
873   baij->hd = (MatScalar**)PetscMalloc((size)*(sizeof(int)+sizeof(MatScalar*))+1);CHKPTRQ(baij->hd);
874   baij->ht = (int*)(baij->hd + size);
875   HD = baij->hd;
876   HT = baij->ht;
877 
878 
879   ierr = PetscMemzero(HD,size*(sizeof(int)+sizeof(Scalar*)));CHKERRQ(ierr);
880 
881 
882   /* Loop Over A */
883   for (i=0; i<a->mbs; i++) {
884     for (j=ai[i]; j<ai[i+1]; j++) {
885       row = i+rstart;
886       col = aj[j]+cstart;
887 
888       key = row*Nbs + col + 1;
889       h1  = HASH(size,key,tmp);
890       for (k=0; k<size; k++){
891         if (HT[(h1+k)%size] == 0.0) {
892           HT[(h1+k)%size] = key;
893           HD[(h1+k)%size] = a->a + j*bs2;
894           break;
895 #if defined(PETSC_USE_BOPT_g)
896         } else {
897           ct++;
898 #endif
899         }
900       }
901 #if defined(PETSC_USE_BOPT_g)
902       if (k> max) max = k;
903 #endif
904     }
905   }
906   /* Loop Over B */
907   for (i=0; i<b->mbs; i++) {
908     for (j=bi[i]; j<bi[i+1]; j++) {
909       row = i+rstart;
910       col = garray[bj[j]];
911       key = row*Nbs + col + 1;
912       h1  = HASH(size,key,tmp);
913       for (k=0; k<size; k++){
914         if (HT[(h1+k)%size] == 0.0) {
915           HT[(h1+k)%size] = key;
916           HD[(h1+k)%size] = b->a + j*bs2;
917           break;
918 #if defined(PETSC_USE_BOPT_g)
919         } else {
920           ct++;
921 #endif
922         }
923       }
924 #if defined(PETSC_USE_BOPT_g)
925       if (k> max) max = k;
926 #endif
927     }
928   }
929 
930   /* Print Summary */
931 #if defined(PETSC_USE_BOPT_g)
932   for (i=0,j=0; i<size; i++) {
933     if (HT[i]) {j++;}
934   }
935   PLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n",(j== 0)? 0.0:((PetscReal)(ct+j))/j,max);
936 #endif
937   PetscFunctionReturn(0);
938 }
939 
940 #undef __FUNC__
941 #define __FUNC__ /*<a name=""></a>*/"MatAssemblyBegin_MPIBAIJ"
942 int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
943 {
944   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
945   int         ierr,nstash,reallocs;
946   InsertMode  addv;
947 
948   PetscFunctionBegin;
949   if (baij->donotstash) {
950     PetscFunctionReturn(0);
951   }
952 
953   /* make sure all processors are either in INSERTMODE or ADDMODE */
954   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
955   if (addv == (ADD_VALUES|INSERT_VALUES)) {
956     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
957   }
958   mat->insertmode = addv; /* in case this processor had no cache */
959 
960   ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
961   ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
962   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
963   PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Stash has %d entries,uses %d mallocs.\n",nstash,reallocs);
964   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
965   PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
966   PetscFunctionReturn(0);
967 }
968 
969 #undef __FUNC__
970 #define __FUNC__ /*<a name=""></a>*/"MatAssemblyEnd_MPIBAIJ"
/*
   MatAssemblyEnd_MPIBAIJ - Completes assembly: drains the point and block
   stashes into the local blocks, assembles A and B, handles collective
   disassembly, builds the scatter context on first final assembly, and
   optionally installs the hash-table MatSetValues paths.
*/
int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data;
  int         i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2;
  int         *row,*col,other_disassembled;
  PetscTruth  r1,r2,r3;
  MatScalar   *val;
  InsertMode  addv = mat->insertmode;

  PetscFunctionBegin;
  if (!baij->donotstash) {
    /* drain the point stash: receive messages until none remain */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = b->roworiented;
    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;
    b->roworiented    = PETSC_FALSE;
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* each stashed block carries bs2 values, hence the i*bs2 offset */
        ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    b->roworiented    = r3;
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselfs, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  /* the scatter context (lvec/Mvctx) is built once, at first final assembly */
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_BOPT_g)
  /* report and reset hash-table probe statistics gathered during insertion */
  if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
    PLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((double)baij->ht_total_ct)/baij->ht_insert_ct);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  /* if requested, build the hash table and switch to the fast HT insert paths */
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  /* workspace used by MatGetRow is invalidated by assembly */
  if (baij->rowvalues) {
    ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
    baij->rowvalues = 0;
  }
  PetscFunctionReturn(0);
}
1067 
1068 #undef __FUNC__
1069 #define __FUNC__ /*<a name=""></a>*/"MatView_MPIBAIJ_ASCIIorDraworSocket"
/*
   MatView_MPIBAIJ_ASCIIorDraworSocket - Views the matrix.  The INFO ASCII
   formats print per-process statistics; otherwise the entire matrix is
   gathered onto process 0 (as a new MPIBAIJ matrix with all rows there)
   and its local diagonal block is viewed through a singleton viewer.
*/
static int MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer)
{
  Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ*)mat->data;
  int          ierr,format,bs = baij->bs,size = baij->size,rank = baij->rank;
  PetscTruth   isascii,isdraw;
  Viewer       sviewer;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
  if (isascii) {
    ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
      /* per-process nonzero/memory statistics; no matrix entries printed */
      MatInfo info;
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
              rank,mat->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
              baij->bs,(int)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
      ierr = ViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == VIEWER_FORMAT_ASCII_INFO) {
      ierr = ViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    Draw       draw;
    PetscTruth isnull;
    ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    int         M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar   *a;

    /* process 0 owns every row of the gathered matrix; others own none */
    if (!rank) {
      ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    }
    PLogObjectParent(mat,A);

    /* copy over the A part */
    Aloc  = (Mat_SeqBAIJ*)baij->A->data;
    ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
    rvals = (int*)PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals);

    for (i=0; i<mbs; i++) {
      /* rvals holds the bs consecutive global point rows of block row i */
      rvals[0] = bs*(baij->rstart + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstart+aj[j])*bs;
        for (k=0; k<bs; k++) {
          /* insert one point column of the block at a time (column-major storage) */
          ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ*)baij->B->data;
    ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstart + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        /* garray maps B's compressed block columns back to global block columns */
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the Draw object
    */
    ierr = ViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = ViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1171 
1172 #undef __FUNC__
1173 #define __FUNC__ /*<a name=""></a>*/"MatView_MPIBAIJ"
1174 int MatView_MPIBAIJ(Mat mat,Viewer viewer)
1175 {
1176   int        ierr;
1177   PetscTruth isascii,isdraw,issocket,isbinary;
1178 
1179   PetscFunctionBegin;
1180   ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
1181   ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
1182   ierr = PetscTypeCompare((PetscObject)viewer,SOCKET_VIEWER,&issocket);CHKERRQ(ierr);
1183   ierr = PetscTypeCompare((PetscObject)viewer,BINARY_VIEWER,&isbinary);CHKERRQ(ierr);
1184   if (isascii || isdraw || issocket || isbinary) {
1185     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1186   } else {
1187     SETERRQ1(1,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
1188   }
1189   PetscFunctionReturn(0);
1190 }
1191 
1192 #undef __FUNC__
1193 #define __FUNC__ /*<a name=""></a>*/"MatDestroy_MPIBAIJ"
/*
   MatDestroy_MPIBAIJ - Frees everything owned by the parallel matrix:
   the stashes, the two sequential blocks, the column map, the garray,
   the scatter machinery, and the optional workspace/hash-table storage.
*/
int MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  int         ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",mat->M,mat->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(baij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
#else
  if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
#endif
  /* the following are only allocated lazily, hence the guards */
  if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
  if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
  if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
  if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
  if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
  /* hd/ht share one allocation; freeing hd releases both */
  if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
#if defined(PETSC_USE_MAT_SINGLE)
  if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);}
#endif
  ierr = PetscFree(baij);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1225 
1226 #undef __FUNC__
1227 #define __FUNC__ /*<a name=""></a>*/"MatMult_MPIBAIJ"
1228 int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1229 {
1230   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1231   int         ierr,nt;
1232 
1233   PetscFunctionBegin;
1234   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1235   if (nt != A->n) {
1236     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1237   }
1238   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1239   if (nt != A->m) {
1240     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
1241   }
1242   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1243   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1244   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1245   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1246   ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1247   PetscFunctionReturn(0);
1248 }
1249 
1250 #undef __FUNC__
1251 #define __FUNC__ /*<a name=""></a>*/"MatMultAdd_MPIBAIJ"
1252 int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1253 {
1254   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1255   int        ierr;
1256 
1257   PetscFunctionBegin;
1258   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1259   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1260   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1261   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1262   PetscFunctionReturn(0);
1263 }
1264 
1265 #undef __FUNC__
1266 #define __FUNC__ /*<a name=""></a>*/"MatMultTranspose_MPIBAIJ"
1267 int MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1268 {
1269   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1270   int         ierr;
1271 
1272   PetscFunctionBegin;
1273   /* do nondiagonal part */
1274   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1275   /* send it on its way */
1276   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1277   /* do local part */
1278   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1279   /* receive remote parts: note this assumes the values are not actually */
1280   /* inserted in yy until the next line, which is true for my implementation*/
1281   /* but is not perhaps always true. */
1282   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1283   PetscFunctionReturn(0);
1284 }
1285 
1286 #undef __FUNC__
1287 #define __FUNC__ /*<a name=""></a>*/"MatMultTransposeAdd_MPIBAIJ"
/*
   MatMultTransposeAdd_MPIBAIJ - Computes zz = yy + A^T * xx, overlapping
   the reverse scatter of off-diagonal contributions with the local
   transpose multiply.
*/
int MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
  int         ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in zz until the next line, which is true for my implementation*/
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1306 
1307 /*
1308   This only works correctly for square matrices where the subblock A->A is the
1309    diagonal block
1310 */
1311 #undef __FUNC__
1312 #define __FUNC__ /*<a name=""></a>*/"MatGetDiagonal_MPIBAIJ"
1313 int MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1314 {
1315   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1316   int         ierr;
1317 
1318   PetscFunctionBegin;
1319   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1320   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1321   PetscFunctionReturn(0);
1322 }
1323 
1324 #undef __FUNC__
1325 #define __FUNC__ /*<a name=""></a>*/"MatScale_MPIBAIJ"
1326 int MatScale_MPIBAIJ(Scalar *aa,Mat A)
1327 {
1328   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1329   int         ierr;
1330 
1331   PetscFunctionBegin;
1332   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
1333   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
1334   PetscFunctionReturn(0);
1335 }
1336 
1337 #undef __FUNC__
1338 #define __FUNC__ /*<a name=""></a>*/"MatGetOwnershipRange_MPIBAIJ"
1339 int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n)
1340 {
1341   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data;
1342 
1343   PetscFunctionBegin;
1344   if (m) *m = mat->rstart_bs;
1345   if (n) *n = mat->rend_bs;
1346   PetscFunctionReturn(0);
1347 }
1348 
1349 #undef __FUNC__
1350 #define __FUNC__ /*<a name=""></a>*/"MatGetRow_MPIBAIJ"
/*
   MatGetRow_MPIBAIJ - Returns one locally-owned (point) row, merging the
   rows of the diagonal block A and off-diagonal block B into a single
   column-sorted list in preallocated workspace.  Must be paired with
   MatRestoreRow_MPIBAIJ.
*/
int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data;
  Scalar     *vworkA,*vworkB,**pvA,**pvB,*v_p;
  int        bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB;
  int        nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs;
  int        *cmap,*idx_p,cstart = mat->cstart;

  PetscFunctionBegin;
  /* only one row may be "gotten" at a time */
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
    int     max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    /* one allocation holds both the values and (after them) the indices */
    mat->rowvalues = (Scalar*)PetscMalloc(max*bs2*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues);
    mat->rowindices = (int*)(mat->rowvalues + max*bs2);
  }

  if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows")
  lrow = row - brstart;

  /* request columns and/or values from A and B only as needed */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap  = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      /* imark splits B's row into columns left of the diagonal block and right */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        /* B's columns that fall left of the diagonal block come first */
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        /* then all of A's columns, then B's remaining columns */
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* imark already determined above while copying values */
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          /* values were not requested; find imark here instead */
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart)
              idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1429 
1430 #undef __FUNC__
1431 #define __FUNC__ /*<a name=""></a>*/"MatRestoreRow_MPIBAIJ"
1432 int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1433 {
1434   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1435 
1436   PetscFunctionBegin;
1437   if (baij->getrowactive == PETSC_FALSE) {
1438     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1439   }
1440   baij->getrowactive = PETSC_FALSE;
1441   PetscFunctionReturn(0);
1442 }
1443 
1444 #undef __FUNC__
1445 #define __FUNC__ /*<a name=""></a>*/"MatGetBlockSize_MPIBAIJ"
1446 int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs)
1447 {
1448   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1449 
1450   PetscFunctionBegin;
1451   *bs = baij->bs;
1452   PetscFunctionReturn(0);
1453 }
1454 
1455 #undef __FUNC__
1456 #define __FUNC__ /*<a name=""></a>*/"MatZeroEntries_MPIBAIJ"
1457 int MatZeroEntries_MPIBAIJ(Mat A)
1458 {
1459   Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data;
1460   int         ierr;
1461 
1462   PetscFunctionBegin;
1463   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1464   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1465   PetscFunctionReturn(0);
1466 }
1467 
1468 #undef __FUNC__
1469 #define __FUNC__ /*<a name=""></a>*/"MatGetInfo_MPIBAIJ"
1470 int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1471 {
1472   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data;
1473   Mat         A = a->A,B = a->B;
1474   int         ierr;
1475   PetscReal   isend[5],irecv[5];
1476 
1477   PetscFunctionBegin;
1478   info->block_size     = (double)a->bs;
1479   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1480   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1481   isend[3] = info->memory;  isend[4] = info->mallocs;
1482   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1483   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1484   isend[3] += info->memory;  isend[4] += info->mallocs;
1485   if (flag == MAT_LOCAL) {
1486     info->nz_used      = isend[0];
1487     info->nz_allocated = isend[1];
1488     info->nz_unneeded  = isend[2];
1489     info->memory       = isend[3];
1490     info->mallocs      = isend[4];
1491   } else if (flag == MAT_GLOBAL_MAX) {
1492     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr);
1493     info->nz_used      = irecv[0];
1494     info->nz_allocated = irecv[1];
1495     info->nz_unneeded  = irecv[2];
1496     info->memory       = irecv[3];
1497     info->mallocs      = irecv[4];
1498   } else if (flag == MAT_GLOBAL_SUM) {
1499     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr);
1500     info->nz_used      = irecv[0];
1501     info->nz_allocated = irecv[1];
1502     info->nz_unneeded  = irecv[2];
1503     info->memory       = irecv[3];
1504     info->mallocs      = irecv[4];
1505   } else {
1506     SETERRQ1(1,"Unknown MatInfoType argument %d",flag);
1507   }
1508   info->rows_global       = (double)A->M;
1509   info->columns_global    = (double)A->N;
1510   info->rows_local        = (double)A->m;
1511   info->columns_local     = (double)A->N;
1512   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1513   info->fill_ratio_needed = 0;
1514   info->factor_mallocs    = 0;
1515   PetscFunctionReturn(0);
1516 }
1517 
1518 #undef __FUNC__
1519 #define __FUNC__ /*<a name=""></a>*/"MatSetOption_MPIBAIJ"
1520 int MatSetOption_MPIBAIJ(Mat A,MatOption op)
1521 {
1522   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1523   int         ierr;
1524 
1525   PetscFunctionBegin;
1526   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
1527       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
1528       op == MAT_COLUMNS_UNSORTED ||
1529       op == MAT_COLUMNS_SORTED ||
1530       op == MAT_NEW_NONZERO_ALLOCATION_ERR ||
1531       op == MAT_KEEP_ZEROED_ROWS ||
1532       op == MAT_NEW_NONZERO_LOCATION_ERR) {
1533         ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1534         ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1535   } else if (op == MAT_ROW_ORIENTED) {
1536         a->roworiented = PETSC_TRUE;
1537         ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1538         ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1539   } else if (op == MAT_ROWS_SORTED ||
1540              op == MAT_ROWS_UNSORTED ||
1541              op == MAT_SYMMETRIC ||
1542              op == MAT_STRUCTURALLY_SYMMETRIC ||
1543              op == MAT_YES_NEW_DIAGONALS ||
1544              op == MAT_USE_HASH_TABLE) {
1545     PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
1546   } else if (op == MAT_COLUMN_ORIENTED) {
1547     a->roworiented = PETSC_FALSE;
1548     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1549     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1550   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
1551     a->donotstash = PETSC_TRUE;
1552   } else if (op == MAT_NO_NEW_DIAGONALS) {
1553     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1554   } else if (op == MAT_USE_HASH_TABLE) {
1555     a->ht_flag = PETSC_TRUE;
1556   } else {
1557     SETERRQ(PETSC_ERR_SUP,"unknown option");
1558   }
1559   PetscFunctionReturn(0);
1560 }
1561 
1562 #undef __FUNC__
1563 #define __FUNC__ /*<a name=""></a>*/"MatTranspose_MPIBAIJ("
1564 int MatTranspose_MPIBAIJ(Mat A,Mat *matout)
1565 {
1566   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data;
1567   Mat_SeqBAIJ *Aloc;
1568   Mat         B;
1569   int         ierr,M=A->M,N=A->N,*ai,*aj,i,*rvals,j,k,col;
1570   int         bs=baij->bs,mbs=baij->mbs;
1571   MatScalar   *a;
1572 
1573   PetscFunctionBegin;
1574   if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1575   ierr = MatCreateMPIBAIJ(A->comm,baij->bs,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);
1576 
1577   /* copy over the A part */
1578   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1579   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1580   rvals = (int*)PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals);
1581 
1582   for (i=0; i<mbs; i++) {
1583     rvals[0] = bs*(baij->rstart + i);
1584     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1585     for (j=ai[i]; j<ai[i+1]; j++) {
1586       col = (baij->cstart+aj[j])*bs;
1587       for (k=0; k<bs; k++) {
1588         ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1589         col++; a += bs;
1590       }
1591     }
1592   }
1593   /* copy over the B part */
1594   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1595   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1596   for (i=0; i<mbs; i++) {
1597     rvals[0] = bs*(baij->rstart + i);
1598     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1599     for (j=ai[i]; j<ai[i+1]; j++) {
1600       col = baij->garray[aj[j]]*bs;
1601       for (k=0; k<bs; k++) {
1602         ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1603         col++; a += bs;
1604       }
1605     }
1606   }
1607   ierr = PetscFree(rvals);CHKERRQ(ierr);
1608   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1609   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1610 
1611   if (matout) {
1612     *matout = B;
1613   } else {
1614     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1615   }
1616   PetscFunctionReturn(0);
1617 }
1618 
1619 #undef __FUNC__
1620 #define __FUNC__ /*<a name=""></a>*/"MatDiagonalScale_MPIBAIJ"
1621 int MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1622 {
1623   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1624   Mat         a = baij->A,b = baij->B;
1625   int         ierr,s1,s2,s3;
1626 
1627   PetscFunctionBegin;
1628   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1629   if (rr) {
1630     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1631     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1632     /* Overlap communication with computation. */
1633     ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1634   }
1635   if (ll) {
1636     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1637     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1638     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1639   }
1640   /* scale  the diagonal block */
1641   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1642 
1643   if (rr) {
1644     /* Do a scatter end and then right scale the off-diagonal block */
1645     ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1646     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1647   }
1648 
1649   PetscFunctionReturn(0);
1650 }
1651 
#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatZeroRows_MPIBAIJ"
/*
   MatZeroRows_MPIBAIJ - Zeros the global rows of A listed in is; when diag
   is non-null the diagonal entry of each zeroed row is set to *diag.

   The rows in is may belong to any process, so the indices are first routed
   to their owning processes with a hand-rolled MPI exchange (count, Irecv,
   Isend, Waitany), after which each process zeros its own local rows.

   Note: row indices are in point (unblocked) numbering, while owners[]
   (l->rowners) is in block numbering -- hence the *bs scalings below.
*/
int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag)
{
  Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
  int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
  int            *procs,*nprocs,j,idx,nsends,*work,row;
  int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int            *rvalues,tag = A->tag,count,base,slen,n,*source;
  int            *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;
  PetscTruth     found;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);

  /*  first count number of contributors to each processor */
  /* nprocs[j] = number of rows destined for process j; procs[j] = 0/1 flag
     saying whether anything at all goes to process j */
  nprocs = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(nprocs);
  ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
  procs  = nprocs + size;
  owner  = (int*)PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note*/
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = PETSC_FALSE;
    /* linear search for the process owning point row idx; ownership is
       stored per block row, so scale owners[] by bs */
    for (j=0; j<size; j++) {
      if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  /* the reduction computes, for each rank, the largest incoming message
     (work[rank]) and how many messages will arrive (work[size+rank]) */
  work   = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(work);
  ierr   = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr);
  nmax   = work[rank];
  nrecvs = work[size+rank];
  ierr = PetscFree(work);CHKERRQ(ierr);

  /* post receives:   */
  /* each slot of rvalues is nmax ints; senders are identified later via
     MPI_ANY_SOURCE plus the status from MPI_Waitany */
  rvalues = (int*)PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
  recv_waits = (MPI_Request*)PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
     the ith processor
  */
  svalues    = (int*)PetscMalloc((N+1)*sizeof(int));CHKPTRQ(svalues);
  send_waits = (MPI_Request*)PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
  starts     = (int*)PetscMalloc((size+1)*sizeof(int));CHKPTRQ(starts);
  starts[0]  = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
  /* bucket the row indices by destination; this pass advances starts[],
     which is why it is rebuilt just below before the actual sends */
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for (i=0; i<size; i++) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  /* base = first global point row owned by this process */
  base = owners[rank]*bs;

  /*  wait on receives */
  lens   = (int*)PetscMalloc(2*(nrecvs+1)*sizeof(int));CHKPTRQ(lens);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex]  = recv_status.MPI_SOURCE;
    lens[imdex]    = n;
    slen          += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  /* lrows gathers all received row indices, shifted to local numbering */
  lrows = (int*)PetscMalloc((slen+1)*sizeof(int));CHKPTRQ(lrows);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PLogObjectParent(A,istmp);

  /*
        Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use seperate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

       Contributed by: Mathew Knepley
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
  ierr = MatZeroRows_SeqBAIJ(l->B,istmp,0);CHKERRQ(ierr);
  if (diag && (l->A->M == l->A->N)) {
    ierr = MatZeroRows_SeqBAIJ(l->A,istmp,diag);CHKERRQ(ierr);
  } else if (diag) {
    /* rectangular diagonal block: zero first, then set diagonal entries
       one at a time through the public MatSetValues() path */
    ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr);
    if (((Mat_SeqBAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i=0; i<slen; i++) {
      row  = lrows[i] + rstart_bs;
      ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr);
  }

  ierr = ISDestroy(istmp);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status*)PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
    ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr        = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
1804 
1805 #undef __FUNC__
1806 #define __FUNC__ /*<a name=""></a>*/"MatPrintHelp_MPIBAIJ"
1807 int MatPrintHelp_MPIBAIJ(Mat A)
1808 {
1809   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*)A->data;
1810   MPI_Comm    comm = A->comm;
1811   static int  called = 0;
1812   int         ierr;
1813 
1814   PetscFunctionBegin;
1815   if (!a->rank) {
1816     ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr);
1817   }
1818   if (called) {PetscFunctionReturn(0);} else called = 1;
1819   ierr = (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
1820   ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
1821   PetscFunctionReturn(0);
1822 }
1823 
1824 #undef __FUNC__
1825 #define __FUNC__ /*<a name=""></a>*/"MatSetUnfactored_MPIBAIJ"
1826 int MatSetUnfactored_MPIBAIJ(Mat A)
1827 {
1828   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*)A->data;
1829   int         ierr;
1830 
1831   PetscFunctionBegin;
1832   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1833   PetscFunctionReturn(0);
1834 }
1835 
1836 static int MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
1837 
1838 #undef __FUNC__
1839 #define __FUNC__ /*<a name=""></a>*/"MatEqual_MPIBAIJ"
1840 int MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag)
1841 {
1842   Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1843   Mat         a,b,c,d;
1844   PetscTruth  flg;
1845   int         ierr;
1846 
1847   PetscFunctionBegin;
1848   ierr = PetscTypeCompare((PetscObject)B,MATMPIBAIJ,&flg);CHKERRQ(ierr);
1849   if (!flg) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type");
1850   a = matA->A; b = matA->B;
1851   c = matB->A; d = matB->B;
1852 
1853   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1854   if (flg == PETSC_TRUE) {
1855     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1856   }
1857   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1858   PetscFunctionReturn(0);
1859 }
1860 
1861 
#undef __FUNC__
#define __FUNC__ /*<a name="MatSetUpPreallocation_MPIBAIJ"></a>*/"MatSetUpPreallocation_MPIBAIJ"
/*
   MatSetUpPreallocation_MPIBAIJ - Default setup used when the caller never
   called MatMPIBAIJSetPreallocation(): block size 1 with PETSC_DEFAULT
   per-row nonzero counts and no per-row arrays.
*/
int MatSetUpPreallocation_MPIBAIJ(Mat A)
{
  int        ierr;

  PetscFunctionBegin;
  ierr =  MatMPIBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1872 
1873 /* -------------------------------------------------------------------*/
1874 static struct _MatOps MatOps_Values = {
1875   MatSetValues_MPIBAIJ,
1876   MatGetRow_MPIBAIJ,
1877   MatRestoreRow_MPIBAIJ,
1878   MatMult_MPIBAIJ,
1879   MatMultAdd_MPIBAIJ,
1880   MatMultTranspose_MPIBAIJ,
1881   MatMultTransposeAdd_MPIBAIJ,
1882   0,
1883   0,
1884   0,
1885   0,
1886   0,
1887   0,
1888   0,
1889   MatTranspose_MPIBAIJ,
1890   MatGetInfo_MPIBAIJ,
1891   MatEqual_MPIBAIJ,
1892   MatGetDiagonal_MPIBAIJ,
1893   MatDiagonalScale_MPIBAIJ,
1894   MatNorm_MPIBAIJ,
1895   MatAssemblyBegin_MPIBAIJ,
1896   MatAssemblyEnd_MPIBAIJ,
1897   0,
1898   MatSetOption_MPIBAIJ,
1899   MatZeroEntries_MPIBAIJ,
1900   MatZeroRows_MPIBAIJ,
1901   0,
1902   0,
1903   0,
1904   0,
1905   MatSetUpPreallocation_MPIBAIJ,
1906   0,
1907   MatGetOwnershipRange_MPIBAIJ,
1908   0,
1909   0,
1910   0,
1911   0,
1912   MatDuplicate_MPIBAIJ,
1913   0,
1914   0,
1915   0,
1916   0,
1917   0,
1918   MatGetSubMatrices_MPIBAIJ,
1919   MatIncreaseOverlap_MPIBAIJ,
1920   MatGetValues_MPIBAIJ,
1921   0,
1922   MatPrintHelp_MPIBAIJ,
1923   MatScale_MPIBAIJ,
1924   0,
1925   0,
1926   0,
1927   MatGetBlockSize_MPIBAIJ,
1928   0,
1929   0,
1930   0,
1931   0,
1932   0,
1933   0,
1934   MatSetUnfactored_MPIBAIJ,
1935   0,
1936   MatSetValuesBlocked_MPIBAIJ,
1937   0,
1938   MatDestroy_MPIBAIJ,
1939   MatView_MPIBAIJ,
1940   MatGetMaps_Petsc,
1941   0,
1942   0,
1943   0,
1944   0,
1945   0,
1946   0,
1947   MatGetRowMax_MPIBAIJ};
1948 
1949 
EXTERN_C_BEGIN
#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatGetDiagonalBlock_MPIBAIJ"
/*
   MatGetDiagonalBlock_MPIBAIJ - Returns the local diagonal block of the
   parallel matrix.

   Output Parameters:
   iscopy - set to PETSC_FALSE: the returned matrix is a reference to the
            internal diagonal block, not a copy, so the caller must not
            destroy it
   a      - the sequential BAIJ matrix holding this process's diagonal block

   Note: the reuse argument is ignored here since no new storage is created.
*/
int MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
{
  PetscFunctionBegin;
  *a      = ((Mat_MPIBAIJ *)A->data)->A;
  *iscopy = PETSC_FALSE;
  PetscFunctionReturn(0);
}
EXTERN_C_END
1961 
1962 EXTERN_C_BEGIN
1963 #undef __FUNC__
1964 #define __FUNC__ /*<a name=""></a>*/"MatCreate_MPIBAIJ"
1965 int MatCreate_MPIBAIJ(Mat B)
1966 {
1967   Mat_MPIBAIJ  *b;
1968   int          ierr,size;
1969   PetscTruth   flg;
1970 
1971   PetscFunctionBegin;
1972 
1973   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
1974   B->data = (void*)(b = PetscNew(Mat_MPIBAIJ));CHKPTRQ(b);
1975   ierr    = PetscMemzero(b,sizeof(Mat_MPIBAIJ));CHKERRQ(ierr);
1976   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1977   B->mapping    = 0;
1978   B->factor     = 0;
1979   B->assembled  = PETSC_FALSE;
1980 
1981   B->insertmode = NOT_SET_VALUES;
1982   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1983   ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr);
1984 
1985   /* build local table of row and column ownerships */
1986   b->rowners = (int*)PetscMalloc(3*(b->size+2)*sizeof(int));CHKPTRQ(b->rowners);
1987   PLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
1988   b->cowners    = b->rowners + b->size + 2;
1989   b->rowners_bs = b->cowners + b->size + 2;
1990 
1991   /* build cache for off array entries formed */
1992   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1993   b->donotstash  = PETSC_FALSE;
1994   b->colmap      = PETSC_NULL;
1995   b->garray      = PETSC_NULL;
1996   b->roworiented = PETSC_TRUE;
1997 
1998 #if defined(PEYSC_USE_MAT_SINGLE)
1999   /* stuff for MatSetValues_XXX in single precision */
2000   b->lensetvalues     = 0;
2001   b->setvaluescopy    = PETSC_NULL;
2002 #endif
2003 
2004   /* stuff used in block assembly */
2005   b->barray       = 0;
2006 
2007   /* stuff used for matrix vector multiply */
2008   b->lvec         = 0;
2009   b->Mvctx        = 0;
2010 
2011   /* stuff for MatGetRow() */
2012   b->rowindices   = 0;
2013   b->rowvalues    = 0;
2014   b->getrowactive = PETSC_FALSE;
2015 
2016   /* hash table stuff */
2017   b->ht           = 0;
2018   b->hd           = 0;
2019   b->ht_size      = 0;
2020   b->ht_flag      = PETSC_FALSE;
2021   b->ht_fact      = 0;
2022   b->ht_total_ct  = 0;
2023   b->ht_insert_ct = 0;
2024 
2025   ierr = OptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
2026   if (flg) {
2027     double fact = 1.39;
2028     ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
2029     ierr = OptionsGetDouble(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
2030     if (fact <= 1.0) fact = 1.39;
2031     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
2032     PLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact);
2033   }
2034   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2035                                      "MatStoreValues_MPIBAIJ",
2036                                      MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
2037   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2038                                      "MatRetrieveValues_MPIBAIJ",
2039                                      MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
2040   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
2041                                      "MatGetDiagonalBlock_MPIBAIJ",
2042                                      MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
2043   PetscFunctionReturn(0);
2044 }
2045 EXTERN_C_END
2046 
2047 #undef __FUNC__
2048 #define __FUNC__ /*<a name=""></a>*/"MatMPIBAIJSetPreallocation"
2049 /*@C
2050    MatMPIBAIJSetPreallocation - Creates a sparse parallel matrix in block AIJ format
2051    (block compressed row).  For good matrix assembly performance
2052    the user should preallocate the matrix storage by setting the parameters
2053    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2054    performance can be increased by more than a factor of 50.
2055 
2056    Collective on Mat
2057 
2058    Input Parameters:
2059 +  A - the matrix
.  bs   - size of block
2061 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2062            submatrix  (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           in the diagonal portion of the local submatrix (possibly different for each block
           row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
2066 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2067            submatrix (same for all local rows).
2068 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2069            off-diagonal portion of the local submatrix (possibly different for
2070            each block row) or PETSC_NULL.
2071 
2072    Output Parameter:
2073 
2074 
2075    Options Database Keys:
2076 .   -mat_no_unroll - uses code that does not unroll the loops in the
2077                      block calculations (much slower)
2078 .   -mat_block_size - size of the blocks to use
2079 
2080    Notes:
2081    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
2082    than it must be used on all processors that share the object for that argument.
2083 
2084    Storage Information:
2085    For a square global matrix we define each processor's diagonal portion
2086    to be its local rows and the corresponding columns (a square submatrix);
2087    each processor's off-diagonal portion encompasses the remainder of the
2088    local matrix (a rectangular submatrix).
2089 
2090    The user can specify preallocated storage for the diagonal part of
2091    the local submatrix with either d_nz or d_nnz (not both).  Set
2092    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2093    memory allocation.  Likewise, specify preallocated storage for the
2094    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2095 
2096    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2097    the figure below we depict these three local rows and all columns (0-11).
2098 
2099 .vb
2100            0 1 2 3 4 5 6 7 8 9 10 11
2101           -------------------
2102    row 3  |  o o o d d d o o o o o o
2103    row 4  |  o o o d d d o o o o o o
2104    row 5  |  o o o d d d o o o o o o
2105           -------------------
2106 .ve
2107 
2108    Thus, any entries in the d locations are stored in the d (diagonal)
2109    submatrix, and any entries in the o locations are stored in the
2110    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
2111    stored simply in the MATSEQBAIJ format for compressed row storage.
2112 
2113    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2114    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2115    In general, for PDE problems in which most nonzeros are near the diagonal,
2116    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2117    or you will get TERRIBLE performance; see the users' manual chapter on
2118    matrices.
2119 
2120    Level: intermediate
2121 
2122 .keywords: matrix, block, aij, compressed row, sparse, parallel
2123 
2124 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2125 @*/
2126 int MatMPIBAIJSetPreallocation(Mat B,int bs,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
2127 {
2128   Mat_MPIBAIJ  *b;
2129   int          ierr,i;
2130   PetscTruth   flg2;
2131 
2132   PetscFunctionBegin;
2133   ierr = PetscTypeCompare((PetscObject)B,MATMPIBAIJ,&flg2);CHKERRQ(ierr);
2134   if (!flg2) PetscFunctionReturn(0);
2135 
2136   B->preallocated = PETSC_TRUE;
2137   ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2138 
2139   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
2140   if (d_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than -1: value %d",d_nz);
2141   if (o_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than -1: value %d",o_nz);
2142   if (d_nnz) {
2143   for (i=0; i<B->m/bs; i++) {
2144       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %d value %d",i,d_nnz[i]);
2145     }
2146   }
2147   if (o_nnz) {
2148     for (i=0; i<B->m/bs; i++) {
2149       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %d value %d",i,o_nnz[i]);
2150     }
2151   }
2152 
2153   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr);
2154   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr);
2155   ierr = MapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
2156   ierr = MapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
2157 
2158   b = (Mat_MPIBAIJ*)B->data;
2159   b->bs  = bs;
2160   b->bs2 = bs*bs;
2161   b->mbs = B->m/bs;
2162   b->nbs = B->n/bs;
2163   b->Mbs = B->M/bs;
2164   b->Nbs = B->N/bs;
2165 
2166   ierr = MPI_Allgather(&b->mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
2167   b->rowners[0]    = 0;
2168   for (i=2; i<=b->size; i++) {
2169     b->rowners[i] += b->rowners[i-1];
2170   }
2171   b->rstart    = b->rowners[b->rank];
2172   b->rend      = b->rowners[b->rank+1];
2173 
2174   ierr = MPI_Allgather(&b->nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
2175   b->cowners[0] = 0;
2176   for (i=2; i<=b->size; i++) {
2177     b->cowners[i] += b->cowners[i-1];
2178   }
2179   b->cstart    = b->cowners[b->rank];
2180   b->cend      = b->cowners[b->rank+1];
2181 
2182   for (i=0; i<=b->size; i++) {
2183     b->rowners_bs[i] = b->rowners[i]*bs;
2184   }
2185   b->rstart_bs = b->rstart*bs;
2186   b->rend_bs   = b->rend*bs;
2187   b->cstart_bs = b->cstart*bs;
2188   b->cend_bs   = b->cend*bs;
2189 
2190   if (d_nz == PETSC_DEFAULT) d_nz = 5;
2191   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,B->m,B->n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
2192   PLogObjectParent(B,b->A);
2193   if (o_nz == PETSC_DEFAULT) o_nz = 0;
2194   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,B->m,B->N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
2195   PLogObjectParent(B,b->B);
2196   ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);
2197 
2198   PetscFunctionReturn(0);
2199 }
2200 
2201 #undef __FUNC__
2202 #define __FUNC__ /*<a name=""></a>*/"MatCreateMPIBAIJ"
2203 /*@C
2204    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
2205    (block compressed row).  For good matrix assembly performance
2206    the user should preallocate the matrix storage by setting the parameters
2207    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2208    performance can be increased by more than a factor of 50.
2209 
2210    Collective on MPI_Comm
2211 
2212    Input Parameters:
2213 +  comm - MPI communicator
.  bs   - size of block
2215 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2216            This value should be the same as the local size used in creating the
2217            y vector for the matrix-vector product y = Ax.
2218 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2219            This value should be the same as the local size used in creating the
2220            x vector for the matrix-vector product y = Ax.
2221 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2222 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2223 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
2224            submatrix  (same for all local rows)
.  d_nnz - array containing the number of nonzero blocks in the various block rows
           in the diagonal portion of the local submatrix (possibly different for each block
           row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
2228 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
2229            submatrix (same for all local rows).
2230 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
2231            off-diagonal portion of the local submatrix (possibly different for
2232            each block row) or PETSC_NULL.
2233 
2234    Output Parameter:
2235 .  A - the matrix
2236 
2237    Options Database Keys:
2238 .   -mat_no_unroll - uses code that does not unroll the loops in the
2239                      block calculations (much slower)
2240 .   -mat_block_size - size of the blocks to use
2241 
2242    Notes:
   A nonzero block is any block that has 1 or more nonzeros in it
2244 
2245    The user MUST specify either the local or global matrix dimensions
2246    (possibly both).
2247 
2248    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
2249    than it must be used on all processors that share the object for that argument.
2250 
2251    Storage Information:
2252    For a square global matrix we define each processor's diagonal portion
2253    to be its local rows and the corresponding columns (a square submatrix);
2254    each processor's off-diagonal portion encompasses the remainder of the
2255    local matrix (a rectangular submatrix).
2256 
2257    The user can specify preallocated storage for the diagonal part of
2258    the local submatrix with either d_nz or d_nnz (not both).  Set
2259    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2260    memory allocation.  Likewise, specify preallocated storage for the
2261    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2262 
2263    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2264    the figure below we depict these three local rows and all columns (0-11).
2265 
2266 .vb
2267            0 1 2 3 4 5 6 7 8 9 10 11
2268           -------------------
2269    row 3  |  o o o d d d o o o o o o
2270    row 4  |  o o o d d d o o o o o o
2271    row 5  |  o o o d d d o o o o o o
2272           -------------------
2273 .ve
2274 
2275    Thus, any entries in the d locations are stored in the d (diagonal)
2276    submatrix, and any entries in the o locations are stored in the
2277    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
2278    stored simply in the MATSEQBAIJ format for compressed row storage.
2279 
2280    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2281    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2282    In general, for PDE problems in which most nonzeros are near the diagonal,
2283    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2284    or you will get TERRIBLE performance; see the users' manual chapter on
2285    matrices.
2286 
2287    Level: intermediate
2288 
2289 .keywords: matrix, block, aij, compressed row, sparse, parallel
2290 
2291 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2292 @*/
2293 int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
2294 {
2295   int ierr,size;
2296 
2297   PetscFunctionBegin;
2298   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2299   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2300   if (size > 1) {
2301     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
2302     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2303   } else {
2304     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
2305     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2306   }
2307   PetscFunctionReturn(0);
2308 }
2309 
#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatDuplicate_MPIBAIJ"
/*
   MatDuplicate_MPIBAIJ - Creates a new MPIBAIJ matrix with the same layout
   as matin; cpvalues controls whether the numerical values of the local
   blocks are copied (handled by the MatDuplicate calls on A and B below).
*/
static int MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat         mat;
  Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
  int         ierr,len=0;

  PetscFunctionBegin;
  *newmat       = 0;
  ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
  ierr = MatSetType(mat,MATMPIBAIJ);CHKERRQ(ierr);
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  /* copy the block-size and ownership metadata field by field */
  a      = (Mat_MPIBAIJ*)mat->data;
  a->bs  = oldmat->bs;
  a->bs2 = oldmat->bs2;
  a->mbs = oldmat->mbs;
  a->nbs = oldmat->nbs;
  a->Mbs = oldmat->Mbs;
  a->Nbs = oldmat->Nbs;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->cstart       = oldmat->cstart;
  a->cend         = oldmat->cend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  /* per-instance scratch state starts out empty in the copy */
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;
  a->barray       = 0;
  a->rstart_bs    = oldmat->rstart_bs;
  a->rend_bs      = oldmat->rend_bs;
  a->cstart_bs    = oldmat->cstart_bs;
  a->cend_bs      = oldmat->cend_bs;

  /* hash table stuff */
  /* the table itself is not copied, only the configuration flags */
  a->ht           = 0;
  a->hd           = 0;
  a->ht_size      = 0;
  a->ht_flag      = oldmat->ht_flag;
  a->ht_fact      = oldmat->ht_fact;
  a->ht_total_ct  = 0;
  a->ht_insert_ct = 0;

  /* rowners/cowners/rowners_bs live in one contiguous allocation of
     3*(size+2) ints, so a single memcpy copies all three tables */
  ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr);
  ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
  ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr);
  if (oldmat->colmap) {
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    a->colmap = (int*)PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap);
    PLogObjectMemory(mat,(a->Nbs)*sizeof(int));
    ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;
  /* garray (local-to-global block column map of B) is copied when present */
  if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
    a->garray = (int*)PetscMalloc(len*sizeof(int));CHKPTRQ(a->garray);
    PLogObjectMemory(mat,len*sizeof(int));
    ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr);
  } else a->garray = 0;

  /* duplicate the communication machinery and the two local blocks */
  ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  PLogObjectParent(mat,a->lvec);
  ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);

  PLogObjectParent(mat,a->Mvctx);
  ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  PLogObjectParent(mat,a->B);
  ierr = FListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}
2389 
2390 #include "petscsys.h"
2391 
EXTERN_C_BEGIN
#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatLoad_MPIBAIJ"
/*
   MatLoad_MPIBAIJ - Loads a square matrix stored in PETSc binary format
   from a viewer and distributes it as an MPIBAIJ matrix.

   Process 0 reads the header, row lengths, column indices and numerical
   values from the binary file and ships each remote process its portion
   with point-to-point messages.  If the global size is not divisible by
   the block size, extra rows/columns are appended and given a single
   unit diagonal entry so the padded matrix is nonsingular.

   Input Parameters:
.  viewer - binary viewer holding the file descriptor
.  type   - requested matrix type (unused in this routine; the result is
            always created with MatCreateMPIBAIJ())

   Output Parameter:
.  newmat - the newly created, fully assembled matrix
*/
int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat)
{
  Mat          A;
  int          i,nz,ierr,j,rstart,rend,fd;
  Scalar       *vals,*buf;
  MPI_Comm     comm = ((PetscObject)viewer)->comm;
  MPI_Status   status;
  int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
  int          *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf;
  int          tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows;
  int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
  int          dcount,kmax,k,nzcount,tmp;

  PetscFunctionBegin;
  /* block size defaults to 1 unless overridden on the command line */
  ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  /* only process 0 touches the file; header is validated then broadcast */
  if (!rank) {
    ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
    if (header[3] < 0) {
      SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPIBAIJ");
    }
  }

  /* header[1..3] = M, N, total nnz; only M,N are used below */
  ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2];

  if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");

  /*
     This code adds extra rows to make sure the number of rows is
     divisible by the blocksize
  */
  Mbs        = M/bs;
  extra_rows = bs - M + bs*(Mbs);          /* rows needed to fill the last block */
  if (extra_rows == bs) extra_rows = 0;    /* already divisible: no padding */
  else                  Mbs++;
  if (extra_rows &&!rank) {
    PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n");
  }

  /* determine ownership of all rows */
  /* mbs/m = local number of block rows / point rows; block rows are dealt
     out as evenly as possible, with the first (Mbs % size) ranks getting
     one extra */
  mbs = Mbs/size + ((Mbs % size) > rank);
  m   = mbs*bs;
  /* rowners[0..size] = block-row ownership ranges; browners = the same
     ranges in point rows (rowners*bs), stored in the second half of the
     single allocation */
  rowners = (int*)PetscMalloc(2*(size+2)*sizeof(int));CHKPTRQ(rowners);
  browners = rowners + size + 1;
  ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) rowners[i] += rowners[i-1];   /* prefix sum */
  for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
  /* NOTE: rstart/rend are in BLOCK row units throughout this routine */
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  /* locrowlens[i] = number of nonzeros in local POINT row i */
  locrowlens = (int*)PetscMalloc((rend-rstart)*bs*sizeof(int));CHKPTRQ(locrowlens);
  if (!rank) {
    rowlengths = (int*)PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    /* each padding row carries exactly one entry (its diagonal) */
    for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
    sndcounts = (int*)PetscMalloc(size*sizeof(int));CHKPTRQ(sndcounts);
    for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*)PetscMalloc(size*sizeof(int));CHKPTRQ(procsnz);
    ierr    = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int*)PetscMalloc(maxnz*sizeof(int));CHKPTRQ(cols);

    /* read in my part of the matrix column indices  */
    nz = procsnz[0];
    ibuf = (int*)PetscMalloc(nz*sizeof(int));CHKPTRQ(ibuf);
    mycols = ibuf;
    /* uniprocessor case: the padding rows live here, not on the last rank;
       read one fewer entry per padding row, then append the diagonal
       column indices by hand */
    if (size == 1)  nz -= extra_rows;
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
    if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }

    /* read in every ones (except the last) and ship off */
    for (i=1; i<size-1; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
    }
    /* read in the stuff for the last proc */
    if (size != 1) {
      nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      /* append diagonal column indices for the padding rows */
      for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
      ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += locrowlens[i];
    }
    ibuf   = (int*)PetscMalloc(nz*sizeof(int));CHKPTRQ(ibuf);
    mycols = ibuf;
    /* receive message of column indices*/
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries */
  /* dlens/odlens[i] = number of distinct diagonal-part/off-diagonal-part
     block columns in local block row i, used to preallocate the matrix */
  dlens  = (int*)PetscMalloc(2*(rend-rstart+1)*sizeof(int));CHKPTRQ(dlens);
  odlens = dlens + (rend-rstart);
  /* mask marks block columns already seen in this block row; masked1/2
     remember which entries to reset so only touched entries are cleared */
  mask   = (int*)PetscMalloc(3*Mbs*sizeof(int));CHKPTRQ(mask);
  ierr   = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr);
  masked1 = mask    + Mbs;
  masked2 = masked1 + Mbs;
  rowcount = 0; nzcount = 0;
  for (i=0; i<mbs; i++) {
    dcount  = 0;
    odcount = 0;
    for (j=0; j<bs; j++) {
      kmax = locrowlens[rowcount];
      for (k=0; k<kmax; k++) {
        tmp = mycols[nzcount++]/bs;    /* point column -> block column */
        if (!mask[tmp]) {
          mask[tmp] = 1;
          /* rstart/rend are block indices, so this classifies block
             columns into the diagonal vs off-diagonal submatrix */
          if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
          else masked1[dcount++] = tmp;
        }
      }
      rowcount++;
    }

    dlens[i]  = dcount;
    odlens[i] = odcount;

    /* zero out the mask elements we set */
    for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
    for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
  }

  /* create our matrix */
  ierr = MatCreateMPIBAIJ(comm,bs,m,m,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);CHKERRQ(ierr);
  A = *newmat;
  /* columns in the file are sorted per row; tell the insertion routines.
     NOTE(review): return value not checked, unlike other calls here */
  MatSetOption(A,MAT_COLUMNS_SORTED);

  /* second pass: same shipping pattern as above, now for the values */
  if (!rank) {
    buf = (Scalar*)PetscMalloc(maxnz*sizeof(Scalar));CHKPTRQ(buf);
    /* read in my part of the matrix numerical values  */
    nz = procsnz[0];
    vals = buf;
    mycols = ibuf;
    if (size == 1)  nz -= extra_rows;
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
    /* padding rows get a unit diagonal so the matrix stays nonsingular */
    if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }

    /* insert into matrix */
    jj      = rstart*bs;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];    /* walk through the packed index buffer */
      vals   += locrowlens[i];
      jj++;
    }
    /* read in other processors (except the last one) and ship out */
    for (i=1; i<size-1; i++) {
      nz   = procsnz[i];
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    /* the last proc */
    if (size != 1){
      /* NOTE(review): relies on i == size-1 left over from the loop above
         (holds since size != 1); the column-index pass spells out
         procsnz[size-1] instead -- fragile, keep in sync */
      nz   = procsnz[i] - extra_rows;
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
      ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    buf = (Scalar*)PetscMalloc(nz*sizeof(Scalar));CHKPTRQ(buf);

    /* receive message of values*/
    vals   = buf;
    mycols = ibuf;
    ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart*bs;
    for (i=0; i<m; i++) {
      ierr    = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
  }
  ierr = PetscFree(locrowlens);CHKERRQ(ierr);
  ierr = PetscFree(buf);CHKERRQ(ierr);
  ierr = PetscFree(ibuf);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);
  ierr = PetscFree(dlens);CHKERRQ(ierr);
  ierr = PetscFree(mask);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
2621 
2622 #undef __FUNC__
2623 #define __FUNC__ /*<a name=""></a>*/"MatMPIBAIJSetHashTableFactor"
2624 /*@
2625    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2626 
2627    Input Parameters:
2628 .  mat  - the matrix
2629 .  fact - factor
2630 
2631    Collective on Mat
2632 
2633    Level: advanced
2634 
2635   Notes:
2636    This can also be set by the command line option: -mat_use_hash_table fact
2637 
2638 .keywords: matrix, hashtable, factor, HT
2639 
2640 .seealso: MatSetOption()
2641 @*/
2642 int MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
2643 {
2644   Mat_MPIBAIJ *baij;
2645   int         ierr;
2646   PetscTruth  flg;
2647 
2648   PetscFunctionBegin;
2649   PetscValidHeaderSpecific(mat,MAT_COOKIE);
2650   ierr = PetscTypeCompare((PetscObject)mat,MATMPIBAIJ,&flg);CHKERRQ(ierr);
2651   if (!flg) {
2652     SETERRQ(PETSC_ERR_ARG_WRONG,"Incorrect matrix type. Use MPIBAIJ only.");
2653   }
2654   baij = (Mat_MPIBAIJ*)mat->data;
2655   baij->ht_fact = fact;
2656   PetscFunctionReturn(0);
2657 }
2658