xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision d34347eca8ac9657cccc548b3ba1c8201ddea9cf)
1 /*$Id: mpibaij.c,v 1.234 2001/09/25 22:56:49 balay Exp $*/
2 
3 #include "src/mat/impls/baij/mpi/mpibaij.h"   /*I  "petscmat.h"  I*/
4 #include "src/vec/vecimpl.h"
5 
6 EXTERN int MatSetUpMultiply_MPIBAIJ(Mat);
7 EXTERN int DisAssemble_MPIBAIJ(Mat);
8 EXTERN int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS[],int);
9 EXTERN int MatGetSubMatrices_MPIBAIJ(Mat,int,const IS[],const IS[],MatReuse,Mat *[]);
10 EXTERN int MatGetValues_SeqBAIJ(Mat,int,const int[],int,const int [],PetscScalar []);
11 EXTERN int MatSetValues_SeqBAIJ(Mat,int,const int[],int,const int [],const PetscScalar [],InsertMode);
12 EXTERN int MatSetValuesBlocked_SeqBAIJ(Mat,int,const int[],int,const int[],const PetscScalar[],InsertMode);
13 EXTERN int MatGetRow_SeqBAIJ(Mat,int,int*,int*[],PetscScalar*[]);
14 EXTERN int MatRestoreRow_SeqBAIJ(Mat,int,int*,int*[],PetscScalar*[]);
15 EXTERN int MatPrintHelp_SeqBAIJ(Mat);
16 EXTERN int MatZeroRows_SeqBAIJ(Mat,IS,const PetscScalar*);
17 
18 /*  UGLY, ugly, ugly
19    When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does
20    not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and
21    inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ()
22    converts the entries into single precision and then calls ..._MatScalar() to put them
23    into the single precision data structures.
24 */
25 #if defined(PETSC_USE_MAT_SINGLE)
26 EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,const int*,int,const int*,const MatScalar*,InsertMode);
27 EXTERN int MatSetValues_MPIBAIJ_MatScalar(Mat,int,const int*,int,const int*,const MatScalar*,InsertMode);
28 EXTERN int MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat,int,const int*,int,const int*,const MatScalar*,InsertMode);
29 EXTERN int MatSetValues_MPIBAIJ_HT_MatScalar(Mat,int,const int*,int,const int*,const MatScalar*,InsertMode);
30 EXTERN int MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat,int,const int*,int,const int*,const MatScalar*,InsertMode);
31 #else
32 #define MatSetValuesBlocked_SeqBAIJ_MatScalar      MatSetValuesBlocked_SeqBAIJ
33 #define MatSetValues_MPIBAIJ_MatScalar             MatSetValues_MPIBAIJ
34 #define MatSetValuesBlocked_MPIBAIJ_MatScalar      MatSetValuesBlocked_MPIBAIJ
35 #define MatSetValues_MPIBAIJ_HT_MatScalar          MatSetValues_MPIBAIJ_HT
36 #define MatSetValuesBlocked_MPIBAIJ_HT_MatScalar   MatSetValuesBlocked_MPIBAIJ_HT
37 #endif
38 
39 #undef __FUNCT__
40 #define __FUNCT__ "MatGetRowMax_MPIBAIJ"
/*
   MatGetRowMax_MPIBAIJ - Computes, for each locally owned row, the entry of
   largest absolute value, by taking the row maxima of the diagonal block
   (a->A) and the off-diagonal block (a->B) and keeping, entrywise, the
   candidate of larger magnitude.  The result is returned in v.
*/
41 int MatGetRowMax_MPIBAIJ(Mat A,Vec v)
42 {
43   Mat_MPIBAIJ  *a = (Mat_MPIBAIJ*)A->data;
44   int          ierr,i;
45   PetscScalar  *va,*vb;
46   Vec          vtmp;
47 
48   PetscFunctionBegin;
49 
     /* row maxima of the diagonal block are computed directly into v */
50   ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr);
51   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
52 
     /* row maxima of the off-diagonal block go into a local work vector */
53   ierr = VecCreateSeq(PETSC_COMM_SELF,A->m,&vtmp);CHKERRQ(ierr);
54   ierr = MatGetRowMax(a->B,vtmp);CHKERRQ(ierr);
55   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
56 
     /* keep whichever candidate has the larger magnitude in each row */
57   for (i=0; i<A->m; i++){
58     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) va[i] = vb[i];
59   }
60 
61   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
62   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
63   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
64 
65   PetscFunctionReturn(0);
66 }
67 
68 EXTERN_C_BEGIN
69 #undef __FUNCT__
70 #define __FUNCT__ "MatStoreValues_MPIBAIJ"
71 int MatStoreValues_MPIBAIJ(Mat mat)
72 {
73   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
74   int         ierr;
75 
76   PetscFunctionBegin;
77   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
78   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
79   PetscFunctionReturn(0);
80 }
81 EXTERN_C_END
82 
83 EXTERN_C_BEGIN
84 #undef __FUNCT__
85 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
86 int MatRetrieveValues_MPIBAIJ(Mat mat)
87 {
88   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
89   int         ierr;
90 
91   PetscFunctionBegin;
92   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
93   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
94   PetscFunctionReturn(0);
95 }
96 EXTERN_C_END
97 
98 /*
99      Local utility routine that creates a mapping from the global column
100    number to the local number in the off-diagonal part of the local
101    storage of the matrix.  This is done in a non-scalable way since the
102    length of colmap equals the global matrix length.
103 */
104 #undef __FUNCT__
105 #define __FUNCT__ "CreateColmap_MPIBAIJ_Private"
106 static int CreateColmap_MPIBAIJ_Private(Mat mat)
107 {
108   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
109   Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data;
110   int         nbs = B->nbs,i,bs=B->bs,ierr;
111 
112   PetscFunctionBegin;
113 #if defined (PETSC_USE_CTABLE)
     /* hash-table variant: keys and values are shifted by +1 so that 0 can
        be used to mean "absent".
        NOTE(review): the table is sized from baij->nbs while the loop bound
        is the local nbs = B->nbs — confirm both refer to the same count. */
114   ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr);
115   for (i=0; i<nbs; i++){
116     ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
117   }
118 #else
     /* dense variant: one int per global block column; an entry of 0 means
        "column not present in the off-diagonal block" (values stored +1) */
119   ierr = PetscMalloc((baij->Nbs+1)*sizeof(int),&baij->colmap);CHKERRQ(ierr);
120   PetscLogObjectMemory(mat,baij->Nbs*sizeof(int));
121   ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));CHKERRQ(ierr);
122   for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
123 #endif
124   PetscFunctionReturn(0);
125 }
126 
#define CHUNKSIZE  10

/*
   MatSetValues_SeqBAIJ_A_Private - Macro that inserts (or adds, for
   addv == ADD_VALUES) a single scalar "value" at point index (row,col) of
   the diagonal sequential block, whose CSR-of-blocks arrays ai/aj/aa and
   per-row lengths ailen/aimax must be in scope at the expansion site.
   If the block row is full, the storage (values, column indices and row
   pointers in one allocation) is enlarged by CHUNKSIZE block entries.

   Fix over previous revision: the PetscMemzero of the newly created gap
   used sizeof(PetscScalar); the buffer holds MatScalar entries (see the
   matching line in MatSetValues_SeqBAIJ_B_Private), so under
   PETSC_USE_MAT_SINGLE the old code zeroed past the intended region.
*/
#define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto a_noinsert; \
        } \
      } \
      if (a->nonew == 1) goto a_noinsert; \
      else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
      if (nrow >= rmax) { \
        /* there is no extra room in row, therefore enlarge */ \
        int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
        MatScalar *new_a; \
 \
        if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
 \
        /* malloc new storage space */ \
        len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \
        ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
        new_j   = (int*)(new_a + bs2*new_nz); \
        new_i   = new_j + new_nz; \
 \
        /* copy over old data into new slots */ \
        for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \
        for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
        ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
        len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
        ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
        ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
                    aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
        /* free up old matrix storage */ \
        ierr = PetscFree(a->a);CHKERRQ(ierr);  \
        if (!a->singlemalloc) { \
          ierr = PetscFree(a->i);CHKERRQ(ierr); \
          ierr = PetscFree(a->j);CHKERRQ(ierr);\
        } \
        aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
        a->singlemalloc = PETSC_TRUE; \
 \
        rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
        rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
        PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
        a->maxnz += bs2*CHUNKSIZE; \
        a->reallocs++; \
        a->nz++; \
      } \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      a_noinsert:; \
    ailen[brow] = nrow; \
}
205 
/*
   MatSetValues_SeqBAIJ_B_Private - Same insertion logic as the A variant,
   but operating on the off-diagonal sequential block b, whose arrays
   bi/bj/ba and per-row lengths bilen/bimax must be in scope at the
   expansion site.  On a full block row the single allocation holding
   values, column indices and row pointers is enlarged by CHUNKSIZE.
*/
206 #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
207 { \
208     brow = row/bs;  \
209     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
210     rmax = bimax[brow]; nrow = bilen[brow]; \
211       bcol = col/bs; \
212       ridx = row % bs; cidx = col % bs; \
213       low = 0; high = nrow; \
214       while (high-low > 3) { \
215         t = (low+high)/2; \
216         if (rp[t] > bcol) high = t; \
217         else              low  = t; \
218       } \
219       for (_i=low; _i<high; _i++) { \
220         if (rp[_i] > bcol) break; \
221         if (rp[_i] == bcol) { \
222           bap  = ap +  bs2*_i + bs*cidx + ridx; \
223           if (addv == ADD_VALUES) *bap += value;  \
224           else                    *bap  = value;  \
225           goto b_noinsert; \
226         } \
227       } \
228       if (b->nonew == 1) goto b_noinsert; \
229       else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
230       if (nrow >= rmax) { \
231         /* there is no extra room in row, therefore enlarge */ \
232         int       new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
233         MatScalar *new_a; \
234  \
235         if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
236  \
237         /* malloc new storage space */ \
238         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \
239         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
240         new_j   = (int*)(new_a + bs2*new_nz); \
241         new_i   = new_j + new_nz; \
242  \
243         /* copy over old data into new slots */ \
244         for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \
245         for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
246         ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
247         len  = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
248         ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
249         ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
250         ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
251         ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
252                     ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
253         /* free up old matrix storage */ \
254         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
255         if (!b->singlemalloc) { \
256           ierr = PetscFree(b->i);CHKERRQ(ierr); \
257           ierr = PetscFree(b->j);CHKERRQ(ierr); \
258         } \
259         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
260         b->singlemalloc = PETSC_TRUE; \
261  \
262         rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
263         rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
264         PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
265         b->maxnz += bs2*CHUNKSIZE; \
266         b->reallocs++; \
267         b->nz++; \
268       } \
269       N = nrow++ - 1;  \
270       /* shift up all the later entries in this row */ \
271       for (ii=N; ii>=_i; ii--) { \
272         rp[ii+1] = rp[ii]; \
273         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
274       } \
275       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
276       rp[_i]                      = bcol;  \
277       ap[bs2*_i + bs*cidx + ridx] = value;  \
278       b_noinsert:; \
279     bilen[brow] = nrow; \
280 }
281 
/*
   The following four wrappers exist only when PETSC_USE_MAT_SINGLE is
   defined (MatScalar narrower than PetscScalar).  Each converts the caller's
   PetscScalar values into a reusable MatScalar scratch buffer
   (b->setvaluescopy, grown on demand and kept between calls) and then
   forwards to the corresponding ..._MatScalar() routine.
*/
282 #if defined(PETSC_USE_MAT_SINGLE)
283 #undef __FUNCT__
284 #define __FUNCT__ "MatSetValues_MPIBAIJ"
285 int MatSetValues_MPIBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
286 {
287   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
288   int         ierr,i,N = m*n;
289   MatScalar   *vsingle;
290 
291   PetscFunctionBegin;
     /* grow the conversion buffer only when the incoming batch is larger */
292   if (N > b->setvalueslen) {
293     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
294     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
295     b->setvalueslen  = N;
296   }
297   vsingle = b->setvaluescopy;
298 
     /* narrowing copy PetscScalar -> MatScalar */
299   for (i=0; i<N; i++) {
300     vsingle[i] = v[i];
301   }
302   ierr = MatSetValues_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
303   PetscFunctionReturn(0);
304 }
305 
306 #undef __FUNCT__
307 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
/* Blocked variant: each of the m*n logical entries is a bs2-sized block. */
308 int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
309 {
310   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
311   int         ierr,i,N = m*n*b->bs2;
312   MatScalar   *vsingle;
313 
314   PetscFunctionBegin;
315   if (N > b->setvalueslen) {
316     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
317     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
318     b->setvalueslen  = N;
319   }
320   vsingle = b->setvaluescopy;
321   for (i=0; i<N; i++) {
322     vsingle[i] = v[i];
323   }
324   ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
325   PetscFunctionReturn(0);
326 }
327 
328 #undef __FUNCT__
329 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
/* Hash-table variant of the scalar wrapper; same conversion scheme. */
330 int MatSetValues_MPIBAIJ_HT(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
331 {
332   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
333   int         ierr,i,N = m*n;
334   MatScalar   *vsingle;
335 
336   PetscFunctionBegin;
337   if (N > b->setvalueslen) {
338     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
339     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
340     b->setvalueslen  = N;
341   }
342   vsingle = b->setvaluescopy;
343   for (i=0; i<N; i++) {
344     vsingle[i] = v[i];
345   }
346   ierr = MatSetValues_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
347   PetscFunctionReturn(0);
348 }
349 
350 #undef __FUNCT__
351 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
/* Hash-table variant of the blocked wrapper; same conversion scheme. */
352 int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
353 {
354   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
355   int         ierr,i,N = m*n*b->bs2;
356   MatScalar   *vsingle;
357 
358   PetscFunctionBegin;
359   if (N > b->setvalueslen) {
360     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
361     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
362     b->setvalueslen  = N;
363   }
364   vsingle = b->setvaluescopy;
365   for (i=0; i<N; i++) {
366     vsingle[i] = v[i];
367   }
368   ierr = MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
369   PetscFunctionReturn(0);
370 }
371 #endif
372 
373 #undef __FUNCT__
374 #define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar"
375 int MatSetValues_MPIBAIJ_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
376 {
377   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
378   MatScalar   value;
379   PetscTruth  roworiented = baij->roworiented;
380   int         ierr,i,j,row,col;
381   int         rstart_orig=baij->rstart_bs;
382   int         rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
383   int         cend_orig=baij->cend_bs,bs=baij->bs;
384 
385   /* Some Variables required in the macro */
386   Mat         A = baij->A;
387   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data;
388   int         *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
389   MatScalar   *aa=a->a;
390 
391   Mat         B = baij->B;
392   Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data;
393   int         *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
394   MatScalar   *ba=b->a;
395 
396   int         *rp,ii,nrow,_i,rmax,N,brow,bcol;
397   int         low,high,t,ridx,cidx,bs2=a->bs2;
398   MatScalar   *ap,*bap;
399 
400   PetscFunctionBegin;
401   for (i=0; i<m; i++) {
402     if (im[i] < 0) continue;
403 #if defined(PETSC_USE_BOPT_g)
404     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1);
405 #endif
406     if (im[i] >= rstart_orig && im[i] < rend_orig) {
407       row = im[i] - rstart_orig;
408       for (j=0; j<n; j++) {
409         if (in[j] >= cstart_orig && in[j] < cend_orig){
410           col = in[j] - cstart_orig;
411           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
412           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
413           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
414         } else if (in[j] < 0) continue;
415 #if defined(PETSC_USE_BOPT_g)
416         else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[i],mat->N-1);}
417 #endif
418         else {
419           if (mat->was_assembled) {
420             if (!baij->colmap) {
421               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
422             }
423 #if defined (PETSC_USE_CTABLE)
424             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
425             col  = col - 1;
426 #else
427             col = baij->colmap[in[j]/bs] - 1;
428 #endif
429             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
430               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
431               col =  in[j];
432               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
433               B = baij->B;
434               b = (Mat_SeqBAIJ*)(B)->data;
435               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
436               ba=b->a;
437             } else col += in[j]%bs;
438           } else col = in[j];
439           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
440           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
441           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
442         }
443       }
444     } else {
445       if (!baij->donotstash) {
446         if (roworiented) {
447           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
448         } else {
449           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
450         }
451       }
452     }
453   }
454   PetscFunctionReturn(0);
455 }
456 
457 #undef __FUNCT__
458 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_MatScalar"
/*
   MatSetValuesBlocked_MPIBAIJ_MatScalar - Inserts or adds an m x n array of
   bs x bs blocks (MatScalar precision) into an MPIBAIJ matrix, one block at
   a time.  Each block is gathered contiguously into baij->barray (or, when
   no reordering is needed, barray aliases the caller's v directly) and then
   passed to the sequential blocked routine for the diagonal (A) or
   off-diagonal (B) part; off-process rows are stashed for assembly time.
*/
459 int MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
460 {
461   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
462   const MatScalar *value;
463   MatScalar       *barray=baij->barray;
464   PetscTruth      roworiented = baij->roworiented;
465   int             ierr,i,j,ii,jj,row,col,rstart=baij->rstart;
466   int             rend=baij->rend,cstart=baij->cstart,stepval;
467   int             cend=baij->cend,bs=baij->bs,bs2=baij->bs2;
468 
469   PetscFunctionBegin;
     /* lazily allocate the bs2-sized block gathering buffer, kept on baij */
470   if(!barray) {
471     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
472     baij->barray = barray;
473   }
474 
     /* distance (in scalars) between consecutive rows/columns of one block in v */
475   if (roworiented) {
476     stepval = (n-1)*bs;
477   } else {
478     stepval = (m-1)*bs;
479   }
480   for (i=0; i<m; i++) {
481     if (im[i] < 0) continue;
482 #if defined(PETSC_USE_BOPT_g)
483     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %d max %d",im[i],baij->Mbs-1);
484 #endif
485     if (im[i] >= rstart && im[i] < rend) {
486       row = im[i] - rstart;
487       for (j=0; j<n; j++) {
488         /* If NumCol = 1 then a copy is not required */
489         if ((roworiented) && (n == 1)) {
490           barray = (MatScalar*)v + i*bs2;
491         } else if((!roworiented) && (m == 1)) {
492           barray = (MatScalar*)v + j*bs2;
493         } else { /* Here a copy is required */
494           if (roworiented) {
495             value = v + i*(stepval+bs)*bs + j*bs;
496           } else {
497             value = v + j*(stepval+bs)*bs + i*bs;
498           }
             /* gather the bs x bs block contiguously into barray */
499           for (ii=0; ii<bs; ii++,value+=stepval) {
500             for (jj=0; jj<bs; jj++) {
501               *barray++  = *value++;
502             }
503           }
             /* rewind barray to the start of the gathered block */
504           barray -=bs2;
505         }
506 
507         if (in[j] >= cstart && in[j] < cend){
508           col  = in[j] - cstart;
509           ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
510         }
511         else if (in[j] < 0) continue;
512 #if defined(PETSC_USE_BOPT_g)
513         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %d max %d",in[j],baij->Nbs-1);}
514 #endif
515         else {
516           if (mat->was_assembled) {
517             if (!baij->colmap) {
518               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
519             }
520 
             /* debug-only sanity check: colmap entries must be block-aligned */
521 #if defined(PETSC_USE_BOPT_g)
522 #if defined (PETSC_USE_CTABLE)
523             { int data;
524               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
525               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
526             }
527 #else
528             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
529 #endif
530 #endif
             /* map the global block column to its local block column */
531 #if defined (PETSC_USE_CTABLE)
532 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
533             col  = (col - 1)/bs;
534 #else
535             col = (baij->colmap[in[j]] - 1)/bs;
536 #endif
537             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
               /* block column not yet present: fall back to unassembled form */
538               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
539               col =  in[j];
540             }
541           }
542           else col = in[j];
543           ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
544         }
545       }
546     } else {
       /* block row owned by another process: stash for assembly time */
547       if (!baij->donotstash) {
548         if (roworiented) {
549           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
550         } else {
551           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
552         }
553       }
554     }
555   }
556   PetscFunctionReturn(0);
557 }
558 
/* HASH_KEY is the fractional part of the golden ratio, the classic
   multiplier for multiplicative hashing; HASH maps "key" into [0,size). */
559 #define HASH_KEY 0.6180339887
560 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
561 /* #define HASH(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
562 /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
563 #undef __FUNCT__
564 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT_MatScalar"
/*
   MatSetValues_MPIBAIJ_HT_MatScalar - Inserts or adds individual scalar
   entries using the precomputed hash table (baij->ht maps a block (row,col)
   key to an index whose value slot is baij->hd[idx]).  The table is probed
   linearly, wrapping around once; the sparsity pattern is fixed, so a key
   with no table entry is an error.  Off-process rows are stashed.
*/
565 int MatSetValues_MPIBAIJ_HT_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
566 {
567   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
568   PetscTruth  roworiented = baij->roworiented;
569   int         ierr,i,j,row,col;
570   int         rstart_orig=baij->rstart_bs;
571   int         rend_orig=baij->rend_bs,Nbs=baij->Nbs;
572   int         h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx;
573   PetscReal   tmp;
574   MatScalar   **HD = baij->hd,value;
575 #if defined(PETSC_USE_BOPT_g)
576   int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
577 #endif
578 
579   PetscFunctionBegin;
580 
581   for (i=0; i<m; i++) {
582 #if defined(PETSC_USE_BOPT_g)
583     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
584     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1);
585 #endif
586       row = im[i];
587     if (row >= rstart_orig && row < rend_orig) {
588       for (j=0; j<n; j++) {
589         col = in[j];
590         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
591         /* Look up into the Hash Table */
592         key = (row/bs)*Nbs+(col/bs)+1;
593         h1  = HASH(size,key,tmp);
594 
595 
596         idx = h1;
597 #if defined(PETSC_USE_BOPT_g)
        /* debug build also counts probes for hash-efficiency statistics */
598         insert_ct++;
599         total_ct++;
600         if (HT[idx] != key) {
601           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
602           if (idx == size) {
603             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
604             if (idx == h1) {
605               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"(row,col) has no entry in the hash table");
606             }
607           }
608         }
609 #else
        /* linear probe from h1 to end, then wrap from 0 back to h1 */
610         if (HT[idx] != key) {
611           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
612           if (idx == size) {
613             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
614             if (idx == h1) {
615               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"(row,col) has no entry in the hash table");
616             }
617           }
618         }
619 #endif
620         /* A HASH table entry is found, so insert the values at the correct address */
621         if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
622         else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
623       }
624     } else {
       /* row owned by another process: stash for assembly time */
625       if (!baij->donotstash) {
626         if (roworiented) {
627           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
628         } else {
629           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
630         }
631       }
632     }
633   }
634 #if defined(PETSC_USE_BOPT_g)
635   baij->ht_total_ct = total_ct;
636   baij->ht_insert_ct = insert_ct;
637 #endif
638   PetscFunctionReturn(0);
639 }
640 
641 #undef __FUNCT__
642 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT_MatScalar"
/*
   MatSetValuesBlocked_MPIBAIJ_HT_MatScalar - Blocked insertion through the
   precomputed hash table: each (block row, block col) pair is hashed to an
   index whose bs2-sized value slot is baij->hd[idx], and the caller's bs x bs
   block is copied/added into it, respecting row/column orientation of v.
   Off-process block rows are stashed for assembly time.
*/
643 int MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
644 {
645   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
646   PetscTruth      roworiented = baij->roworiented;
647   int             ierr,i,j,ii,jj,row,col;
648   int             rstart=baij->rstart ;
649   int             rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2;
650   int             h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
651   PetscReal       tmp;
652   MatScalar       **HD = baij->hd,*baij_a;
653   const MatScalar *v_t,*value;
654 #if defined(PETSC_USE_BOPT_g)
655   int             total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
656 #endif
657 
658   PetscFunctionBegin;
659 
     /* distance (in scalars) between consecutive rows/columns of one block in v */
660   if (roworiented) {
661     stepval = (n-1)*bs;
662   } else {
663     stepval = (m-1)*bs;
664   }
665   for (i=0; i<m; i++) {
666 #if defined(PETSC_USE_BOPT_g)
667     if (im[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",im[i]);
668     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],baij->Mbs-1);
669 #endif
670     row   = im[i];
671     v_t   = v + i*bs2;
672     if (row >= rstart && row < rend) {
673       for (j=0; j<n; j++) {
674         col = in[j];
675 
676         /* Look up into the Hash Table */
677         key = row*Nbs+col+1;
678         h1  = HASH(size,key,tmp);
679 
680         idx = h1;
681 #if defined(PETSC_USE_BOPT_g)
        /* debug build also counts probes for hash-efficiency statistics */
682         total_ct++;
683         insert_ct++;
684        if (HT[idx] != key) {
685           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
686           if (idx == size) {
687             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
688             if (idx == h1) {
689               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"(row,col) has no entry in the hash table");
690             }
691           }
692         }
693 #else
        /* linear probe from h1 to end, then wrap from 0 back to h1 */
694         if (HT[idx] != key) {
695           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
696           if (idx == size) {
697             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
698             if (idx == h1) {
699               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"(row,col) has no entry in the hash table");
700             }
701           }
702         }
703 #endif
704         baij_a = HD[idx];
705         if (roworiented) {
706           /*value = v + i*(stepval+bs)*bs + j*bs;*/
707           /* value = v + (i*(stepval+bs)+j)*bs; */
708           value = v_t;
709           v_t  += bs;
           /* scatter the row-major input block into the stored layout */
710           if (addv == ADD_VALUES) {
711             for (ii=0; ii<bs; ii++,value+=stepval) {
712               for (jj=ii; jj<bs2; jj+=bs) {
713                 baij_a[jj]  += *value++;
714               }
715             }
716           } else {
717             for (ii=0; ii<bs; ii++,value+=stepval) {
718               for (jj=ii; jj<bs2; jj+=bs) {
719                 baij_a[jj]  = *value++;
720               }
721             }
722           }
723         } else {
724           value = v + j*(stepval+bs)*bs + i*bs;
           /* column-oriented input matches the stored layout directly */
725           if (addv == ADD_VALUES) {
726             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
727               for (jj=0; jj<bs; jj++) {
728                 baij_a[jj]  += *value++;
729               }
730             }
731           } else {
732             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
733               for (jj=0; jj<bs; jj++) {
734                 baij_a[jj]  = *value++;
735               }
736             }
737           }
738         }
739       }
740     } else {
       /* block row owned by another process: stash for assembly time */
741       if (!baij->donotstash) {
742         if (roworiented) {
743           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
744         } else {
745           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
746         }
747       }
748     }
749   }
750 #if defined(PETSC_USE_BOPT_g)
751   baij->ht_total_ct = total_ct;
752   baij->ht_insert_ct = insert_ct;
753 #endif
754   PetscFunctionReturn(0);
755 }
756 
757 #undef __FUNCT__
758 #define __FUNCT__ "MatGetValues_MPIBAIJ"
/*
   MatGetValues_MPIBAIJ - Retrieves the m x n array of entries indexed by
   (idxm, idxn) into v (row-major).  Only rows owned by this process are
   supported; columns are fetched from the diagonal block directly, or from
   the off-diagonal block via the colmap lookup.  Entries whose column is
   not present in the off-diagonal block are returned as 0.0.
*/
759 int MatGetValues_MPIBAIJ(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[])
760 {
761   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
762   int        bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
763   int        bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;
764 
765   PetscFunctionBegin;
766   for (i=0; i<m; i++) {
767     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",idxm[i]);
768     if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",idxm[i],mat->M-1);
769     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
770       row = idxm[i] - bsrstart;
771       for (j=0; j<n; j++) {
772         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %d",idxn[j]);
773         if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",idxn[j],mat->N-1);
774         if (idxn[j] >= bscstart && idxn[j] < bscend){
           /* column lies in the diagonal block */
775           col = idxn[j] - bscstart;
776           ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
777         } else {
778           if (!baij->colmap) {
779             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
780           }
           /* colmap stores local block-column offsets shifted by +1; -1 undoes it */
781 #if defined (PETSC_USE_CTABLE)
782           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
783           data --;
784 #else
785           data = baij->colmap[idxn[j]/bs]-1;
786 #endif
           /* absent, or a stale colmap entry pointing at a different block column -> 0 */
787           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
788           else {
789             col  = data + idxn[j]%bs;
790             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
791           }
792         }
793       }
794     } else {
795       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
796     }
797   }
798  PetscFunctionReturn(0);
799 }
800 
801 #undef __FUNCT__
802 #define __FUNCT__ "MatNorm_MPIBAIJ"
803 int MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
804 {
805   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
806   Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
807   int        ierr,i,bs2=baij->bs2;
808   PetscReal  sum = 0.0;
809   MatScalar  *v;
810 
811   PetscFunctionBegin;
812   if (baij->size == 1) {
813     ierr =  MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
814   } else {
815     if (type == NORM_FROBENIUS) {
816       v = amat->a;
817       for (i=0; i<amat->nz*bs2; i++) {
818 #if defined(PETSC_USE_COMPLEX)
819         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
820 #else
821         sum += (*v)*(*v); v++;
822 #endif
823       }
824       v = bmat->a;
825       for (i=0; i<bmat->nz*bs2; i++) {
826 #if defined(PETSC_USE_COMPLEX)
827         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
828 #else
829         sum += (*v)*(*v); v++;
830 #endif
831       }
832       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
833       *nrm = sqrt(*nrm);
834     } else {
835       SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
836     }
837   }
838   PetscFunctionReturn(0);
839 }
840 
841 
842 /*
843   Creates the hash table, and sets the table
844   This table is created only once.
   If new entries need to be added to the matrix
846   then the hash table has to be destroyed and
847   recreated.
848 */
849 #undef __FUNCT__
850 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
851 int MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
852 {
853   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
854   Mat         A = baij->A,B=baij->B;
855   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
856   int         i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
857   int         size,bs2=baij->bs2,rstart=baij->rstart,ierr;
858   int         cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs;
859   int         *HT,key;
860   MatScalar   **HD;
861   PetscReal   tmp;
862 #if defined(PETSC_USE_BOPT_g)
863   int         ct=0,max=0;
864 #endif
865 
866   PetscFunctionBegin;
867   baij->ht_size=(int)(factor*nz);
868   size = baij->ht_size;
869 
870   if (baij->ht) {
871     PetscFunctionReturn(0);
872   }
873 
874   /* Allocate Memory for Hash Table */
875   ierr     = PetscMalloc((size)*(sizeof(int)+sizeof(MatScalar*))+1,&baij->hd);CHKERRQ(ierr);
876   baij->ht = (int*)(baij->hd + size);
877   HD       = baij->hd;
878   HT       = baij->ht;
879 
880 
881   ierr = PetscMemzero(HD,size*(sizeof(int)+sizeof(PetscScalar*)));CHKERRQ(ierr);
882 
883 
884   /* Loop Over A */
885   for (i=0; i<a->mbs; i++) {
886     for (j=ai[i]; j<ai[i+1]; j++) {
887       row = i+rstart;
888       col = aj[j]+cstart;
889 
890       key = row*Nbs + col + 1;
891       h1  = HASH(size,key,tmp);
892       for (k=0; k<size; k++){
893         if (HT[(h1+k)%size] == 0.0) {
894           HT[(h1+k)%size] = key;
895           HD[(h1+k)%size] = a->a + j*bs2;
896           break;
897 #if defined(PETSC_USE_BOPT_g)
898         } else {
899           ct++;
900 #endif
901         }
902       }
903 #if defined(PETSC_USE_BOPT_g)
904       if (k> max) max = k;
905 #endif
906     }
907   }
908   /* Loop Over B */
909   for (i=0; i<b->mbs; i++) {
910     for (j=bi[i]; j<bi[i+1]; j++) {
911       row = i+rstart;
912       col = garray[bj[j]];
913       key = row*Nbs + col + 1;
914       h1  = HASH(size,key,tmp);
915       for (k=0; k<size; k++){
916         if (HT[(h1+k)%size] == 0.0) {
917           HT[(h1+k)%size] = key;
918           HD[(h1+k)%size] = b->a + j*bs2;
919           break;
920 #if defined(PETSC_USE_BOPT_g)
921         } else {
922           ct++;
923 #endif
924         }
925       }
926 #if defined(PETSC_USE_BOPT_g)
927       if (k> max) max = k;
928 #endif
929     }
930   }
931 
932   /* Print Summary */
933 #if defined(PETSC_USE_BOPT_g)
934   for (i=0,j=0; i<size; i++) {
935     if (HT[i]) {j++;}
936   }
937   PetscLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n",(j== 0)? 0.0:((PetscReal)(ct+j))/j,max);
938 #endif
939   PetscFunctionReturn(0);
940 }
941 
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
/*
   Begins assembly: checks that all processes agree on INSERT vs ADD mode and
   starts communication of stashed off-process entries -- both the point
   stash (mat->stash) and the block stash (mat->bstash).
*/
int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  int         ierr,nstash,reallocs;
  InsertMode  addv;

  PetscFunctionBegin;
  if (baij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  /* point entries are partitioned by rowners_bs, block entries by rowners */
  ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  PetscLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Stash has %d entries,uses %d mallocs.\n",nstash,reallocs);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  PetscLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
  PetscFunctionReturn(0);
}
970 
971 EXTERN int MatUseDSCPACK_MPIBAIJ(Mat);
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
/*
   Completes assembly: drains the point and block stashes (inserting the
   received off-process values), assembles the local blocks A and B,
   disassembles if any process did, sets up the scatter context on first
   final assembly, and optionally installs the hash-table MatSetValues
   variants when -mat_use_hash_table was requested.
*/
int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data;
  int         i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2;
  int         *row,*col,other_disassembled;
  PetscTruth  r1,r2,r3;
  MatScalar   *val;
  InsertMode  addv = mat->insertmode;
#if defined(PETSC_HAVE_DSCPACK)
  PetscTruth   flag;
#endif

  PetscFunctionBegin;
  if (!baij->donotstash) {
    /* drain the point stash: each received message is a list of (row,col,val) */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = b->roworiented;
    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;
    b->roworiented    = PETSC_FALSE;
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* block values: each entry is a bs2-sized block, hence val+i*bs2 */
        ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    b->roworiented    = r3;
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselfs, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  /* first final assembly: build colmap/garray/lvec and the Mvctx scatter */
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_BOPT_g)
  if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
    PetscLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  /* switch to the hash-table fast path for subsequent MatSetValues calls */
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  /* scratch space used by MatGetRow is stale after reassembly */
  if (baij->rowvalues) {
    ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
    baij->rowvalues = 0;
  }
#if defined(PETSC_HAVE_DSCPACK)
  ierr = PetscOptionsHasName(mat->prefix,"-mat_baij_dscpack",&flag);CHKERRQ(ierr);
  if (flag) { ierr = MatUseDSCPACK_MPIBAIJ(mat);CHKERRQ(ierr); }
#endif
  PetscFunctionReturn(0);
}
1077 
1078 extern int MatMPIBAIJFactorInfo_DSCPACK(Mat,PetscViewer);
1079 
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
/*
   Views the matrix.  The ASCII info formats print per-process statistics and
   return early; otherwise (full ASCII, draw, socket) the entire matrix is
   assembled onto process 0 and viewed there via a singleton viewer.  All
   processes must participate because draw-viewer synchronization is
   collective.
*/
static int MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  int               ierr,bs = baij->bs,size = baij->size,rank = baij->rank;
  PetscTruth        isascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  if (isascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-process nonzero/memory statistics, then the scatter context */
      MatInfo info;
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
              rank,mat->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
              baij->bs,(int)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
#if defined(PETSC_HAVE_DSCPACK) && !defined(PETSC_USE_SINGLE) && !defined(PETSC_USE_COMPLEX)
      ierr = MatMPIBAIJFactorInfo_DSCPACK(mat,viewer);CHKERRQ(ierr);
#endif
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw       draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr);
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    int         M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar   *a;

    /* process 0 owns all rows of the gathered matrix; everyone else owns none */
    if (!rank) {
      ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    }
    PetscLogObjectParent(mat,A);

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      /* rvals holds the bs consecutive global point rows of block row i */
      rvals[0] = bs*(baij->rstart + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstart+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ*)baij->B->data;
    ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstart + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs; /* map B's compressed column to global */
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1192 
1193 #undef __FUNCT__
1194 #define __FUNCT__ "MatView_MPIBAIJ"
1195 int MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
1196 {
1197   int        ierr;
1198   PetscTruth isascii,isdraw,issocket,isbinary;
1199 
1200   PetscFunctionBegin;
1201   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
1202   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1203   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1204   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1205   if (isascii || isdraw || issocket || isbinary) {
1206     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1207   } else {
1208     SETERRQ1(1,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
1209   }
1210   PetscFunctionReturn(0);
1211 }
1212 
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIBAIJ"
/*
   Destroys the parallel matrix: the stashes, the local blocks A and B, the
   column map, garray, the ghost vector and scatter context, MatGetRow
   scratch space, and the hash-table storage (hd holds both HD and HT).
*/
int MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  int         ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",mat->M,mat->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(baij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
#else
  if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
#endif
  if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
  if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
  if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
  if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
  if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
  if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
#if defined(PETSC_USE_MAT_SINGLE)
  if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);}
#endif
  ierr = PetscFree(baij);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1246 
1247 #undef __FUNCT__
1248 #define __FUNCT__ "MatMult_MPIBAIJ"
1249 int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1250 {
1251   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1252   int         ierr,nt;
1253 
1254   PetscFunctionBegin;
1255   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1256   if (nt != A->n) {
1257     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1258   }
1259   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1260   if (nt != A->m) {
1261     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
1262   }
1263   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1264   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1265   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1266   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1267   ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1268   PetscFunctionReturn(0);
1269 }
1270 
1271 #undef __FUNCT__
1272 #define __FUNCT__ "MatMultAdd_MPIBAIJ"
1273 int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1274 {
1275   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1276   int        ierr;
1277 
1278   PetscFunctionBegin;
1279   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1280   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1281   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1282   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1283   PetscFunctionReturn(0);
1284 }
1285 
1286 #undef __FUNCT__
1287 #define __FUNCT__ "MatMultTranspose_MPIBAIJ"
1288 int MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1289 {
1290   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1291   int         ierr;
1292 
1293   PetscFunctionBegin;
1294   /* do nondiagonal part */
1295   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1296   /* send it on its way */
1297   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1298   /* do local part */
1299   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1300   /* receive remote parts: note this assumes the values are not actually */
1301   /* inserted in yy until the next line, which is true for my implementation*/
1302   /* but is not perhaps always true. */
1303   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1304   PetscFunctionReturn(0);
1305 }
1306 
#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
/* zz = yy + A^T * xx, with the reverse scatter overlapped with the local product */
int MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
  int         ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in zz until the next line, which is true for my implementation*/
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1327 
1328 /*
1329   This only works correctly for square matrices where the subblock A->A is the
1330    diagonal block
1331 */
1332 #undef __FUNCT__
1333 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
1334 int MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1335 {
1336   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1337   int         ierr;
1338 
1339   PetscFunctionBegin;
1340   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1341   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1342   PetscFunctionReturn(0);
1343 }
1344 
1345 #undef __FUNCT__
1346 #define __FUNCT__ "MatScale_MPIBAIJ"
1347 int MatScale_MPIBAIJ(const PetscScalar *aa,Mat A)
1348 {
1349   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1350   int         ierr;
1351 
1352   PetscFunctionBegin;
1353   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
1354   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
1355   PetscFunctionReturn(0);
1356 }
1357 
#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIBAIJ"
/*
   Returns one locally owned row (global point index `row`) as column indices
   *idx and values *v, merging the contributions of the diagonal block A and
   the off-diagonal block B into globally increasing column order.  Scratch
   space (rowvalues/rowindices) is allocated lazily, sized for the longest
   local block row.  Must be paired with MatRestoreRow_MPIBAIJ.
*/
int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
{
  Mat_MPIBAIJ  *mat = (Mat_MPIBAIJ*)matin->data;
  PetscScalar  *vworkA,*vworkB,**pvA,**pvB,*v_p;
  int          bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB;
  int          nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs;
  int          *cmap,*idx_p,cstart = mat->cstart;

  PetscFunctionBegin;
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
    int     max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      /* blocks in row i of A plus blocks in row i of B */
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    /* one chunk: values first, then the index array */
    ierr = PetscMalloc(max*bs2*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
    mat->rowindices = (int*)(mat->rowvalues + max*bs2);
  }

  if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows")
  lrow = row - brstart;

  /* pass NULL for whatever the caller did not request */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap  = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        /* B entries with global column < cstart come before all of A */
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            /* global point column = global block column * bs + offset in block */
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          /* v was not requested, so imark must be recomputed here */
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart)
              idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1438 
1439 #undef __FUNCT__
1440 #define __FUNCT__ "MatRestoreRow_MPIBAIJ"
1441 int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
1442 {
1443   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1444 
1445   PetscFunctionBegin;
1446   if (baij->getrowactive == PETSC_FALSE) {
1447     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1448   }
1449   baij->getrowactive = PETSC_FALSE;
1450   PetscFunctionReturn(0);
1451 }
1452 
1453 #undef __FUNCT__
1454 #define __FUNCT__ "MatGetBlockSize_MPIBAIJ"
1455 int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs)
1456 {
1457   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1458 
1459   PetscFunctionBegin;
1460   *bs = baij->bs;
1461   PetscFunctionReturn(0);
1462 }
1463 
1464 #undef __FUNCT__
1465 #define __FUNCT__ "MatZeroEntries_MPIBAIJ"
1466 int MatZeroEntries_MPIBAIJ(Mat A)
1467 {
1468   Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data;
1469   int         ierr;
1470 
1471   PetscFunctionBegin;
1472   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1473   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1474   PetscFunctionReturn(0);
1475 }
1476 
#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIBAIJ"
/*
   Collects matrix statistics.  Local counts are the sum of the two
   sequential blocks A and B; MAT_GLOBAL_MAX / MAT_GLOBAL_SUM reduce the
   five counters across the communicator.
*/
int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data;
  Mat         A = a->A,B = a->B;  /* local diagonal and off-diagonal blocks */
  int         ierr;
  PetscReal   isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size     = (PetscReal)a->bs;
  ierr = MatGetInfo(A,MAT_LOCAL,&info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else {
    SETERRQ1(1,"Unknown MatInfoType argument %d",flag);
  }
  /* NOTE(review): these use the dimensions of the local diagonal block A,
     not matin; rows_global/columns_global thus look like local block dims,
     and columns_local uses A->N rather than A->n -- presumably intentional
     for the seq block's shape, but verify against MatGetInfo callers. */
  info->rows_global       = (PetscReal)A->M;
  info->columns_global    = (PetscReal)A->N;
  info->rows_local        = (PetscReal)A->m;
  info->columns_local     = (PetscReal)A->N;
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}
1526 
#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPIBAIJ"
/*
   Sets a matrix option.  Most options are simply forwarded to both local
   blocks; orientation and stashing options additionally update the parallel
   layer's own flags; MAT_USE_HASH_TABLE arms the hash-table SetValues path
   (installed at the next final assembly).
*/
int MatSetOption_MPIBAIJ(Mat A,MatOption op)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
  int         ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NO_NEW_NONZERO_LOCATIONS:
  case MAT_YES_NEW_NONZERO_LOCATIONS:
  case MAT_COLUMNS_UNSORTED:
  case MAT_COLUMNS_SORTED:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_KEEP_ZEROED_ROWS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    /* forwarded unchanged to both sequential blocks */
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = PETSC_TRUE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROWS_SORTED:
  case MAT_ROWS_UNSORTED:
  case MAT_YES_NEW_DIAGONALS:
    PetscLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
    break;
  case MAT_COLUMN_ORIENTED:
    a->roworiented = PETSC_FALSE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = PETSC_TRUE;
    break;
  case MAT_NO_NEW_DIAGONALS:
    SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
  case MAT_USE_HASH_TABLE:
    a->ht_flag = PETSC_TRUE;
    break;
  default:
    SETERRQ(PETSC_ERR_SUP,"unknown option");
  }
  PetscFunctionReturn(0);
}
1574 
1575 #undef __FUNCT__
1576 #define __FUNCT__ "MatTranspose_MPIBAIJ("
1577 int MatTranspose_MPIBAIJ(Mat A,Mat *matout)
1578 {
1579   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data;
1580   Mat_SeqBAIJ *Aloc;
1581   Mat         B;
1582   int         ierr,M=A->M,N=A->N,*ai,*aj,i,*rvals,j,k,col;
1583   int         bs=baij->bs,mbs=baij->mbs;
1584   MatScalar   *a;
1585 
1586   PetscFunctionBegin;
1587   if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1588   ierr = MatCreateMPIBAIJ(A->comm,baij->bs,A->n,A->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);
1589 
1590   /* copy over the A part */
1591   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1592   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1593   ierr = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr);
1594 
1595   for (i=0; i<mbs; i++) {
1596     rvals[0] = bs*(baij->rstart + i);
1597     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1598     for (j=ai[i]; j<ai[i+1]; j++) {
1599       col = (baij->cstart+aj[j])*bs;
1600       for (k=0; k<bs; k++) {
1601         ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1602         col++; a += bs;
1603       }
1604     }
1605   }
1606   /* copy over the B part */
1607   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1608   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1609   for (i=0; i<mbs; i++) {
1610     rvals[0] = bs*(baij->rstart + i);
1611     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1612     for (j=ai[i]; j<ai[i+1]; j++) {
1613       col = baij->garray[aj[j]]*bs;
1614       for (k=0; k<bs; k++) {
1615         ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1616         col++; a += bs;
1617       }
1618     }
1619   }
1620   ierr = PetscFree(rvals);CHKERRQ(ierr);
1621   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1622   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1623 
1624   if (matout) {
1625     *matout = B;
1626   } else {
1627     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1628   }
1629   PetscFunctionReturn(0);
1630 }
1631 
#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
/*
   MatDiagonalScale_MPIBAIJ - Computes mat = diag(ll)*mat*diag(rr).

   Either ll or rr may be PETSC_NULL, in which case that side is not
   scaled.  Right-scaling the off-diagonal block needs the off-process
   entries of rr, so the scatter gathering them into baij->lvec is
   started first and completed only after the local work, overlapping
   communication with computation.  The statement order below is
   therefore significant.
*/
int MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  Mat         a = baij->A,b = baij->B;
  int         ierr,s1,s2,s3;

  PetscFunctionBegin;
  /* s2 = local rows, s3 = local columns of the parallel matrix */
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    /* left scaling only needs local entries of ll, so the off-diagonal
       block can be left-scaled immediately */
    ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
  }
  /* scale  the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}
1664 
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIBAIJ"
/*
   MatZeroRows_MPIBAIJ - Zeros the (global, point-indexed) rows of A
   listed in is; if diag is non-null, the diagonal entry of each zeroed
   row is set to *diag.

   A process may list rows owned by other processes, so the routine
   first routes each row index to its owner (a hand-rolled all-to-some
   exchange with MPI_Irecv/MPI_Isend), then each process zeroes the
   rows it received in its local diagonal (A) and off-diagonal (B)
   blocks.
*/
int MatZeroRows_MPIBAIJ(Mat A,IS is,const PetscScalar *diag)
{
  Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
  int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
  int            *nprocs,j,idx,nsends,row;
  int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int            *rvalues,tag = A->tag,count,base,slen,n,*source;
  int            *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;
  PetscTruth     found;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);

  /*  first count number of contributors to each processor */
  /* nprocs[2*j] = #rows destined for proc j; nprocs[2*j+1] = 1 iff any */
  ierr  = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
  ierr  = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
  ierr  = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = PETSC_FALSE;
    /* owners[] holds block-row ownership; scale by bs for point rows */
    for (j=0; j<size; j++) {
      if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
        nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length*/
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives:   */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
     the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
  starts[0]  = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  /* pack each row index into the segment for its owning process */
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  /* rebuild starts (the packing loop above advanced them) */
  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  /* first point row owned by this process; received indices are made 0-based local */
  base = owners[rank]*bs;

  /*  wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex]  = recv_status.MPI_SOURCE;
    lens[imdex]    = n;
    slen          += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PetscLogObjectParent(A,istmp);

  /*
        Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use seperate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

       Contributed by: Mathew Knepley
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
  ierr = MatZeroRows_SeqBAIJ(l->B,istmp,0);CHKERRQ(ierr);
  if (diag && (l->A->M == l->A->N)) {
    ierr = MatZeroRows_SeqBAIJ(l->A,istmp,diag);CHKERRQ(ierr);
  } else if (diag) {
    /* rectangular diagonal block: fall back to MatSetValues per diagonal */
    ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr);
    if (((Mat_SeqBAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i=0; i<slen; i++) {
      row  = lrows[i] + rstart_bs;
      ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr);
  }

  ierr = ISDestroy(istmp);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
1812 
1813 #undef __FUNCT__
1814 #define __FUNCT__ "MatPrintHelp_MPIBAIJ"
1815 int MatPrintHelp_MPIBAIJ(Mat A)
1816 {
1817   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*)A->data;
1818   MPI_Comm    comm = A->comm;
1819   static int  called = 0;
1820   int         ierr;
1821 
1822   PetscFunctionBegin;
1823   if (!a->rank) {
1824     ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr);
1825   }
1826   if (called) {PetscFunctionReturn(0);} else called = 1;
1827   ierr = (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
1828   ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
1829   PetscFunctionReturn(0);
1830 }
1831 
1832 #undef __FUNCT__
1833 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
1834 int MatSetUnfactored_MPIBAIJ(Mat A)
1835 {
1836   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*)A->data;
1837   int         ierr;
1838 
1839   PetscFunctionBegin;
1840   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1841   PetscFunctionReturn(0);
1842 }
1843 
1844 static int MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
1845 
1846 #undef __FUNCT__
1847 #define __FUNCT__ "MatEqual_MPIBAIJ"
1848 int MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag)
1849 {
1850   Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1851   Mat         a,b,c,d;
1852   PetscTruth  flg;
1853   int         ierr;
1854 
1855   PetscFunctionBegin;
1856   a = matA->A; b = matA->B;
1857   c = matB->A; d = matB->B;
1858 
1859   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1860   if (flg == PETSC_TRUE) {
1861     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1862   }
1863   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1864   PetscFunctionReturn(0);
1865 }
1866 
1867 
1868 #undef __FUNCT__
1869 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ"
1870 int MatSetUpPreallocation_MPIBAIJ(Mat A)
1871 {
1872   int        ierr;
1873 
1874   PetscFunctionBegin;
1875   ierr =  MatMPIBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1876   PetscFunctionReturn(0);
1877 }
1878 
/* -------------------------------------------------------------------*/
/*
   Function table for the MATMPIBAIJ matrix type.  The entries are
   positional: the slot order is fixed by the _MatOps struct declaration
   in the PETSc matrix implementation headers, and a 0 marks an
   operation that is not implemented for this format.  Do not reorder.
*/
static struct _MatOps MatOps_Values = {
  MatSetValues_MPIBAIJ,
  MatGetRow_MPIBAIJ,
  MatRestoreRow_MPIBAIJ,
  MatMult_MPIBAIJ,
  MatMultAdd_MPIBAIJ,
  MatMultTranspose_MPIBAIJ,
  MatMultTransposeAdd_MPIBAIJ,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  MatTranspose_MPIBAIJ,
  MatGetInfo_MPIBAIJ,
  MatEqual_MPIBAIJ,
  MatGetDiagonal_MPIBAIJ,
  MatDiagonalScale_MPIBAIJ,
  MatNorm_MPIBAIJ,
  MatAssemblyBegin_MPIBAIJ,
  MatAssemblyEnd_MPIBAIJ,
  0,
  MatSetOption_MPIBAIJ,
  MatZeroEntries_MPIBAIJ,
  MatZeroRows_MPIBAIJ,
  0,
  0,
  0,
  0,
  MatSetUpPreallocation_MPIBAIJ,
  0,
  0,
  0,
  0,
  MatDuplicate_MPIBAIJ,
  0,
  0,
  0,
  0,
  0,
  MatGetSubMatrices_MPIBAIJ,
  MatIncreaseOverlap_MPIBAIJ,
  MatGetValues_MPIBAIJ,
  0,
  MatPrintHelp_MPIBAIJ,
  MatScale_MPIBAIJ,
  0,
  0,
  0,
  MatGetBlockSize_MPIBAIJ,
  0,
  0,
  0,
  0,
  0,
  0,
  MatSetUnfactored_MPIBAIJ,
  0,
  MatSetValuesBlocked_MPIBAIJ,
  0,
  MatDestroy_MPIBAIJ,
  MatView_MPIBAIJ,
  MatGetPetscMaps_Petsc,
  0,
  0,
  0,
  0,
  0,
  0,
  MatGetRowMax_MPIBAIJ};
1952 
1953 
1954 EXTERN_C_BEGIN
1955 #undef __FUNCT__
1956 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
1957 int MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1958 {
1959   PetscFunctionBegin;
1960   *a      = ((Mat_MPIBAIJ *)A->data)->A;
1961   *iscopy = PETSC_FALSE;
1962   PetscFunctionReturn(0);
1963 }
1964 EXTERN_C_END
1965 
1966 EXTERN_C_BEGIN
1967 #undef __FUNCT__
1968 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
1969 int MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,int bs,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
1970 {
1971   Mat_MPIBAIJ  *b;
1972   int          ierr,i;
1973 
1974   PetscFunctionBegin;
1975   B->preallocated = PETSC_TRUE;
1976   ierr = PetscOptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1977 
1978   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
1979   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
1980   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
1981   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
1982   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
1983   if (d_nnz) {
1984   for (i=0; i<B->m/bs; i++) {
1985       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %d value %d",i,d_nnz[i]);
1986     }
1987   }
1988   if (o_nnz) {
1989     for (i=0; i<B->m/bs; i++) {
1990       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %d value %d",i,o_nnz[i]);
1991     }
1992   }
1993 
1994   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr);
1995   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr);
1996   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1997   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
1998 
1999   b = (Mat_MPIBAIJ*)B->data;
2000   b->bs  = bs;
2001   b->bs2 = bs*bs;
2002   b->mbs = B->m/bs;
2003   b->nbs = B->n/bs;
2004   b->Mbs = B->M/bs;
2005   b->Nbs = B->N/bs;
2006 
2007   ierr = MPI_Allgather(&b->mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
2008   b->rowners[0]    = 0;
2009   for (i=2; i<=b->size; i++) {
2010     b->rowners[i] += b->rowners[i-1];
2011   }
2012   b->rstart    = b->rowners[b->rank];
2013   b->rend      = b->rowners[b->rank+1];
2014 
2015   ierr = MPI_Allgather(&b->nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
2016   b->cowners[0] = 0;
2017   for (i=2; i<=b->size; i++) {
2018     b->cowners[i] += b->cowners[i-1];
2019   }
2020   b->cstart    = b->cowners[b->rank];
2021   b->cend      = b->cowners[b->rank+1];
2022 
2023   for (i=0; i<=b->size; i++) {
2024     b->rowners_bs[i] = b->rowners[i]*bs;
2025   }
2026   b->rstart_bs = b->rstart*bs;
2027   b->rend_bs   = b->rend*bs;
2028   b->cstart_bs = b->cstart*bs;
2029   b->cend_bs   = b->cend*bs;
2030 
2031   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,B->m,B->n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
2032   PetscLogObjectParent(B,b->A);
2033   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,B->m,B->N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
2034   PetscLogObjectParent(B,b->B);
2035   ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);
2036 
2037   PetscFunctionReturn(0);
2038 }
2039 EXTERN_C_END
2040 
2041 EXTERN_C_BEGIN
2042 EXTERN int MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
2043 EXTERN int MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
2044 EXTERN_C_END
2045 
2046 EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIBAIJ"
/*
   MatCreate_MPIBAIJ - Constructor registered for the MATMPIBAIJ type.

   Allocates and zeroes the Mat_MPIBAIJ data structure, installs the
   function table, sets up the ownership arrays and stash, initializes
   the hash-table assembly state, and composes the type-specific
   functions (store/retrieve values, diagonal block, preallocation,
   local diagonal scale, hash table factor) on the object.
*/
int MatCreate_MPIBAIJ(Mat B)
{
  Mat_MPIBAIJ  *b;
  int          ierr;
  PetscTruth   flg;

  PetscFunctionBegin;

  ierr = PetscNew(Mat_MPIBAIJ,&b);CHKERRQ(ierr);
  B->data = (void*)b;

  ierr    = PetscMemzero(b,sizeof(Mat_MPIBAIJ));CHKERRQ(ierr);
  ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->mapping    = 0;
  B->factor     = 0;
  B->assembled  = PETSC_FALSE;

  B->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  /* one allocation holds rowners, cowners and rowners_bs back to back */
  ierr          = PetscMalloc(3*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
  PetscLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
  b->cowners    = b->rowners + b->size + 2;
  b->rowners_bs = b->cowners + b->size + 2;

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
  b->donotstash  = PETSC_FALSE;
  b->colmap      = PETSC_NULL;
  b->garray      = PETSC_NULL;
  b->roworiented = PETSC_TRUE;

#if defined(PETSC_USE_MAT_SINGLE)
  /* stuff for MatSetValues_XXX in single precision */
  b->setvalueslen     = 0;
  b->setvaluescopy    = PETSC_NULL;
#endif

  /* stuff used in block assembly */
  b->barray       = 0;

  /* stuff used for matrix vector multiply */
  b->lvec         = 0;
  b->Mvctx        = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = 0;
  b->hd           = 0;
  b->ht_size      = 0;
  b->ht_flag      = PETSC_FALSE;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  /* -mat_use_hash_table [factor] enables hash-table assembly; factors
     <= 1.0 fall back to the default 1.39 */
  ierr = PetscOptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
  if (flg) {
    PetscReal fact = 1.39;
    ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
    ierr = PetscOptionsGetReal(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
    if (fact <= 1.0) fact = 1.39;
    ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
    PetscLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact);
  }
  /* compose the type-specific entry points so generic interfaces
     (e.g. MatMPIBAIJSetPreallocation) can dispatch to them */
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
                                     "MatStoreValues_MPIBAIJ",
                                     MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
                                     "MatRetrieveValues_MPIBAIJ",
                                     MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
                                     "MatGetDiagonalBlock_MPIBAIJ",
                                     MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C",
                                     "MatMPIBAIJSetPreallocation_MPIBAIJ",
                                     MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
                                     "MatDiagonalScaleLocal_MPIBAIJ",
                                     MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C",
                                     "MatSetHashTableFactor_MPIBAIJ",
                                     MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2139 EXTERN_C_END
2140 
2141 #undef __FUNCT__
2142 #define __FUNCT__ "MatMPIBAIJSetPreallocation"
2143 /*@C
2144    MatMPIBAIJSetPreallocation - Creates a sparse parallel matrix in block AIJ format
2145    (block compressed row).  For good matrix assembly performance
2146    the user should preallocate the matrix storage by setting the parameters
2147    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2148    performance can be increased by more than a factor of 50.
2149 
2150    Collective on Mat
2151 
2152    Input Parameters:
2153 +  A - the matrix
.  bs   - size of block
2155 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2156            submatrix  (same for all local rows)
2157 .  d_nnz - array containing the number of block nonzeros in the various block rows
           in the diagonal portion of the local submatrix (possibly different for each block
2159            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
2160 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2161            submatrix (same for all local rows).
2162 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2163            off-diagonal portion of the local submatrix (possibly different for
2164            each block row) or PETSC_NULL.
2165 
2166    Output Parameter:
2167 
2168 
2169    Options Database Keys:
2170 .   -mat_no_unroll - uses code that does not unroll the loops in the
2171                      block calculations (much slower)
2172 .   -mat_block_size - size of the blocks to use
2173 
2174    Notes:
2175    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
2176    than it must be used on all processors that share the object for that argument.
2177 
2178    Storage Information:
2179    For a square global matrix we define each processor's diagonal portion
2180    to be its local rows and the corresponding columns (a square submatrix);
2181    each processor's off-diagonal portion encompasses the remainder of the
2182    local matrix (a rectangular submatrix).
2183 
2184    The user can specify preallocated storage for the diagonal part of
2185    the local submatrix with either d_nz or d_nnz (not both).  Set
2186    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2187    memory allocation.  Likewise, specify preallocated storage for the
2188    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2189 
2190    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2191    the figure below we depict these three local rows and all columns (0-11).
2192 
2193 .vb
2194            0 1 2 3 4 5 6 7 8 9 10 11
2195           -------------------
2196    row 3  |  o o o d d d o o o o o o
2197    row 4  |  o o o d d d o o o o o o
2198    row 5  |  o o o d d d o o o o o o
2199           -------------------
2200 .ve
2201 
2202    Thus, any entries in the d locations are stored in the d (diagonal)
2203    submatrix, and any entries in the o locations are stored in the
2204    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
2205    stored simply in the MATSEQBAIJ format for compressed row storage.
2206 
2207    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2208    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2209    In general, for PDE problems in which most nonzeros are near the diagonal,
2210    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2211    or you will get TERRIBLE performance; see the users' manual chapter on
2212    matrices.
2213 
2214    Level: intermediate
2215 
2216 .keywords: matrix, block, aij, compressed row, sparse, parallel
2217 
2218 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2219 @*/
2220 int MatMPIBAIJSetPreallocation(Mat B,int bs,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[])
2221 {
2222   int ierr,(*f)(Mat,int,int,const int[],int,const int[]);
2223 
2224   PetscFunctionBegin;
2225   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2226   if (f) {
2227     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2228   }
2229   PetscFunctionReturn(0);
2230 }
2231 
2232 #undef __FUNCT__
2233 #define __FUNCT__ "MatCreateMPIBAIJ"
2234 /*@C
2235    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
2236    (block compressed row).  For good matrix assembly performance
2237    the user should preallocate the matrix storage by setting the parameters
2238    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2239    performance can be increased by more than a factor of 50.
2240 
2241    Collective on MPI_Comm
2242 
2243    Input Parameters:
2244 +  comm - MPI communicator
.  bs   - size of block
2246 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2247            This value should be the same as the local size used in creating the
2248            y vector for the matrix-vector product y = Ax.
2249 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2250            This value should be the same as the local size used in creating the
2251            x vector for the matrix-vector product y = Ax.
2252 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2253 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2254 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
2255            submatrix  (same for all local rows)
2256 .  d_nnz - array containing the number of nonzero blocks in the various block rows
           in the diagonal portion of the local submatrix (possibly different for each block
2258            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
2259 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
2260            submatrix (same for all local rows).
2261 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
2262            off-diagonal portion of the local submatrix (possibly different for
2263            each block row) or PETSC_NULL.
2264 
2265    Output Parameter:
2266 .  A - the matrix
2267 
2268    Options Database Keys:
2269 .   -mat_no_unroll - uses code that does not unroll the loops in the
2270                      block calculations (much slower)
2271 .   -mat_block_size - size of the blocks to use
2272 
2273    Notes:
   A nonzero block is any block that has 1 or more nonzeros in it
2275 
2276    The user MUST specify either the local or global matrix dimensions
2277    (possibly both).
2278 
2279    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
2280    than it must be used on all processors that share the object for that argument.
2281 
2282    Storage Information:
2283    For a square global matrix we define each processor's diagonal portion
2284    to be its local rows and the corresponding columns (a square submatrix);
2285    each processor's off-diagonal portion encompasses the remainder of the
2286    local matrix (a rectangular submatrix).
2287 
2288    The user can specify preallocated storage for the diagonal part of
2289    the local submatrix with either d_nz or d_nnz (not both).  Set
2290    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2291    memory allocation.  Likewise, specify preallocated storage for the
2292    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2293 
2294    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2295    the figure below we depict these three local rows and all columns (0-11).
2296 
2297 .vb
2298            0 1 2 3 4 5 6 7 8 9 10 11
2299           -------------------
2300    row 3  |  o o o d d d o o o o o o
2301    row 4  |  o o o d d d o o o o o o
2302    row 5  |  o o o d d d o o o o o o
2303           -------------------
2304 .ve
2305 
2306    Thus, any entries in the d locations are stored in the d (diagonal)
2307    submatrix, and any entries in the o locations are stored in the
2308    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
2309    stored simply in the MATSEQBAIJ format for compressed row storage.
2310 
2311    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2312    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2313    In general, for PDE problems in which most nonzeros are near the diagonal,
2314    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2315    or you will get TERRIBLE performance; see the users' manual chapter on
2316    matrices.
2317 
2318    Level: intermediate
2319 
2320 .keywords: matrix, block, aij, compressed row, sparse, parallel
2321 
2322 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2323 @*/
2324 int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[],Mat *A)
2325 {
2326   int ierr,size;
2327 
2328   PetscFunctionBegin;
2329   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2330   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2331   if (size > 1) {
2332     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
2333     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2334   } else {
2335     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
2336     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2337   }
2338   PetscFunctionReturn(0);
2339 }
2340 
#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIBAIJ"
/*
   MatDuplicate_MPIBAIJ - Creates a new MPIBAIJ matrix with the same
   parallel layout as matin; cpvalues is passed through to
   MatDuplicate() on the two local blocks, so it controls whether the
   numerical values are copied.
*/
static int MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat         mat;
  Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
  int         ierr,len=0;

  PetscFunctionBegin;
  *newmat       = 0;
  ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
  ierr = MatSetType(mat,MATMPIBAIJ);CHKERRQ(ierr);
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  a      = (Mat_MPIBAIJ*)mat->data;
  /* copy the scalar layout fields from the old matrix */
  a->bs  = oldmat->bs;
  a->bs2 = oldmat->bs2;
  a->mbs = oldmat->mbs;
  a->nbs = oldmat->nbs;
  a->Mbs = oldmat->Mbs;
  a->Nbs = oldmat->Nbs;

  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->cstart       = oldmat->cstart;
  a->cend         = oldmat->cend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  /* per-instance scratch state starts out empty rather than copied */
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;
  a->barray       = 0;
  a->rstart_bs    = oldmat->rstart_bs;
  a->rend_bs      = oldmat->rend_bs;
  a->cstart_bs    = oldmat->cstart_bs;
  a->cend_bs      = oldmat->cend_bs;

  /* hash table stuff */
  /* only the configuration (flag/factor) is copied; the table itself
     is rebuilt on demand */
  a->ht           = 0;
  a->hd           = 0;
  a->ht_size      = 0;
  a->ht_flag      = oldmat->ht_flag;
  a->ht_fact      = oldmat->ht_fact;
  a->ht_total_ct  = 0;
  a->ht_insert_ct = 0;

  /* rowners, cowners and rowners_bs live in one contiguous allocation */
  ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr);
  ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
  ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr);
  if (oldmat->colmap) {
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
  ierr = PetscMalloc((a->Nbs)*sizeof(int),&a->colmap);CHKERRQ(ierr);
  PetscLogObjectMemory(mat,(a->Nbs)*sizeof(int));
  ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;
  if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
    ierr = PetscMalloc(len*sizeof(int),&a->garray);CHKERRQ(ierr);
    PetscLogObjectMemory(mat,len*sizeof(int));
    ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr);
  } else a->garray = 0;

  ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->lvec);
  ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);

  PetscLogObjectParent(mat,a->Mvctx);
  /* the two local blocks honor cpvalues (copy values or pattern only) */
  ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->A);
  ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  PetscLogObjectParent(mat,a->B);
  ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}
2420 
2421 #include "petscsys.h"
2422 
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPIBAIJ"
/*
   MatLoad_MPIBAIJ - Loads a square matrix stored in PETSc binary format into
   a parallel MPIBAIJ matrix.

   Process 0 reads the entire file (header, row lengths, column indices,
   values) and ships each process its portion with MPI point-to-point sends;
   the other processes only receive.  If the global row count is not divisible
   by the block size (set via -matload_block_size, default 1), extra rows with
   a single unit diagonal entry are appended so it becomes divisible.

   Collective on the viewer's communicator.  Only square matrices (M == N)
   are supported.
*/
int MatLoad_MPIBAIJ(PetscViewer viewer,MatType type,Mat *newmat)
{
  Mat          A;
  int          i,nz,ierr,j,rstart,rend,fd;
  PetscScalar  *vals,*buf;
  MPI_Comm     comm = ((PetscObject)viewer)->comm;
  MPI_Status   status;
  int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
  int          *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf;
  int          tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows;
  int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
  int          dcount,kmax,k,nzcount,tmp;
#if defined(PETSC_HAVE_DSCPACK)
  PetscTruth   flag;
#endif

  PetscFunctionBegin;
  ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    /* only process 0 touches the file */
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
    if (header[3] < 0) {
      SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPIBAIJ");
    }
  }

  /* broadcast M, N, total-nnz (header[1..3]) from process 0 */
  ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2];

  if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");

  /*
     This code adds extra rows to make sure the number of rows is
     divisible by the blocksize
  */
  Mbs        = M/bs;
  extra_rows = bs - M + bs*(Mbs);
  if (extra_rows == bs) extra_rows = 0;
  else                  Mbs++;
  if (extra_rows &&!rank) {
    PetscLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n");
  }

  /* determine ownership of all rows: even split of block rows, the first
     (Mbs % size) ranks getting one extra block row */
  mbs        = Mbs/size + ((Mbs % size) > rank);
  m          = mbs*bs;
  ierr       = PetscMalloc(2*(size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
  browners   = rowners + size + 1;
  ierr       = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  /* prefix-sum into ownership ranges: rowners[] in block rows,
     browners[] in scalar rows */
  rowners[0] = 0;
  for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
  for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
  rstart = rowners[rank];        /* first owned BLOCK row */
  rend   = rowners[rank+1];      /* one past last owned BLOCK row */

  /* distribute row lengths to all processors */
  ierr = PetscMalloc((rend-rstart)*bs*sizeof(int),&locrowlens);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    /* each padding row gets exactly one (diagonal) entry */
    for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
    ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
    /* browners doubles as the displacement array since ranges are contiguous */
    ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices  */
    nz     = procsnz[0];
    ierr   = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr);
    mycols = ibuf;
    /* the padding rows exist only in memory, not in the file, so in the
       uniprocessor case read that many fewer entries and synthesize them */
    if (size == 1)  nz -= extra_rows;
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
    if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }

    /* read in every ones (except the last) and ship off */
    for (i=1; i<size-1; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
    }
    /* read in the stuff for the last proc */
    if (size != 1) {
      nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
      ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += locrowlens[i];
    }
    ierr   = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr);
    mycols = ibuf;
    /* receive message of column indices*/
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries;
     mask[] marks block columns already counted within the current block row,
     masked1/masked2 remember which entries to unmark (diag/off-diag) */
  ierr     = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&dlens);CHKERRQ(ierr);
  odlens   = dlens + (rend-rstart);
  ierr     = PetscMalloc(3*Mbs*sizeof(int),&mask);CHKERRQ(ierr);
  ierr     = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr);
  masked1  = mask    + Mbs;
  masked2  = masked1 + Mbs;
  rowcount = 0; nzcount = 0;
  for (i=0; i<mbs; i++) {
    dcount  = 0;
    odcount = 0;
    for (j=0; j<bs; j++) {
      kmax = locrowlens[rowcount];
      for (k=0; k<kmax; k++) {
        /* convert scalar column index to block column index */
        tmp = mycols[nzcount++]/bs;
        if (!mask[tmp]) {
          mask[tmp] = 1;
          /* rstart/rend are block indices, so this tests block-diagonal
             ownership correctly (matrix is square) */
          if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
          else masked1[dcount++] = tmp;
        }
      }
      rowcount++;
    }

    dlens[i]  = dcount;
    odlens[i] = odcount;

    /* zero out the mask elements we set */
    for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
    for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
  }

  /* create our matrix with exact preallocation from dlens/odlens */
  ierr = MatCreateMPIBAIJ(comm,bs,m,m,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);CHKERRQ(ierr);
  A = *newmat;
  /* file format stores each row's columns sorted; telling the matrix avoids
     a sort during insertion */
  MatSetOption(A,MAT_COLUMNS_SORTED);

  if (!rank) {
    ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
    /* read in my part of the matrix numerical values  */
    nz = procsnz[0];
    vals = buf;
    mycols = ibuf;
    if (size == 1)  nz -= extra_rows;
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
    /* unit diagonal for the padding rows */
    if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }

    /* insert into matrix */
    jj      = rstart*bs;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
    /* read in other processors (except the last one) and ship out */
    /* NOTE(review): values use A->tag while the column indices above used the
       viewer's tag — presumably intentional to keep the two message streams
       distinct; confirm */
    for (i=1; i<size-1; i++) {
      nz   = procsnz[i];
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    /* the last proc (i == size-1 here, left over from the loop above) */
    if (size != 1){
      nz   = procsnz[i] - extra_rows;
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
      ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);

    /* receive message of values*/
    vals   = buf;
    mycols = ibuf;
    ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart*bs;
    for (i=0; i<m; i++) {
      ierr    = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
  }
  ierr = PetscFree(locrowlens);CHKERRQ(ierr);
  ierr = PetscFree(buf);CHKERRQ(ierr);
  ierr = PetscFree(ibuf);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);
  ierr = PetscFree(dlens);CHKERRQ(ierr);
  ierr = PetscFree(mask);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
#if defined(PETSC_HAVE_DSCPACK)
  ierr = PetscOptionsHasName(A->prefix,"-mat_baij_dscpack",&flag);CHKERRQ(ierr);
  if (flag) { ierr = MatUseDSCPACK_MPIBAIJ(A);CHKERRQ(ierr); }
#endif
  PetscFunctionReturn(0);
}
EXTERN_C_END
2658 EXTERN_C_END
2659 
2660 #undef __FUNCT__
2661 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
2662 /*@
2663    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2664 
2665    Input Parameters:
2666 .  mat  - the matrix
2667 .  fact - factor
2668 
2669    Collective on Mat
2670 
2671    Level: advanced
2672 
2673   Notes:
2674    This can also be set by the command line option: -mat_use_hash_table fact
2675 
2676 .keywords: matrix, hashtable, factor, HT
2677 
2678 .seealso: MatSetOption()
2679 @*/
2680 int MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
2681 {
2682   int ierr,(*f)(Mat,PetscReal);
2683 
2684   PetscFunctionBegin;
2685   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);CHKERRQ(ierr);
2686   if (f) {
2687     ierr = (*f)(mat,fact);CHKERRQ(ierr);
2688   }
2689   PetscFunctionReturn(0);
2690 }
2691 
2692 #undef __FUNCT__
2693 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
2694 int MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
2695 {
2696   Mat_MPIBAIJ *baij;
2697 
2698   PetscFunctionBegin;
2699   PetscValidHeaderSpecific(mat,MAT_COOKIE);
2700   baij = (Mat_MPIBAIJ*)mat->data;
2701   baij->ht_fact = fact;
2702   PetscFunctionReturn(0);
2703 }
2704 
2705 #undef __FUNCT__
2706 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
2707 int MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,int *colmap[])
2708 {
2709   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2710   PetscFunctionBegin;
2711   *Ad     = a->A;
2712   *Ao     = a->B;
2713   *colmap = a->garray;
2714   PetscFunctionReturn(0);
2715 }
2716