xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision a96a251defc07734f733d1abf27b5765e87fdc8b)
1 #define PETSCMAT_DLL
2 
3 #include "src/mat/impls/baij/mpi/mpibaij.h"   /*I  "petscmat.h"  I*/
4 
5 EXTERN PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
6 EXTERN PetscErrorCode DisAssemble_MPIBAIJ(Mat);
7 EXTERN PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat,PetscInt,IS[],PetscInt);
8 EXTERN PetscErrorCode MatGetSubMatrices_MPIBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat *[]);
9 EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []);
10 EXTERN PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
11 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
12 EXTERN PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
13 EXTERN PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
14 EXTERN PetscErrorCode MatPrintHelp_SeqBAIJ(Mat);
15 EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,const PetscScalar*);
16 
17 /*  UGLY, ugly, ugly
18    When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does
19    not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and
20    inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ()
21    converts the entries into single precision and then calls ..._MatScalar() to put them
22    into the single precision data structures.
23 */
24 #if defined(PETSC_USE_MAT_SINGLE)
25 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
26 EXTERN PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
27 EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
28 EXTERN PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
29 EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
30 #else
31 #define MatSetValuesBlocked_SeqBAIJ_MatScalar      MatSetValuesBlocked_SeqBAIJ
32 #define MatSetValues_MPIBAIJ_MatScalar             MatSetValues_MPIBAIJ
33 #define MatSetValuesBlocked_MPIBAIJ_MatScalar      MatSetValuesBlocked_MPIBAIJ
34 #define MatSetValues_MPIBAIJ_HT_MatScalar          MatSetValues_MPIBAIJ_HT
35 #define MatSetValuesBlocked_MPIBAIJ_HT_MatScalar   MatSetValuesBlocked_MPIBAIJ_HT
36 #endif
37 
38 #undef __FUNCT__
39 #define __FUNCT__ "MatGetRowMax_MPIBAIJ"
40 PetscErrorCode MatGetRowMax_MPIBAIJ(Mat A,Vec v)
41 {
42   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
43   PetscErrorCode ierr;
44   PetscInt       i;
45   PetscScalar    *va,*vb;
46   Vec            vtmp;
47 
48   PetscFunctionBegin;
49 
50   ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr);
51   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
52 
53   ierr = VecCreateSeq(PETSC_COMM_SELF,A->m,&vtmp);CHKERRQ(ierr);
54   ierr = MatGetRowMax(a->B,vtmp);CHKERRQ(ierr);
55   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
56 
57   for (i=0; i<A->m; i++){
58     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) va[i] = vb[i];
59   }
60 
61   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
62   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
63   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
64 
65   PetscFunctionReturn(0);
66 }
67 
68 EXTERN_C_BEGIN
69 #undef __FUNCT__
70 #define __FUNCT__ "MatStoreValues_MPIBAIJ"
71 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIBAIJ(Mat mat)
72 {
73   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
74   PetscErrorCode ierr;
75 
76   PetscFunctionBegin;
77   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
78   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
79   PetscFunctionReturn(0);
80 }
81 EXTERN_C_END
82 
83 EXTERN_C_BEGIN
84 #undef __FUNCT__
85 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
86 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIBAIJ(Mat mat)
87 {
88   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
89   PetscErrorCode ierr;
90 
91   PetscFunctionBegin;
92   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
93   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
94   PetscFunctionReturn(0);
95 }
96 EXTERN_C_END
97 
98 /*
99      Local utility routine that creates a mapping from the global column
100    number to the local number in the off-diagonal part of the local
101    storage of the matrix.  This is done in a non scable way since the
102    length of colmap equals the global matrix length.
103 */
104 #undef __FUNCT__
105 #define __FUNCT__ "CreateColmap_MPIBAIJ_Private"
106 PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat)
107 {
108   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
109   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
110   PetscErrorCode ierr;
111   PetscInt       nbs = B->nbs,i,bs=mat->bs;
112 
113   PetscFunctionBegin;
114 #if defined (PETSC_USE_CTABLE)
115   ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr);
116   for (i=0; i<nbs; i++){
117     ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
118   }
119 #else
120   ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr);
121   ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
122   ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
123   for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
124 #endif
125   PetscFunctionReturn(0);
126 }
127 
128 #define CHUNKSIZE  10
129 
130 #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
131 { \
132  \
133     brow = row/bs;  \
134     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
135     rmax = aimax[brow]; nrow = ailen[brow]; \
136       bcol = col/bs; \
137       ridx = row % bs; cidx = col % bs; \
138       low = 0; high = nrow; \
139       while (high-low > 3) { \
140         t = (low+high)/2; \
141         if (rp[t] > bcol) high = t; \
142         else              low  = t; \
143       } \
144       for (_i=low; _i<high; _i++) { \
145         if (rp[_i] > bcol) break; \
146         if (rp[_i] == bcol) { \
147           bap  = ap +  bs2*_i + bs*cidx + ridx; \
148           if (addv == ADD_VALUES) *bap += value;  \
149           else                    *bap  = value;  \
150           goto a_noinsert; \
151         } \
152       } \
153       if (a->nonew == 1) goto a_noinsert; \
154       else if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
155       if (nrow >= rmax) { \
156         /* there is no extra room in row, therefore enlarge */ \
157         PetscInt       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
158         MatScalar *new_a; \
159  \
160         if (a->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
161  \
162         /* malloc new storage space */ \
163         ierr = PetscMalloc3(bs2*new_nz,PetscScalar,&new_a,new_nz,PetscInt,&new_j,a->mbs+1,PetscInt,&new_i);CHKERRQ(ierr);\
164  \
165         /* copy over old data into new slots */ \
166         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \
167         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
168         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); \
169         len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
170         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); \
171         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
172         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(PetscScalar));CHKERRQ(ierr); \
173         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
174         /* free up old matrix storage */ \
175        if (!a->singlemalloc) {\
176           ierr = PetscFree(a->a);CHKERRQ(ierr);\
177           ierr = PetscFree(a->i);CHKERRQ(ierr);\
178           ierr = PetscFree(a->j);CHKERRQ(ierr);\
179         } else {\
180           ierr = PetscFree3(a->a,a->i,a->j);CHKERRQ(ierr);\
181         }\
182         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
183         a->singlemalloc = PETSC_TRUE; \
184  \
185         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
186         rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
187         ierr = PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar)));CHKERRQ(ierr); \
188         a->maxnz += bs2*CHUNKSIZE; \
189         a->reallocs++; \
190         a->nz++; \
191       } \
192       N = nrow++ - 1;  \
193       /* shift up all the later entries in this row */ \
194       for (ii=N; ii>=_i; ii--) { \
195         rp[ii+1] = rp[ii]; \
196         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
197       } \
198       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
199       rp[_i]                      = bcol;  \
200       ap[bs2*_i + bs*cidx + ridx] = value;  \
201       a_noinsert:; \
202     ailen[brow] = nrow; \
203 }
204 
205 #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
206 { \
207     brow = row/bs;  \
208     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
209     rmax = bimax[brow]; nrow = bilen[brow]; \
210       bcol = col/bs; \
211       ridx = row % bs; cidx = col % bs; \
212       low = 0; high = nrow; \
213       while (high-low > 3) { \
214         t = (low+high)/2; \
215         if (rp[t] > bcol) high = t; \
216         else              low  = t; \
217       } \
218       for (_i=low; _i<high; _i++) { \
219         if (rp[_i] > bcol) break; \
220         if (rp[_i] == bcol) { \
221           bap  = ap +  bs2*_i + bs*cidx + ridx; \
222           if (addv == ADD_VALUES) *bap += value;  \
223           else                    *bap  = value;  \
224           goto b_noinsert; \
225         } \
226       } \
227       if (b->nonew == 1) goto b_noinsert; \
228       else if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
229       if (nrow >= rmax) { \
230         /* there is no extra room in row, therefore enlarge */ \
231         PetscInt       new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
232         MatScalar *new_a; \
233  \
234         if (b->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
235  \
236         /* malloc new storage space */ \
237         ierr = PetscMalloc3(bs2*new_nz,PetscScalar,&new_a,new_nz,PetscInt,&new_j,b->mbs+1,PetscInt,&new_i);CHKERRQ(ierr);\
238  \
239         /* copy over old data into new slots */ \
240         for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \
241         for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
242         ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); \
243         len  = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
244         ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); \
245         ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
246         ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
247         ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
248                     ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
249         /* free up old matrix storage */ \
250        if (!a->singlemalloc) {\
251           ierr = PetscFree(b->a);CHKERRQ(ierr);\
252           ierr = PetscFree(b->i);CHKERRQ(ierr);\
253           ierr = PetscFree(b->j);CHKERRQ(ierr);\
254         } else {\
255           ierr = PetscFree3(b->a,b->i,b->j);CHKERRQ(ierr);\
256         }\
257         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
258         b->singlemalloc = PETSC_TRUE; \
259  \
260         rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
261         rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
262         ierr = PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar)));CHKERRQ(ierr); \
263         b->maxnz += bs2*CHUNKSIZE; \
264         b->reallocs++; \
265         b->nz++; \
266       } \
267       N = nrow++ - 1;  \
268       /* shift up all the later entries in this row */ \
269       for (ii=N; ii>=_i; ii--) { \
270         rp[ii+1] = rp[ii]; \
271         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
272       } \
273       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
274       rp[_i]                      = bcol;  \
275       ap[bs2*_i + bs*cidx + ridx] = value;  \
276       b_noinsert:; \
277     bilen[brow] = nrow; \
278 }
279 
280 #if defined(PETSC_USE_MAT_SINGLE)
281 #undef __FUNCT__
282 #define __FUNCT__ "MatSetValues_MPIBAIJ"
283 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
284 {
285   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
286   PetscErrorCode ierr;
287   PetscInt       i,N = m*n;
288   MatScalar      *vsingle;
289 
290   PetscFunctionBegin;
291   if (N > b->setvalueslen) {
292     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
293     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
294     b->setvalueslen  = N;
295   }
296   vsingle = b->setvaluescopy;
297 
298   for (i=0; i<N; i++) {
299     vsingle[i] = v[i];
300   }
301   ierr = MatSetValues_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
302   PetscFunctionReturn(0);
303 }
304 
305 #undef __FUNCT__
306 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
307 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
308 {
309   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
310   PetscErrorCode ierr;
311   PetscInt       i,N = m*n*b->bs2;
312   MatScalar      *vsingle;
313 
314   PetscFunctionBegin;
315   if (N > b->setvalueslen) {
316     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
317     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
318     b->setvalueslen  = N;
319   }
320   vsingle = b->setvaluescopy;
321   for (i=0; i<N; i++) {
322     vsingle[i] = v[i];
323   }
324   ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
325   PetscFunctionReturn(0);
326 }
327 
328 #undef __FUNCT__
329 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
330 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
331 {
332   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
333   PetscErrorCode ierr;
334   PetscInt       i,N = m*n;
335   MatScalar      *vsingle;
336 
337   PetscFunctionBegin;
338   if (N > b->setvalueslen) {
339     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
340     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
341     b->setvalueslen  = N;
342   }
343   vsingle = b->setvaluescopy;
344   for (i=0; i<N; i++) {
345     vsingle[i] = v[i];
346   }
347   ierr = MatSetValues_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
348   PetscFunctionReturn(0);
349 }
350 
351 #undef __FUNCT__
352 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
353 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
354 {
355   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
356   PetscErrorCode ierr;
357   PetscInt       i,N = m*n*b->bs2;
358   MatScalar      *vsingle;
359 
360   PetscFunctionBegin;
361   if (N > b->setvalueslen) {
362     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
363     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
364     b->setvalueslen  = N;
365   }
366   vsingle = b->setvaluescopy;
367   for (i=0; i<N; i++) {
368     vsingle[i] = v[i];
369   }
370   ierr = MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
371   PetscFunctionReturn(0);
372 }
373 #endif
374 
375 #undef __FUNCT__
376 #define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar"
377 PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
378 {
379   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
380   MatScalar      value;
381   PetscTruth     roworiented = baij->roworiented;
382   PetscErrorCode ierr;
383   PetscInt       i,j,row,col;
384   PetscInt       rstart_orig=baij->rstart_bs;
385   PetscInt       rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
386   PetscInt       cend_orig=baij->cend_bs,bs=mat->bs;
387 
388   /* Some Variables required in the macro */
389   Mat            A = baij->A;
390   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)(A)->data;
391   PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
392   MatScalar      *aa=a->a;
393 
394   Mat            B = baij->B;
395   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(B)->data;
396   PetscInt       *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
397   MatScalar      *ba=b->a;
398 
399   PetscInt       *rp,ii,nrow,_i,rmax,N,brow,bcol;
400   PetscInt       low,high,t,ridx,cidx,bs2=a->bs2;
401   MatScalar      *ap,*bap;
402 
403   PetscFunctionBegin;
404   for (i=0; i<m; i++) {
405     if (im[i] < 0) continue;
406 #if defined(PETSC_USE_DEBUG)
407     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1);
408 #endif
409     if (im[i] >= rstart_orig && im[i] < rend_orig) {
410       row = im[i] - rstart_orig;
411       for (j=0; j<n; j++) {
412         if (in[j] >= cstart_orig && in[j] < cend_orig){
413           col = in[j] - cstart_orig;
414           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
415           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
416           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
417         } else if (in[j] < 0) continue;
418 #if defined(PETSC_USE_DEBUG)
419         else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[i],mat->N-1);}
420 #endif
421         else {
422           if (mat->was_assembled) {
423             if (!baij->colmap) {
424               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
425             }
426 #if defined (PETSC_USE_CTABLE)
427             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
428             col  = col - 1;
429 #else
430             col = baij->colmap[in[j]/bs] - 1;
431 #endif
432             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
433               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
434               col =  in[j];
435               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
436               B = baij->B;
437               b = (Mat_SeqBAIJ*)(B)->data;
438               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
439               ba=b->a;
440             } else col += in[j]%bs;
441           } else col = in[j];
442           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
443           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
444           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
445         }
446       }
447     } else {
448       if (!baij->donotstash) {
449         if (roworiented) {
450           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
451         } else {
452           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
453         }
454       }
455     }
456   }
457   PetscFunctionReturn(0);
458 }
459 
460 #undef __FUNCT__
461 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_MatScalar"
462 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
463 {
464   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
465   const MatScalar *value;
466   MatScalar       *barray=baij->barray;
467   PetscTruth      roworiented = baij->roworiented;
468   PetscErrorCode  ierr;
469   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstart;
470   PetscInt        rend=baij->rend,cstart=baij->cstart,stepval;
471   PetscInt        cend=baij->cend,bs=mat->bs,bs2=baij->bs2;
472 
473   PetscFunctionBegin;
474   if(!barray) {
475     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
476     baij->barray = barray;
477   }
478 
479   if (roworiented) {
480     stepval = (n-1)*bs;
481   } else {
482     stepval = (m-1)*bs;
483   }
484   for (i=0; i<m; i++) {
485     if (im[i] < 0) continue;
486 #if defined(PETSC_USE_DEBUG)
487     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
488 #endif
489     if (im[i] >= rstart && im[i] < rend) {
490       row = im[i] - rstart;
491       for (j=0; j<n; j++) {
492         /* If NumCol = 1 then a copy is not required */
493         if ((roworiented) && (n == 1)) {
494           barray = (MatScalar*)v + i*bs2;
495         } else if((!roworiented) && (m == 1)) {
496           barray = (MatScalar*)v + j*bs2;
497         } else { /* Here a copy is required */
498           if (roworiented) {
499             value = v + i*(stepval+bs)*bs + j*bs;
500           } else {
501             value = v + j*(stepval+bs)*bs + i*bs;
502           }
503           for (ii=0; ii<bs; ii++,value+=stepval) {
504             for (jj=0; jj<bs; jj++) {
505               *barray++  = *value++;
506             }
507           }
508           barray -=bs2;
509         }
510 
511         if (in[j] >= cstart && in[j] < cend){
512           col  = in[j] - cstart;
513           ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
514         }
515         else if (in[j] < 0) continue;
516 #if defined(PETSC_USE_DEBUG)
517         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
518 #endif
519         else {
520           if (mat->was_assembled) {
521             if (!baij->colmap) {
522               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
523             }
524 
525 #if defined(PETSC_USE_DEBUG)
526 #if defined (PETSC_USE_CTABLE)
527             { PetscInt data;
528               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
529               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
530             }
531 #else
532             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
533 #endif
534 #endif
535 #if defined (PETSC_USE_CTABLE)
536 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
537             col  = (col - 1)/bs;
538 #else
539             col = (baij->colmap[in[j]] - 1)/bs;
540 #endif
541             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
542               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
543               col =  in[j];
544             }
545           }
546           else col = in[j];
547           ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
548         }
549       }
550     } else {
551       if (!baij->donotstash) {
552         if (roworiented) {
553           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
554         } else {
555           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
556         }
557       }
558     }
559   }
560   PetscFunctionReturn(0);
561 }
562 
563 #define HASH_KEY 0.6180339887
564 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
565 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
566 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
567 #undef __FUNCT__
568 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT_MatScalar"
569 PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
570 {
571   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
572   PetscTruth     roworiented = baij->roworiented;
573   PetscErrorCode ierr;
574   PetscInt       i,j,row,col;
575   PetscInt       rstart_orig=baij->rstart_bs;
576   PetscInt       rend_orig=baij->rend_bs,Nbs=baij->Nbs;
577   PetscInt       h1,key,size=baij->ht_size,bs=mat->bs,*HT=baij->ht,idx;
578   PetscReal      tmp;
579   MatScalar      **HD = baij->hd,value;
580 #if defined(PETSC_USE_DEBUG)
581   PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
582 #endif
583 
584   PetscFunctionBegin;
585 
586   for (i=0; i<m; i++) {
587 #if defined(PETSC_USE_DEBUG)
588     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
589     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1);
590 #endif
591       row = im[i];
592     if (row >= rstart_orig && row < rend_orig) {
593       for (j=0; j<n; j++) {
594         col = in[j];
595         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
596         /* Look up PetscInto the Hash Table */
597         key = (row/bs)*Nbs+(col/bs)+1;
598         h1  = HASH(size,key,tmp);
599 
600 
601         idx = h1;
602 #if defined(PETSC_USE_DEBUG)
603         insert_ct++;
604         total_ct++;
605         if (HT[idx] != key) {
606           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
607           if (idx == size) {
608             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
609             if (idx == h1) {
610               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
611             }
612           }
613         }
614 #else
615         if (HT[idx] != key) {
616           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
617           if (idx == size) {
618             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
619             if (idx == h1) {
620               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
621             }
622           }
623         }
624 #endif
625         /* A HASH table entry is found, so insert the values at the correct address */
626         if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
627         else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
628       }
629     } else {
630       if (!baij->donotstash) {
631         if (roworiented) {
632           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
633         } else {
634           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
635         }
636       }
637     }
638   }
639 #if defined(PETSC_USE_DEBUG)
640   baij->ht_total_ct = total_ct;
641   baij->ht_insert_ct = insert_ct;
642 #endif
643   PetscFunctionReturn(0);
644 }
645 
646 #undef __FUNCT__
647 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT_MatScalar"
648 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
649 {
650   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
651   PetscTruth      roworiented = baij->roworiented;
652   PetscErrorCode  ierr;
653   PetscInt        i,j,ii,jj,row,col;
654   PetscInt        rstart=baij->rstart ;
655   PetscInt        rend=baij->rend,stepval,bs=mat->bs,bs2=baij->bs2;
656   PetscInt        h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
657   PetscReal       tmp;
658   MatScalar       **HD = baij->hd,*baij_a;
659   const MatScalar *v_t,*value;
660 #if defined(PETSC_USE_DEBUG)
661   PetscInt        total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
662 #endif
663 
664   PetscFunctionBegin;
665 
666   if (roworiented) {
667     stepval = (n-1)*bs;
668   } else {
669     stepval = (m-1)*bs;
670   }
671   for (i=0; i<m; i++) {
672 #if defined(PETSC_USE_DEBUG)
673     if (im[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
674     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
675 #endif
676     row   = im[i];
677     v_t   = v + i*bs2;
678     if (row >= rstart && row < rend) {
679       for (j=0; j<n; j++) {
680         col = in[j];
681 
682         /* Look up into the Hash Table */
683         key = row*Nbs+col+1;
684         h1  = HASH(size,key,tmp);
685 
686         idx = h1;
687 #if defined(PETSC_USE_DEBUG)
688         total_ct++;
689         insert_ct++;
690        if (HT[idx] != key) {
691           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
692           if (idx == size) {
693             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
694             if (idx == h1) {
695               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
696             }
697           }
698         }
699 #else
700         if (HT[idx] != key) {
701           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
702           if (idx == size) {
703             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
704             if (idx == h1) {
705               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
706             }
707           }
708         }
709 #endif
710         baij_a = HD[idx];
711         if (roworiented) {
712           /*value = v + i*(stepval+bs)*bs + j*bs;*/
713           /* value = v + (i*(stepval+bs)+j)*bs; */
714           value = v_t;
715           v_t  += bs;
716           if (addv == ADD_VALUES) {
717             for (ii=0; ii<bs; ii++,value+=stepval) {
718               for (jj=ii; jj<bs2; jj+=bs) {
719                 baij_a[jj]  += *value++;
720               }
721             }
722           } else {
723             for (ii=0; ii<bs; ii++,value+=stepval) {
724               for (jj=ii; jj<bs2; jj+=bs) {
725                 baij_a[jj]  = *value++;
726               }
727             }
728           }
729         } else {
730           value = v + j*(stepval+bs)*bs + i*bs;
731           if (addv == ADD_VALUES) {
732             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
733               for (jj=0; jj<bs; jj++) {
734                 baij_a[jj]  += *value++;
735               }
736             }
737           } else {
738             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
739               for (jj=0; jj<bs; jj++) {
740                 baij_a[jj]  = *value++;
741               }
742             }
743           }
744         }
745       }
746     } else {
747       if (!baij->donotstash) {
748         if (roworiented) {
749           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
750         } else {
751           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
752         }
753       }
754     }
755   }
756 #if defined(PETSC_USE_DEBUG)
757   baij->ht_total_ct = total_ct;
758   baij->ht_insert_ct = insert_ct;
759 #endif
760   PetscFunctionReturn(0);
761 }
762 
763 #undef __FUNCT__
764 #define __FUNCT__ "MatGetValues_MPIBAIJ"
765 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
766 {
767   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
768   PetscErrorCode ierr;
769   PetscInt       bs=mat->bs,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
770   PetscInt       bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;
771 
772   PetscFunctionBegin;
773   for (i=0; i<m; i++) {
774     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
775     if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1);
776     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
777       row = idxm[i] - bsrstart;
778       for (j=0; j<n; j++) {
779         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]);
780         if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1);
781         if (idxn[j] >= bscstart && idxn[j] < bscend){
782           col = idxn[j] - bscstart;
783           ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
784         } else {
785           if (!baij->colmap) {
786             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
787           }
788 #if defined (PETSC_USE_CTABLE)
789           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
790           data --;
791 #else
792           data = baij->colmap[idxn[j]/bs]-1;
793 #endif
794           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
795           else {
796             col  = data + idxn[j]%bs;
797             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
798           }
799         }
800       }
801     } else {
802       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
803     }
804   }
805  PetscFunctionReturn(0);
806 }
807 
808 #undef __FUNCT__
809 #define __FUNCT__ "MatNorm_MPIBAIJ"
810 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
811 {
812   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
813   Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
814   PetscErrorCode ierr;
815   PetscInt       i,j,bs2=baij->bs2,bs=baij->A->bs,nz,row,col;
816   PetscReal      sum = 0.0;
817   MatScalar      *v;
818 
819   PetscFunctionBegin;
820   if (baij->size == 1) {
821     ierr =  MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
822   } else {
823     if (type == NORM_FROBENIUS) {
824       v = amat->a;
825       nz = amat->nz*bs2;
826       for (i=0; i<nz; i++) {
827 #if defined(PETSC_USE_COMPLEX)
828         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
829 #else
830         sum += (*v)*(*v); v++;
831 #endif
832       }
833       v = bmat->a;
834       nz = bmat->nz*bs2;
835       for (i=0; i<nz; i++) {
836 #if defined(PETSC_USE_COMPLEX)
837         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
838 #else
839         sum += (*v)*(*v); v++;
840 #endif
841       }
842       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
843       *nrm = sqrt(*nrm);
844     } else if (type == NORM_1) { /* max column sum */
845       PetscReal *tmp,*tmp2;
846       PetscInt  *jj,*garray=baij->garray,cstart=baij->cstart;
847       ierr = PetscMalloc((2*mat->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
848       tmp2 = tmp + mat->N;
849       ierr = PetscMemzero(tmp,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
850       v = amat->a; jj = amat->j;
851       for (i=0; i<amat->nz; i++) {
852         for (j=0; j<bs; j++){
853           col = bs*(cstart + *jj) + j; /* column index */
854           for (row=0; row<bs; row++){
855             tmp[col] += PetscAbsScalar(*v);  v++;
856           }
857         }
858         jj++;
859       }
860       v = bmat->a; jj = bmat->j;
861       for (i=0; i<bmat->nz; i++) {
862         for (j=0; j<bs; j++){
863           col = bs*garray[*jj] + j;
864           for (row=0; row<bs; row++){
865             tmp[col] += PetscAbsScalar(*v); v++;
866           }
867         }
868         jj++;
869       }
870       ierr = MPI_Allreduce(tmp,tmp2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
871       *nrm = 0.0;
872       for (j=0; j<mat->N; j++) {
873         if (tmp2[j] > *nrm) *nrm = tmp2[j];
874       }
875       ierr = PetscFree(tmp);CHKERRQ(ierr);
876     } else if (type == NORM_INFINITY) { /* max row sum */
877       PetscReal *sums;
878       ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr)
879       sum = 0.0;
880       for (j=0; j<amat->mbs; j++) {
881         for (row=0; row<bs; row++) sums[row] = 0.0;
882         v = amat->a + bs2*amat->i[j];
883         nz = amat->i[j+1]-amat->i[j];
884         for (i=0; i<nz; i++) {
885           for (col=0; col<bs; col++){
886             for (row=0; row<bs; row++){
887               sums[row] += PetscAbsScalar(*v); v++;
888             }
889           }
890         }
891         v = bmat->a + bs2*bmat->i[j];
892         nz = bmat->i[j+1]-bmat->i[j];
893         for (i=0; i<nz; i++) {
894           for (col=0; col<bs; col++){
895             for (row=0; row<bs; row++){
896               sums[row] += PetscAbsScalar(*v); v++;
897             }
898           }
899         }
900         for (row=0; row<bs; row++){
901           if (sums[row] > sum) sum = sums[row];
902         }
903       }
904       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
905       ierr = PetscFree(sums);CHKERRQ(ierr);
906     } else {
907       SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
908     }
909   }
910   PetscFunctionReturn(0);
911 }
912 
913 /*
914   Creates the hash table, and sets the table
915   This table is created only once.
916   If new entried need to be added to the matrix
917   then the hash table has to be destroyed and
918   recreated.
919 */
920 #undef __FUNCT__
921 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
922 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
923 {
924   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
925   Mat            A = baij->A,B=baij->B;
926   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
927   PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
928   PetscErrorCode ierr;
929   PetscInt       size,bs2=baij->bs2,rstart=baij->rstart;
930   PetscInt       cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs;
931   PetscInt       *HT,key;
932   MatScalar      **HD;
933   PetscReal      tmp;
934 #if defined(PETSC_USE_DEBUG)
935   PetscInt       ct=0,max=0;
936 #endif
937 
938   PetscFunctionBegin;
939   baij->ht_size=(PetscInt)(factor*nz);
940   size = baij->ht_size;
941 
942   if (baij->ht) {
943     PetscFunctionReturn(0);
944   }
945 
946   /* Allocate Memory for Hash Table */
947   ierr     = PetscMalloc((size)*(sizeof(PetscInt)+sizeof(MatScalar*))+1,&baij->hd);CHKERRQ(ierr);
948   baij->ht = (PetscInt*)(baij->hd + size);
949   HD       = baij->hd;
950   HT       = baij->ht;
951 
952 
953   ierr = PetscMemzero(HD,size*(sizeof(PetscInt)+sizeof(PetscScalar*)));CHKERRQ(ierr);
954 
955 
956   /* Loop Over A */
957   for (i=0; i<a->mbs; i++) {
958     for (j=ai[i]; j<ai[i+1]; j++) {
959       row = i+rstart;
960       col = aj[j]+cstart;
961 
962       key = row*Nbs + col + 1;
963       h1  = HASH(size,key,tmp);
964       for (k=0; k<size; k++){
965         if (!HT[(h1+k)%size]) {
966           HT[(h1+k)%size] = key;
967           HD[(h1+k)%size] = a->a + j*bs2;
968           break;
969 #if defined(PETSC_USE_DEBUG)
970         } else {
971           ct++;
972 #endif
973         }
974       }
975 #if defined(PETSC_USE_DEBUG)
976       if (k> max) max = k;
977 #endif
978     }
979   }
980   /* Loop Over B */
981   for (i=0; i<b->mbs; i++) {
982     for (j=bi[i]; j<bi[i+1]; j++) {
983       row = i+rstart;
984       col = garray[bj[j]];
985       key = row*Nbs + col + 1;
986       h1  = HASH(size,key,tmp);
987       for (k=0; k<size; k++){
988         if (!HT[(h1+k)%size]) {
989           HT[(h1+k)%size] = key;
990           HD[(h1+k)%size] = b->a + j*bs2;
991           break;
992 #if defined(PETSC_USE_DEBUG)
993         } else {
994           ct++;
995 #endif
996         }
997       }
998 #if defined(PETSC_USE_DEBUG)
999       if (k> max) max = k;
1000 #endif
1001     }
1002   }
1003 
1004   /* Print Summary */
1005 #if defined(PETSC_USE_DEBUG)
1006   for (i=0,j=0; i<size; i++) {
1007     if (HT[i]) {j++;}
1008   }
1009   ierr = PetscLogInfo((0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max));CHKERRQ(ierr);
1010 #endif
1011   PetscFunctionReturn(0);
1012 }
1013 
1014 #undef __FUNCT__
1015 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
1016 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
1017 {
1018   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1019   PetscErrorCode ierr;
1020   PetscInt       nstash,reallocs;
1021   InsertMode     addv;
1022 
1023   PetscFunctionBegin;
1024   if (baij->donotstash) {
1025     PetscFunctionReturn(0);
1026   }
1027 
1028   /* make sure all processors are either in INSERTMODE or ADDMODE */
1029   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
1030   if (addv == (ADD_VALUES|INSERT_VALUES)) {
1031     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
1032   }
1033   mat->insertmode = addv; /* in case this processor had no cache */
1034 
1035   ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
1036   ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
1037   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
1038   ierr = PetscLogInfo((0,"MatAssemblyBegin_MPIBAIJ:Stash has %D entries,uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr);
1039   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
1040   ierr = PetscLogInfo((0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr);
1041   PetscFunctionReturn(0);
1042 }
1043 
1044 #undef __FUNCT__
1045 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
1046 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
1047 {
1048   Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
1049   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data;
1050   PetscErrorCode ierr;
1051   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
1052   PetscInt       *row,*col,other_disassembled;
1053   PetscTruth     r1,r2,r3;
1054   MatScalar      *val;
1055   InsertMode     addv = mat->insertmode;
1056   PetscMPIInt    n;
1057 
1058   PetscFunctionBegin;
1059   if (!baij->donotstash) {
1060     while (1) {
1061       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
1062       if (!flg) break;
1063 
1064       for (i=0; i<n;) {
1065         /* Now identify the consecutive vals belonging to the same row */
1066         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
1067         if (j < n) ncols = j-i;
1068         else       ncols = n-i;
1069         /* Now assemble all these values with a single function call */
1070         ierr = MatSetValues_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
1071         i = j;
1072       }
1073     }
1074     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
1075     /* Now process the block-stash. Since the values are stashed column-oriented,
1076        set the roworiented flag to column oriented, and after MatSetValues()
1077        restore the original flags */
1078     r1 = baij->roworiented;
1079     r2 = a->roworiented;
1080     r3 = b->roworiented;
1081     baij->roworiented = PETSC_FALSE;
1082     a->roworiented    = PETSC_FALSE;
1083     b->roworiented    = PETSC_FALSE;
1084     while (1) {
1085       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
1086       if (!flg) break;
1087 
1088       for (i=0; i<n;) {
1089         /* Now identify the consecutive vals belonging to the same row */
1090         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
1091         if (j < n) ncols = j-i;
1092         else       ncols = n-i;
1093         ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
1094         i = j;
1095       }
1096     }
1097     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
1098     baij->roworiented = r1;
1099     a->roworiented    = r2;
1100     b->roworiented    = r3;
1101   }
1102 
1103   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
1104   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
1105 
1106   /* determine if any processor has disassembled, if so we must
1107      also disassemble ourselfs, in order that we may reassemble. */
1108   /*
1109      if nonzero structure of submatrix B cannot change then we know that
1110      no processor disassembled thus we can skip this stuff
1111   */
1112   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
1113     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
1114     if (mat->was_assembled && !other_disassembled) {
1115       ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
1116     }
1117   }
1118 
1119   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
1120     ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
1121   }
1122   b->compressedrow.use = PETSC_TRUE;
1123   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
1124   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
1125 
1126 #if defined(PETSC_USE_DEBUG)
1127   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
1128     ierr = PetscLogInfo((0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct));CHKERRQ(ierr);
1129     baij->ht_total_ct  = 0;
1130     baij->ht_insert_ct = 0;
1131   }
1132 #endif
1133   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
1134     ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
1135     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
1136     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
1137   }
1138 
1139   if (baij->rowvalues) {
1140     ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
1141     baij->rowvalues = 0;
1142   }
1143   PetscFunctionReturn(0);
1144 }
1145 
1146 #undef __FUNCT__
1147 #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
1148 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1149 {
1150   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
1151   PetscErrorCode    ierr;
1152   PetscMPIInt       size = baij->size,rank = baij->rank;
1153   PetscInt          bs = mat->bs;
1154   PetscTruth        iascii,isdraw;
1155   PetscViewer       sviewer;
1156   PetscViewerFormat format;
1157 
1158   PetscFunctionBegin;
1159   /* printf(" MatView_MPIBAIJ_ASCIIorDraworSocket is called ...\n"); */
1160   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1161   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1162   if (iascii) {
1163     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1164     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1165       MatInfo info;
1166       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
1167       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1168       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
1169               rank,mat->m,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs,
1170               mat->bs,(PetscInt)info.memory);CHKERRQ(ierr);
1171       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1172       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
1173       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1174       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
1175       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1176       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
1177       PetscFunctionReturn(0);
1178     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1179       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1180       PetscFunctionReturn(0);
1181     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1182       PetscFunctionReturn(0);
1183     }
1184   }
1185 
1186   if (isdraw) {
1187     PetscDraw       draw;
1188     PetscTruth isnull;
1189     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1190     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1191   }
1192 
1193   if (size == 1) {
1194     ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr);
1195     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
1196   } else {
1197     /* assemble the entire matrix onto first processor. */
1198     Mat         A;
1199     Mat_SeqBAIJ *Aloc;
1200     PetscInt    M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
1201     MatScalar   *a;
1202 
1203     /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
1204     /* Perhaps this should be the type of mat? */
1205     if (!rank) {
1206       ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr);
1207     } else {
1208       ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr);
1209     }
1210     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
1211     ierr = MatMPIBAIJSetPreallocation(A,mat->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1212     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1213 
1214     /* copy over the A part */
1215     Aloc = (Mat_SeqBAIJ*)baij->A->data;
1216     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1217     ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
1218 
1219     for (i=0; i<mbs; i++) {
1220       rvals[0] = bs*(baij->rstart + i);
1221       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1222       for (j=ai[i]; j<ai[i+1]; j++) {
1223         col = (baij->cstart+aj[j])*bs;
1224         for (k=0; k<bs; k++) {
1225           ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1226           col++; a += bs;
1227         }
1228       }
1229     }
1230     /* copy over the B part */
1231     Aloc = (Mat_SeqBAIJ*)baij->B->data;
1232     ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1233     for (i=0; i<mbs; i++) {
1234       rvals[0] = bs*(baij->rstart + i);
1235       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1236       for (j=ai[i]; j<ai[i+1]; j++) {
1237         col = baij->garray[aj[j]]*bs;
1238         for (k=0; k<bs; k++) {
1239           ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1240           col++; a += bs;
1241         }
1242       }
1243     }
1244     ierr = PetscFree(rvals);CHKERRQ(ierr);
1245     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1246     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1247     /*
1248        Everyone has to call to draw the matrix since the graphics waits are
1249        synchronized across all processors that share the PetscDraw object
1250     */
1251     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1252     if (!rank) {
1253       ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
1254       ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1255     }
1256     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1257     ierr = MatDestroy(A);CHKERRQ(ierr);
1258   }
1259   PetscFunctionReturn(0);
1260 }
1261 
1262 #undef __FUNCT__
1263 #define __FUNCT__ "MatView_MPIBAIJ"
1264 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
1265 {
1266   PetscErrorCode ierr;
1267   PetscTruth     iascii,isdraw,issocket,isbinary;
1268 
1269   PetscFunctionBegin;
1270   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1271   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1272   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1273   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1274   if (iascii || isdraw || issocket || isbinary) {
1275     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1276   } else {
1277     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
1278   }
1279   PetscFunctionReturn(0);
1280 }
1281 
1282 #undef __FUNCT__
1283 #define __FUNCT__ "MatDestroy_MPIBAIJ"
1284 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
1285 {
1286   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1287   PetscErrorCode ierr;
1288 
1289   PetscFunctionBegin;
1290 #if defined(PETSC_USE_LOG)
1291   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->M,mat->N);
1292 #endif
1293   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1294   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
1295   ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
1296   ierr = MatDestroy(baij->A);CHKERRQ(ierr);
1297   ierr = MatDestroy(baij->B);CHKERRQ(ierr);
1298 #if defined (PETSC_USE_CTABLE)
1299   if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
1300 #else
1301   if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
1302 #endif
1303   if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
1304   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
1305   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
1306   if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
1307   if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
1308   if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
1309 #if defined(PETSC_USE_MAT_SINGLE)
1310   if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);}
1311 #endif
1312   ierr = PetscFree(baij);CHKERRQ(ierr);
1313 
1314   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1315   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1316   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
1317   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1318   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1319   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
1320   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr);
1321   PetscFunctionReturn(0);
1322 }
1323 
1324 #undef __FUNCT__
1325 #define __FUNCT__ "MatMult_MPIBAIJ"
1326 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1327 {
1328   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1329   PetscErrorCode ierr;
1330   PetscInt       nt;
1331 
1332   PetscFunctionBegin;
1333   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1334   if (nt != A->n) {
1335     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1336   }
1337   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1338   if (nt != A->m) {
1339     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
1340   }
1341   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1342   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1343   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1344   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1345   ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1346   PetscFunctionReturn(0);
1347 }
1348 
1349 #undef __FUNCT__
1350 #define __FUNCT__ "MatMultAdd_MPIBAIJ"
1351 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1352 {
1353   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1354   PetscErrorCode ierr;
1355 
1356   PetscFunctionBegin;
1357   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1358   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1359   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1360   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1361   PetscFunctionReturn(0);
1362 }
1363 
1364 #undef __FUNCT__
1365 #define __FUNCT__ "MatMultTranspose_MPIBAIJ"
1366 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1367 {
1368   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1369   PetscErrorCode ierr;
1370   PetscTruth     merged;
1371 
1372   PetscFunctionBegin;
1373   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1374   /* do nondiagonal part */
1375   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1376   if (!merged) {
1377     /* send it on its way */
1378     ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1379     /* do local part */
1380     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1381     /* receive remote parts: note this assumes the values are not actually */
1382     /* inserted in yy until the next line */
1383     ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1384   } else {
1385     /* do local part */
1386     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1387     /* send it on its way */
1388     ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1389     /* values actually were received in the Begin() but we need to call this nop */
1390     ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1391   }
1392   PetscFunctionReturn(0);
1393 }
1394 
1395 #undef __FUNCT__
1396 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
1397 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1398 {
1399   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1400   PetscErrorCode ierr;
1401 
1402   PetscFunctionBegin;
1403   /* do nondiagonal part */
1404   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1405   /* send it on its way */
1406   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1407   /* do local part */
1408   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1409   /* receive remote parts: note this assumes the values are not actually */
1410   /* inserted in yy until the next line, which is true for my implementation*/
1411   /* but is not perhaps always true. */
1412   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1413   PetscFunctionReturn(0);
1414 }
1415 
1416 /*
1417   This only works correctly for square matrices where the subblock A->A is the
1418    diagonal block
1419 */
1420 #undef __FUNCT__
1421 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
1422 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1423 {
1424   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1425   PetscErrorCode ierr;
1426 
1427   PetscFunctionBegin;
1428   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1429   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1430   PetscFunctionReturn(0);
1431 }
1432 
1433 #undef __FUNCT__
1434 #define __FUNCT__ "MatScale_MPIBAIJ"
1435 PetscErrorCode MatScale_MPIBAIJ(const PetscScalar *aa,Mat A)
1436 {
1437   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1438   PetscErrorCode ierr;
1439 
1440   PetscFunctionBegin;
1441   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
1442   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
1443   PetscFunctionReturn(0);
1444 }
1445 
1446 #undef __FUNCT__
1447 #define __FUNCT__ "MatGetRow_MPIBAIJ"
1448 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1449 {
1450   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
1451   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1452   PetscErrorCode ierr;
1453   PetscInt       bs = matin->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1454   PetscInt       nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs;
1455   PetscInt       *cmap,*idx_p,cstart = mat->cstart;
1456 
1457   PetscFunctionBegin;
1458   if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1459   mat->getrowactive = PETSC_TRUE;
1460 
1461   if (!mat->rowvalues && (idx || v)) {
1462     /*
1463         allocate enough space to hold information from the longest row.
1464     */
1465     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1466     PetscInt     max = 1,mbs = mat->mbs,tmp;
1467     for (i=0; i<mbs; i++) {
1468       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1469       if (max < tmp) { max = tmp; }
1470     }
1471     ierr = PetscMalloc(max*bs2*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1472     mat->rowindices = (PetscInt*)(mat->rowvalues + max*bs2);
1473   }
1474 
1475   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows")
1476   lrow = row - brstart;
1477 
1478   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1479   if (!v)   {pvA = 0; pvB = 0;}
1480   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1481   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1482   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1483   nztot = nzA + nzB;
1484 
1485   cmap  = mat->garray;
1486   if (v  || idx) {
1487     if (nztot) {
1488       /* Sort by increasing column numbers, assuming A and B already sorted */
1489       PetscInt imark = -1;
1490       if (v) {
1491         *v = v_p = mat->rowvalues;
1492         for (i=0; i<nzB; i++) {
1493           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1494           else break;
1495         }
1496         imark = i;
1497         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1498         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1499       }
1500       if (idx) {
1501         *idx = idx_p = mat->rowindices;
1502         if (imark > -1) {
1503           for (i=0; i<imark; i++) {
1504             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1505           }
1506         } else {
1507           for (i=0; i<nzB; i++) {
1508             if (cmap[cworkB[i]/bs] < cstart)
1509               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1510             else break;
1511           }
1512           imark = i;
1513         }
1514         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1515         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1516       }
1517     } else {
1518       if (idx) *idx = 0;
1519       if (v)   *v   = 0;
1520     }
1521   }
1522   *nz = nztot;
1523   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1524   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1525   PetscFunctionReturn(0);
1526 }
1527 
1528 #undef __FUNCT__
1529 #define __FUNCT__ "MatRestoreRow_MPIBAIJ"
1530 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1531 {
1532   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1533 
1534   PetscFunctionBegin;
1535   if (!baij->getrowactive) {
1536     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1537   }
1538   baij->getrowactive = PETSC_FALSE;
1539   PetscFunctionReturn(0);
1540 }
1541 
1542 #undef __FUNCT__
1543 #define __FUNCT__ "MatZeroEntries_MPIBAIJ"
1544 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1545 {
1546   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1547   PetscErrorCode ierr;
1548 
1549   PetscFunctionBegin;
1550   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1551   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1552   PetscFunctionReturn(0);
1553 }
1554 
1555 #undef __FUNCT__
1556 #define __FUNCT__ "MatGetInfo_MPIBAIJ"
1557 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1558 {
1559   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
1560   Mat            A = a->A,B = a->B;
1561   PetscErrorCode ierr;
1562   PetscReal      isend[5],irecv[5];
1563 
1564   PetscFunctionBegin;
1565   info->block_size     = (PetscReal)matin->bs;
1566   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1567   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1568   isend[3] = info->memory;  isend[4] = info->mallocs;
1569   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1570   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1571   isend[3] += info->memory;  isend[4] += info->mallocs;
1572   if (flag == MAT_LOCAL) {
1573     info->nz_used      = isend[0];
1574     info->nz_allocated = isend[1];
1575     info->nz_unneeded  = isend[2];
1576     info->memory       = isend[3];
1577     info->mallocs      = isend[4];
1578   } else if (flag == MAT_GLOBAL_MAX) {
1579     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1580     info->nz_used      = irecv[0];
1581     info->nz_allocated = irecv[1];
1582     info->nz_unneeded  = irecv[2];
1583     info->memory       = irecv[3];
1584     info->mallocs      = irecv[4];
1585   } else if (flag == MAT_GLOBAL_SUM) {
1586     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1587     info->nz_used      = irecv[0];
1588     info->nz_allocated = irecv[1];
1589     info->nz_unneeded  = irecv[2];
1590     info->memory       = irecv[3];
1591     info->mallocs      = irecv[4];
1592   } else {
1593     SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1594   }
1595   info->rows_global       = (PetscReal)A->M;
1596   info->columns_global    = (PetscReal)A->N;
1597   info->rows_local        = (PetscReal)A->m;
1598   info->columns_local     = (PetscReal)A->N;
1599   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1600   info->fill_ratio_needed = 0;
1601   info->factor_mallocs    = 0;
1602   PetscFunctionReturn(0);
1603 }
1604 
1605 #undef __FUNCT__
1606 #define __FUNCT__ "MatSetOption_MPIBAIJ"
1607 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op)
1608 {
1609   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1610   PetscErrorCode ierr;
1611 
1612   PetscFunctionBegin;
1613   switch (op) {
1614   case MAT_NO_NEW_NONZERO_LOCATIONS:
1615   case MAT_YES_NEW_NONZERO_LOCATIONS:
1616   case MAT_COLUMNS_UNSORTED:
1617   case MAT_COLUMNS_SORTED:
1618   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1619   case MAT_KEEP_ZEROED_ROWS:
1620   case MAT_NEW_NONZERO_LOCATION_ERR:
1621     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1622     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1623     break;
1624   case MAT_ROW_ORIENTED:
1625     a->roworiented = PETSC_TRUE;
1626     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1627     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1628     break;
1629   case MAT_ROWS_SORTED:
1630   case MAT_ROWS_UNSORTED:
1631   case MAT_YES_NEW_DIAGONALS:
1632     ierr = PetscLogInfo((A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"));CHKERRQ(ierr);
1633     break;
1634   case MAT_COLUMN_ORIENTED:
1635     a->roworiented = PETSC_FALSE;
1636     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1637     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1638     break;
1639   case MAT_IGNORE_OFF_PROC_ENTRIES:
1640     a->donotstash = PETSC_TRUE;
1641     break;
1642   case MAT_NO_NEW_DIAGONALS:
1643     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1644   case MAT_USE_HASH_TABLE:
1645     a->ht_flag = PETSC_TRUE;
1646     break;
1647   case MAT_SYMMETRIC:
1648   case MAT_STRUCTURALLY_SYMMETRIC:
1649   case MAT_HERMITIAN:
1650   case MAT_SYMMETRY_ETERNAL:
1651     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1652     break;
1653   case MAT_NOT_SYMMETRIC:
1654   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1655   case MAT_NOT_HERMITIAN:
1656   case MAT_NOT_SYMMETRY_ETERNAL:
1657     break;
1658   default:
1659     SETERRQ(PETSC_ERR_SUP,"unknown option");
1660   }
1661   PetscFunctionReturn(0);
1662 }
1663 
1664 #undef __FUNCT__
1665 #define __FUNCT__ "MatTranspose_MPIBAIJ("
1666 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,Mat *matout)
1667 {
1668   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
1669   Mat_SeqBAIJ    *Aloc;
1670   Mat            B;
1671   PetscErrorCode ierr;
1672   PetscInt       M=A->M,N=A->N,*ai,*aj,i,*rvals,j,k,col;
1673   PetscInt       bs=A->bs,mbs=baij->mbs;
1674   MatScalar      *a;
1675 
1676   PetscFunctionBegin;
1677   if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1678   ierr = MatCreate(A->comm,A->n,A->m,N,M,&B);CHKERRQ(ierr);
1679   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
1680   ierr = MatMPIBAIJSetPreallocation(B,A->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1681 
1682   /* copy over the A part */
1683   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1684   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1685   ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
1686 
1687   for (i=0; i<mbs; i++) {
1688     rvals[0] = bs*(baij->rstart + i);
1689     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1690     for (j=ai[i]; j<ai[i+1]; j++) {
1691       col = (baij->cstart+aj[j])*bs;
1692       for (k=0; k<bs; k++) {
1693         ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1694         col++; a += bs;
1695       }
1696     }
1697   }
1698   /* copy over the B part */
1699   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1700   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1701   for (i=0; i<mbs; i++) {
1702     rvals[0] = bs*(baij->rstart + i);
1703     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1704     for (j=ai[i]; j<ai[i+1]; j++) {
1705       col = baij->garray[aj[j]]*bs;
1706       for (k=0; k<bs; k++) {
1707         ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1708         col++; a += bs;
1709       }
1710     }
1711   }
1712   ierr = PetscFree(rvals);CHKERRQ(ierr);
1713   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1714   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1715 
1716   if (matout) {
1717     *matout = B;
1718   } else {
1719     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1720   }
1721   PetscFunctionReturn(0);
1722 }
1723 
1724 #undef __FUNCT__
1725 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
1726 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1727 {
1728   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1729   Mat            a = baij->A,b = baij->B;
1730   PetscErrorCode ierr;
1731   PetscInt       s1,s2,s3;
1732 
1733   PetscFunctionBegin;
1734   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1735   if (rr) {
1736     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1737     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1738     /* Overlap communication with computation. */
1739     ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1740   }
1741   if (ll) {
1742     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1743     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1744     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1745   }
1746   /* scale  the diagonal block */
1747   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1748 
1749   if (rr) {
1750     /* Do a scatter end and then right scale the off-diagonal block */
1751     ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1752     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1753   }
1754 
1755   PetscFunctionReturn(0);
1756 }
1757 
1758 #undef __FUNCT__
1759 #define __FUNCT__ "MatZeroRows_MPIBAIJ"
1760 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,IS is,const PetscScalar *diag)
1761 {
1762   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1763   PetscErrorCode ierr;
1764   PetscMPIInt    imdex,size = l->size,n,rank = l->rank;
1765   PetscInt       i,N,*rows,*owners = l->rowners;
1766   PetscInt       *nprocs,j,idx,nsends,row;
1767   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
1768   PetscInt       *rvalues,tag = A->tag,count,base,slen,*source,lastidx = -1;
1769   PetscInt       *lens,*lrows,*values,bs=A->bs,rstart_bs=l->rstart_bs;
1770   MPI_Comm       comm = A->comm;
1771   MPI_Request    *send_waits,*recv_waits;
1772   MPI_Status     recv_status,*send_status;
1773   IS             istmp;
1774 #if defined(PETSC_DEBUG)
1775   PetscTruth     found = PETSC_FALSE;
1776 #endif
1777 
1778   PetscFunctionBegin;
1779   ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
1780   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
1781 
1782   /*  first count number of contributors to each processor */
1783   ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
1784   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
1785   ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
1786   j = 0;
1787   for (i=0; i<N; i++) {
1788     if (lastidx > (idx = rows[i])) j = 0;
1789     lastidx = idx;
1790     for (; j<size; j++) {
1791       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
1792         nprocs[2*j]++;
1793         nprocs[2*j+1] = 1;
1794         owner[i] = j;
1795 #if defined(PETSC_DEBUG)
1796         found = PETSC_TRUE;
1797 #endif
1798         break;
1799       }
1800     }
1801 #if defined(PETSC_DEBUG)
1802     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
1803     found = PETSC_FALSE;
1804 #endif
1805   }
1806   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
1807 
1808   /* inform other processors of number of messages and max length*/
1809   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
1810 
1811   /* post receives:   */
1812   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
1813   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
1814   for (i=0; i<nrecvs; i++) {
1815     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
1816   }
1817 
1818   /* do sends:
1819      1) starts[i] gives the starting index in svalues for stuff going to
1820      the ith processor
1821   */
1822   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
1823   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
1824   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
1825   starts[0]  = 0;
1826   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1827   for (i=0; i<N; i++) {
1828     svalues[starts[owner[i]]++] = rows[i];
1829   }
1830   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
1831 
1832   starts[0] = 0;
1833   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1834   count = 0;
1835   for (i=0; i<size; i++) {
1836     if (nprocs[2*i+1]) {
1837       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
1838     }
1839   }
1840   ierr = PetscFree(starts);CHKERRQ(ierr);
1841 
1842   base = owners[rank]*bs;
1843 
1844   /*  wait on receives */
1845   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
1846   source = lens + nrecvs;
1847   count  = nrecvs; slen = 0;
1848   while (count) {
1849     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
1850     /* unpack receives into our local space */
1851     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
1852     source[imdex]  = recv_status.MPI_SOURCE;
1853     lens[imdex]    = n;
1854     slen          += n;
1855     count--;
1856   }
1857   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1858 
1859   /* move the data into the send scatter */
1860   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
1861   count = 0;
1862   for (i=0; i<nrecvs; i++) {
1863     values = rvalues + i*nmax;
1864     for (j=0; j<lens[i]; j++) {
1865       lrows[count++] = values[j] - base;
1866     }
1867   }
1868   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1869   ierr = PetscFree(lens);CHKERRQ(ierr);
1870   ierr = PetscFree(owner);CHKERRQ(ierr);
1871   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1872 
1873   /* actually zap the local rows */
1874   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
1875   ierr = PetscLogObjectParent(A,istmp);CHKERRQ(ierr);
1876 
1877   /*
1878         Zero the required rows. If the "diagonal block" of the matrix
1879      is square and the user wishes to set the diagonal we use seperate
1880      code so that MatSetValues() is not called for each diagonal allocating
1881      new memory, thus calling lots of mallocs and slowing things down.
1882 
1883        Contributed by: Mathew Knepley
1884   */
1885   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1886   ierr = MatZeroRows_SeqBAIJ(l->B,istmp,0);CHKERRQ(ierr);
1887   if (diag && (l->A->M == l->A->N)) {
1888     ierr = MatZeroRows_SeqBAIJ(l->A,istmp,diag);CHKERRQ(ierr);
1889   } else if (diag) {
1890     ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr);
1891     if (((Mat_SeqBAIJ*)l->A->data)->nonew) {
1892       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1893 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1894     }
1895     for (i=0; i<slen; i++) {
1896       row  = lrows[i] + rstart_bs;
1897       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
1898     }
1899     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1900     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1901   } else {
1902     ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr);
1903   }
1904 
1905   ierr = ISDestroy(istmp);CHKERRQ(ierr);
1906   ierr = PetscFree(lrows);CHKERRQ(ierr);
1907 
1908   /* wait on sends */
1909   if (nsends) {
1910     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1911     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1912     ierr = PetscFree(send_status);CHKERRQ(ierr);
1913   }
1914   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1915   ierr = PetscFree(svalues);CHKERRQ(ierr);
1916 
1917   PetscFunctionReturn(0);
1918 }
1919 
1920 #undef __FUNCT__
1921 #define __FUNCT__ "MatPrintHelp_MPIBAIJ"
1922 PetscErrorCode MatPrintHelp_MPIBAIJ(Mat A)
1923 {
1924   Mat_MPIBAIJ       *a   = (Mat_MPIBAIJ*)A->data;
1925   MPI_Comm          comm = A->comm;
1926   static PetscTruth called = PETSC_FALSE;
1927   PetscErrorCode    ierr;
1928 
1929   PetscFunctionBegin;
1930   if (!a->rank) {
1931     ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr);
1932   }
1933   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1934   ierr = (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
1935   ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
1936   PetscFunctionReturn(0);
1937 }
1938 
1939 #undef __FUNCT__
1940 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
1941 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1942 {
1943   Mat_MPIBAIJ    *a   = (Mat_MPIBAIJ*)A->data;
1944   PetscErrorCode ierr;
1945 
1946   PetscFunctionBegin;
1947   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1948   PetscFunctionReturn(0);
1949 }
1950 
1951 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
1952 
1953 #undef __FUNCT__
1954 #define __FUNCT__ "MatEqual_MPIBAIJ"
1955 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag)
1956 {
1957   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1958   Mat            a,b,c,d;
1959   PetscTruth     flg;
1960   PetscErrorCode ierr;
1961 
1962   PetscFunctionBegin;
1963   a = matA->A; b = matA->B;
1964   c = matB->A; d = matB->B;
1965 
1966   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1967   if (flg) {
1968     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1969   }
1970   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1971   PetscFunctionReturn(0);
1972 }
1973 
1974 
1975 #undef __FUNCT__
1976 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ"
1977 PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A)
1978 {
1979   PetscErrorCode ierr;
1980 
1981   PetscFunctionBegin;
1982   ierr =  MatMPIBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1983   PetscFunctionReturn(0);
1984 }
1985 
1986 /* -------------------------------------------------------------------*/
1987 static struct _MatOps MatOps_Values = {
1988        MatSetValues_MPIBAIJ,
1989        MatGetRow_MPIBAIJ,
1990        MatRestoreRow_MPIBAIJ,
1991        MatMult_MPIBAIJ,
1992 /* 4*/ MatMultAdd_MPIBAIJ,
1993        MatMultTranspose_MPIBAIJ,
1994        MatMultTransposeAdd_MPIBAIJ,
1995        0,
1996        0,
1997        0,
1998 /*10*/ 0,
1999        0,
2000        0,
2001        0,
2002        MatTranspose_MPIBAIJ,
2003 /*15*/ MatGetInfo_MPIBAIJ,
2004        MatEqual_MPIBAIJ,
2005        MatGetDiagonal_MPIBAIJ,
2006        MatDiagonalScale_MPIBAIJ,
2007        MatNorm_MPIBAIJ,
2008 /*20*/ MatAssemblyBegin_MPIBAIJ,
2009        MatAssemblyEnd_MPIBAIJ,
2010        0,
2011        MatSetOption_MPIBAIJ,
2012        MatZeroEntries_MPIBAIJ,
2013 /*25*/ MatZeroRows_MPIBAIJ,
2014        0,
2015        0,
2016        0,
2017        0,
2018 /*30*/ MatSetUpPreallocation_MPIBAIJ,
2019        0,
2020        0,
2021        0,
2022        0,
2023 /*35*/ MatDuplicate_MPIBAIJ,
2024        0,
2025        0,
2026        0,
2027        0,
2028 /*40*/ 0,
2029        MatGetSubMatrices_MPIBAIJ,
2030        MatIncreaseOverlap_MPIBAIJ,
2031        MatGetValues_MPIBAIJ,
2032        0,
2033 /*45*/ MatPrintHelp_MPIBAIJ,
2034        MatScale_MPIBAIJ,
2035        0,
2036        0,
2037        0,
2038 /*50*/ 0,
2039        0,
2040        0,
2041        0,
2042        0,
2043 /*55*/ 0,
2044        0,
2045        MatSetUnfactored_MPIBAIJ,
2046        0,
2047        MatSetValuesBlocked_MPIBAIJ,
2048 /*60*/ 0,
2049        MatDestroy_MPIBAIJ,
2050        MatView_MPIBAIJ,
2051        MatGetPetscMaps_Petsc,
2052        0,
2053 /*65*/ 0,
2054        0,
2055        0,
2056        0,
2057        0,
2058 /*70*/ MatGetRowMax_MPIBAIJ,
2059        0,
2060        0,
2061        0,
2062        0,
2063 /*75*/ 0,
2064        0,
2065        0,
2066        0,
2067        0,
2068 /*80*/ 0,
2069        0,
2070        0,
2071        0,
2072        MatLoad_MPIBAIJ,
2073 /*85*/ 0,
2074        0,
2075        0,
2076        0,
2077        0,
2078 /*90*/ 0,
2079        0,
2080        0,
2081        0,
2082        0,
2083 /*95*/ 0,
2084        0,
2085        0,
2086        0};
2087 
2088 
2089 EXTERN_C_BEGIN
2090 #undef __FUNCT__
2091 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
2092 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
2093 {
2094   PetscFunctionBegin;
2095   *a      = ((Mat_MPIBAIJ *)A->data)->A;
2096   *iscopy = PETSC_FALSE;
2097   PetscFunctionReturn(0);
2098 }
2099 EXTERN_C_END
2100 
2101 EXTERN_C_BEGIN
2102 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPISBAIJ(Mat,const MatType,MatReuse,Mat*);
2103 EXTERN_C_END
2104 
2105 #undef __FUNCT__
2106 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ"
2107 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt I[],const PetscInt J[],const PetscScalar v[])
2108 {
2109   Mat_MPIBAIJ    *b  = (Mat_MPIBAIJ *)B->data;
2110   PetscInt       m = B->m/bs,cstart = b->cstart, cend = b->cend,j,nnz,i,d;
2111   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart = b->rstart,ii;
2112   const PetscInt *JJ;
2113   PetscScalar    *values;
2114   PetscErrorCode ierr;
2115 
2116   PetscFunctionBegin;
2117 #if defined(PETSC_OPT_g)
2118   if (I[0]) SETERRQ1(PETSC_ERR_ARG_RANGE,"I[0] must be 0 it is %D",I[0]);
2119 #endif
2120   ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2121   o_nnz = d_nnz + m;
2122 
2123   for (i=0; i<m; i++) {
2124     nnz     = I[i+1]- I[i];
2125     JJ      = J + I[i];
2126     nnz_max = PetscMax(nnz_max,nnz);
2127 #if defined(PETSC_OPT_g)
2128     if (nnz < 0) SETERRQ1(PETSC_ERR_ARG_RANGE,"Local row %D has a negative %D number of columns",i,nnz);
2129 #endif
2130     for (j=0; j<nnz; j++) {
2131       if (*JJ >= cstart) break;
2132       JJ++;
2133     }
2134     d = 0;
2135     for (; j<nnz; j++) {
2136       if (*JJ++ >= cend) break;
2137       d++;
2138     }
2139     d_nnz[i] = d;
2140     o_nnz[i] = nnz - d;
2141   }
2142   ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2143   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2144 
2145   if (v) values = (PetscScalar*)v;
2146   else {
2147     ierr = PetscMalloc(bs*bs*(nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2148     ierr = PetscMemzero(values,bs*bs*nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2149   }
2150 
2151   ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2152   for (i=0; i<m; i++) {
2153     ii   = i + rstart;
2154     nnz  = I[i+1]- I[i];
2155     ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&ii,nnz,J+I[i],values,INSERT_VALUES);CHKERRQ(ierr);
2156   }
2157   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2158   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2159   ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr);
2160 
2161   if (!v) {
2162     ierr = PetscFree(values);CHKERRQ(ierr);
2163   }
2164   PetscFunctionReturn(0);
2165 }
2166 
2167 #undef __FUNCT__
2168 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR"
2169 /*@C
2170    MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
2171    (the default parallel PETSc format).
2172 
2173    Collective on MPI_Comm
2174 
2175    Input Parameters:
2176 +  A - the matrix
2177 .  i - the indices into j for the start of each local row (starts with zero)
2178 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2179 -  v - optional values in the matrix
2180 
2181    Level: developer
2182 
2183 .keywords: matrix, aij, compressed row, sparse, parallel
2184 
2185 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ
2186 @*/
2187 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2188 {
2189   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]);
2190 
2191   PetscFunctionBegin;
2192   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
2193   if (f) {
2194     ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr);
2195   }
2196   PetscFunctionReturn(0);
2197 }
2198 
2199 EXTERN_C_BEGIN
2200 #undef __FUNCT__
2201 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
2202 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
2203 {
2204   Mat_MPIBAIJ    *b;
2205   PetscErrorCode ierr;
2206   PetscInt       i;
2207 
2208   PetscFunctionBegin;
2209   B->preallocated = PETSC_TRUE;
2210   ierr = PetscOptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2211 
2212   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
2213   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2214   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2215   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2216   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2217   if (d_nnz) {
2218   for (i=0; i<B->m/bs; i++) {
2219       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]);
2220     }
2221   }
2222   if (o_nnz) {
2223     for (i=0; i<B->m/bs; i++) {
2224       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]);
2225     }
2226   }
2227 
2228   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr);
2229   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr);
2230   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
2231   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
2232 
2233   b = (Mat_MPIBAIJ*)B->data;
2234   B->bs  = bs;
2235   b->bs2 = bs*bs;
2236   b->mbs = B->m/bs;
2237   b->nbs = B->n/bs;
2238   b->Mbs = B->M/bs;
2239   b->Nbs = B->N/bs;
2240 
2241   ierr = MPI_Allgather(&b->mbs,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
2242   b->rowners[0]    = 0;
2243   for (i=2; i<=b->size; i++) {
2244     b->rowners[i] += b->rowners[i-1];
2245   }
2246   b->rstart    = b->rowners[b->rank];
2247   b->rend      = b->rowners[b->rank+1];
2248 
2249   ierr = MPI_Allgather(&b->nbs,1,MPIU_INT,b->cowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
2250   b->cowners[0] = 0;
2251   for (i=2; i<=b->size; i++) {
2252     b->cowners[i] += b->cowners[i-1];
2253   }
2254   b->cstart    = b->cowners[b->rank];
2255   b->cend      = b->cowners[b->rank+1];
2256 
2257   for (i=0; i<=b->size; i++) {
2258     b->rowners_bs[i] = b->rowners[i]*bs;
2259   }
2260   b->rstart_bs = b->rstart*bs;
2261   b->rend_bs   = b->rend*bs;
2262   b->cstart_bs = b->cstart*bs;
2263   b->cend_bs   = b->cend*bs;
2264 
2265   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);CHKERRQ(ierr);
2266   ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
2267   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2268   ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
2269   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);CHKERRQ(ierr);
2270   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
2271   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
2272   ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
2273 
2274   ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);
2275 
2276   PetscFunctionReturn(0);
2277 }
2278 EXTERN_C_END
2279 
2280 EXTERN_C_BEGIN
2281 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
2282 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
2283 EXTERN_C_END
2284 
2285 /*MC
2286    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
2287 
2288    Options Database Keys:
2289 . -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
2290 
2291   Level: beginner
2292 
2293 .seealso: MatCreateMPIBAIJ
2294 M*/
2295 
2296 EXTERN_C_BEGIN
2297 #undef __FUNCT__
2298 #define __FUNCT__ "MatCreate_MPIBAIJ"
2299 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIBAIJ(Mat B)
2300 {
2301   Mat_MPIBAIJ    *b;
2302   PetscErrorCode ierr;
2303   PetscTruth     flg;
2304 
2305   PetscFunctionBegin;
2306   ierr = PetscNew(Mat_MPIBAIJ,&b);CHKERRQ(ierr);
2307   B->data = (void*)b;
2308 
2309   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2310   B->mapping    = 0;
2311   B->factor     = 0;
2312   B->assembled  = PETSC_FALSE;
2313 
2314   B->insertmode = NOT_SET_VALUES;
2315   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
2316   ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr);
2317 
2318   /* build local table of row and column ownerships */
2319   ierr          = PetscMalloc(3*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr);
2320   ierr = PetscLogObjectMemory(B,3*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));CHKERRQ(ierr);
2321   b->cowners    = b->rowners + b->size + 2;
2322   b->rowners_bs = b->cowners + b->size + 2;
2323 
2324   /* build cache for off array entries formed */
2325   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
2326   b->donotstash  = PETSC_FALSE;
2327   b->colmap      = PETSC_NULL;
2328   b->garray      = PETSC_NULL;
2329   b->roworiented = PETSC_TRUE;
2330 
2331 #if defined(PETSC_USE_MAT_SINGLE)
2332   /* stuff for MatSetValues_XXX in single precision */
2333   b->setvalueslen     = 0;
2334   b->setvaluescopy    = PETSC_NULL;
2335 #endif
2336 
2337   /* stuff used in block assembly */
2338   b->barray       = 0;
2339 
2340   /* stuff used for matrix vector multiply */
2341   b->lvec         = 0;
2342   b->Mvctx        = 0;
2343 
2344   /* stuff for MatGetRow() */
2345   b->rowindices   = 0;
2346   b->rowvalues    = 0;
2347   b->getrowactive = PETSC_FALSE;
2348 
2349   /* hash table stuff */
2350   b->ht           = 0;
2351   b->hd           = 0;
2352   b->ht_size      = 0;
2353   b->ht_flag      = PETSC_FALSE;
2354   b->ht_fact      = 0;
2355   b->ht_total_ct  = 0;
2356   b->ht_insert_ct = 0;
2357 
2358   ierr = PetscOptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
2359   if (flg) {
2360     PetscReal fact = 1.39;
2361     ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
2362     ierr = PetscOptionsGetReal(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
2363     if (fact <= 1.0) fact = 1.39;
2364     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
2365     ierr = PetscLogInfo((0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact));CHKERRQ(ierr);
2366   }
2367   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2368                                      "MatStoreValues_MPIBAIJ",
2369                                      MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
2370   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2371                                      "MatRetrieveValues_MPIBAIJ",
2372                                      MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
2373   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
2374                                      "MatGetDiagonalBlock_MPIBAIJ",
2375                                      MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
2376   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C",
2377                                      "MatMPIBAIJSetPreallocation_MPIBAIJ",
2378                                      MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
2379   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",
2380 				     "MatMPIBAIJSetPreallocationCSR_MPIAIJ",
2381 				     MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
2382   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
2383                                      "MatDiagonalScaleLocal_MPIBAIJ",
2384                                      MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
2385   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C",
2386                                      "MatSetHashTableFactor_MPIBAIJ",
2387                                      MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
2388   PetscFunctionReturn(0);
2389 }
2390 EXTERN_C_END
2391 
2392 /*MC
2393    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
2394 
2395    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
2396    and MATMPIBAIJ otherwise.
2397 
2398    Options Database Keys:
2399 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
2400 
2401   Level: beginner
2402 
2403 .seealso: MatCreateMPIBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
2404 M*/
2405 
2406 EXTERN_C_BEGIN
2407 #undef __FUNCT__
2408 #define __FUNCT__ "MatCreate_BAIJ"
2409 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_BAIJ(Mat A)
2410 {
2411   PetscErrorCode ierr;
2412   PetscMPIInt    size;
2413 
2414   PetscFunctionBegin;
2415   ierr = PetscObjectChangeTypeName((PetscObject)A,MATBAIJ);CHKERRQ(ierr);
2416   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
2417   if (size == 1) {
2418     ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr);
2419   } else {
2420     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
2421   }
2422   PetscFunctionReturn(0);
2423 }
2424 EXTERN_C_END
2425 
2426 #undef __FUNCT__
2427 #define __FUNCT__ "MatMPIBAIJSetPreallocation"
2428 /*@C
2429    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
2430    (block compressed row).  For good matrix assembly performance
2431    the user should preallocate the matrix storage by setting the parameters
2432    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2433    performance can be increased by more than a factor of 50.
2434 
2435    Collective on Mat
2436 
2437    Input Parameters:
2438 +  A - the matrix
2439 .  bs   - size of blockk
2440 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2441            submatrix  (same for all local rows)
2442 .  d_nnz - array containing the number of block nonzeros in the various block rows
2443            of the in diagonal portion of the local (possibly different for each block
2444            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
2445 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2446            submatrix (same for all local rows).
2447 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2448            off-diagonal portion of the local submatrix (possibly different for
2449            each block row) or PETSC_NULL.
2450 
2451    If the *_nnz parameter is given then the *_nz parameter is ignored
2452 
2453    Options Database Keys:
2454 .   -mat_no_unroll - uses code that does not unroll the loops in the
2455                      block calculations (much slower)
2456 .   -mat_block_size - size of the blocks to use
2457 
2458    Notes:
2459    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
2460    than it must be used on all processors that share the object for that argument.
2461 
2462    Storage Information:
2463    For a square global matrix we define each processor's diagonal portion
2464    to be its local rows and the corresponding columns (a square submatrix);
2465    each processor's off-diagonal portion encompasses the remainder of the
2466    local matrix (a rectangular submatrix).
2467 
2468    The user can specify preallocated storage for the diagonal part of
2469    the local submatrix with either d_nz or d_nnz (not both).  Set
2470    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2471    memory allocation.  Likewise, specify preallocated storage for the
2472    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2473 
2474    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2475    the figure below we depict these three local rows and all columns (0-11).
2476 
2477 .vb
2478            0 1 2 3 4 5 6 7 8 9 10 11
2479           -------------------
2480    row 3  |  o o o d d d o o o o o o
2481    row 4  |  o o o d d d o o o o o o
2482    row 5  |  o o o d d d o o o o o o
2483           -------------------
2484 .ve
2485 
2486    Thus, any entries in the d locations are stored in the d (diagonal)
2487    submatrix, and any entries in the o locations are stored in the
2488    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
2489    stored simply in the MATSEQBAIJ format for compressed row storage.
2490 
2491    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2492    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2493    In general, for PDE problems in which most nonzeros are near the diagonal,
2494    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2495    or you will get TERRIBLE performance; see the users' manual chapter on
2496    matrices.
2497 
2498    Level: intermediate
2499 
2500 .keywords: matrix, block, aij, compressed row, sparse, parallel
2501 
2502 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR()
2503 @*/
2504 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2505 {
2506   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
2507 
2508   PetscFunctionBegin;
2509   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2510   if (f) {
2511     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2512   }
2513   PetscFunctionReturn(0);
2514 }
2515 
2516 #undef __FUNCT__
2517 #define __FUNCT__ "MatCreateMPIBAIJ"
2518 /*@C
2519    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
2520    (block compressed row).  For good matrix assembly performance
2521    the user should preallocate the matrix storage by setting the parameters
2522    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2523    performance can be increased by more than a factor of 50.
2524 
2525    Collective on MPI_Comm
2526 
2527    Input Parameters:
2528 +  comm - MPI communicator
2529 .  bs   - size of blockk
2530 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2531            This value should be the same as the local size used in creating the
2532            y vector for the matrix-vector product y = Ax.
2533 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2534            This value should be the same as the local size used in creating the
2535            x vector for the matrix-vector product y = Ax.
2536 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2537 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2538 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
2539            submatrix  (same for all local rows)
2540 .  d_nnz - array containing the number of nonzero blocks in the various block rows
2541            of the in diagonal portion of the local (possibly different for each block
2542            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
2543 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
2544            submatrix (same for all local rows).
2545 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
2546            off-diagonal portion of the local submatrix (possibly different for
2547            each block row) or PETSC_NULL.
2548 
2549    Output Parameter:
2550 .  A - the matrix
2551 
2552    Options Database Keys:
2553 .   -mat_no_unroll - uses code that does not unroll the loops in the
2554                      block calculations (much slower)
2555 .   -mat_block_size - size of the blocks to use
2556 
2557    Notes:
2558    If the *_nnz parameter is given then the *_nz parameter is ignored
2559 
2560    A nonzero block is any block that as 1 or more nonzeros in it
2561 
2562    The user MUST specify either the local or global matrix dimensions
2563    (possibly both).
2564 
2565    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
2566    than it must be used on all processors that share the object for that argument.
2567 
2568    Storage Information:
2569    For a square global matrix we define each processor's diagonal portion
2570    to be its local rows and the corresponding columns (a square submatrix);
2571    each processor's off-diagonal portion encompasses the remainder of the
2572    local matrix (a rectangular submatrix).
2573 
2574    The user can specify preallocated storage for the diagonal part of
2575    the local submatrix with either d_nz or d_nnz (not both).  Set
2576    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2577    memory allocation.  Likewise, specify preallocated storage for the
2578    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2579 
2580    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2581    the figure below we depict these three local rows and all columns (0-11).
2582 
2583 .vb
2584            0 1 2 3 4 5 6 7 8 9 10 11
2585           -------------------
2586    row 3  |  o o o d d d o o o o o o
2587    row 4  |  o o o d d d o o o o o o
2588    row 5  |  o o o d d d o o o o o o
2589           -------------------
2590 .ve
2591 
2592    Thus, any entries in the d locations are stored in the d (diagonal)
2593    submatrix, and any entries in the o locations are stored in the
2594    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
2595    stored simply in the MATSEQBAIJ format for compressed row storage.
2596 
2597    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2598    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2599    In general, for PDE problems in which most nonzeros are near the diagonal,
2600    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2601    or you will get TERRIBLE performance; see the users' manual chapter on
2602    matrices.
2603 
2604    Level: intermediate
2605 
2606 .keywords: matrix, block, aij, compressed row, sparse, parallel
2607 
2608 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
2609 @*/
2610 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2611 {
2612   PetscErrorCode ierr;
2613   PetscMPIInt    size;
2614 
2615   PetscFunctionBegin;
2616   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2617   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2618   if (size > 1) {
2619     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
2620     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2621   } else {
2622     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
2623     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2624   }
2625   PetscFunctionReturn(0);
2626 }
2627 
2628 #undef __FUNCT__
2629 #define __FUNCT__ "MatDuplicate_MPIBAIJ"
2630 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2631 {
2632   Mat            mat;
2633   Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
2634   PetscErrorCode ierr;
2635   PetscInt       len=0;
2636 
2637   PetscFunctionBegin;
2638   *newmat       = 0;
2639   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
2640   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
2641   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2642 
2643   mat->factor       = matin->factor;
2644   mat->preallocated = PETSC_TRUE;
2645   mat->assembled    = PETSC_TRUE;
2646   mat->insertmode   = NOT_SET_VALUES;
2647 
2648   a      = (Mat_MPIBAIJ*)mat->data;
2649   mat->bs  = matin->bs;
2650   a->bs2   = oldmat->bs2;
2651   a->mbs   = oldmat->mbs;
2652   a->nbs   = oldmat->nbs;
2653   a->Mbs   = oldmat->Mbs;
2654   a->Nbs   = oldmat->Nbs;
2655 
2656   a->rstart       = oldmat->rstart;
2657   a->rend         = oldmat->rend;
2658   a->cstart       = oldmat->cstart;
2659   a->cend         = oldmat->cend;
2660   a->size         = oldmat->size;
2661   a->rank         = oldmat->rank;
2662   a->donotstash   = oldmat->donotstash;
2663   a->roworiented  = oldmat->roworiented;
2664   a->rowindices   = 0;
2665   a->rowvalues    = 0;
2666   a->getrowactive = PETSC_FALSE;
2667   a->barray       = 0;
2668   a->rstart_bs    = oldmat->rstart_bs;
2669   a->rend_bs      = oldmat->rend_bs;
2670   a->cstart_bs    = oldmat->cstart_bs;
2671   a->cend_bs      = oldmat->cend_bs;
2672 
2673   /* hash table stuff */
2674   a->ht           = 0;
2675   a->hd           = 0;
2676   a->ht_size      = 0;
2677   a->ht_flag      = oldmat->ht_flag;
2678   a->ht_fact      = oldmat->ht_fact;
2679   a->ht_total_ct  = 0;
2680   a->ht_insert_ct = 0;
2681 
2682   ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
2683   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
2684   ierr = MatStashCreate_Private(matin->comm,matin->bs,&mat->bstash);CHKERRQ(ierr);
2685   if (oldmat->colmap) {
2686 #if defined (PETSC_USE_CTABLE)
2687   ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2688 #else
2689   ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2690   ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2691   ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2692 #endif
2693   } else a->colmap = 0;
2694 
2695   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2696     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2697     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2698     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
2699   } else a->garray = 0;
2700 
2701   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2702   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2703   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2704   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2705 
2706   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2707   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2708   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2709   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2710   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
2711   *newmat = mat;
2712 
2713   PetscFunctionReturn(0);
2714 }
2715 
2716 #include "petscsys.h"
2717 
2718 #undef __FUNCT__
2719 #define __FUNCT__ "MatLoad_MPIBAIJ"
2720 PetscErrorCode MatLoad_MPIBAIJ(PetscViewer viewer,const MatType type,Mat *newmat)
2721 {
2722   Mat            A;
2723   PetscErrorCode ierr;
2724   int            fd;
2725   PetscInt       i,nz,j,rstart,rend;
2726   PetscScalar    *vals,*buf;
2727   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2728   MPI_Status     status;
2729   PetscMPIInt    rank,size,maxnz;
2730   PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
2731   PetscInt       *locrowlens,*procsnz = 0,*browners;
2732   PetscInt       jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax;
2733   PetscMPIInt    tag = ((PetscObject)viewer)->tag;
2734   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2735   PetscInt       dcount,kmax,k,nzcount,tmp,mend;
2736 
2737   PetscFunctionBegin;
2738   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2739 
2740   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2741   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2742   if (!rank) {
2743     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2744     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2745     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2746   }
2747 
2748   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2749   M = header[1]; N = header[2];
2750 
2751   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2752 
2753   /*
2754      This code adds extra rows to make sure the number of rows is
2755      divisible by the blocksize
2756   */
2757   Mbs        = M/bs;
2758   extra_rows = bs - M + bs*Mbs;
2759   if (extra_rows == bs) extra_rows = 0;
2760   else                  Mbs++;
2761   if (extra_rows && !rank) {
2762     ierr = PetscLogInfo((0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n"));CHKERRQ(ierr);
2763   }
2764 
2765   /* determine ownership of all rows */
2766   mbs        = Mbs/size + ((Mbs % size) > rank);
2767   m          = mbs*bs;
2768   ierr       = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr);
2769   ierr       = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2770 
2771   /* process 0 needs enough room for process with most rows */
2772   if (!rank) {
2773     mmax = rowners[1];
2774     for (i=2; i<size; i++) {
2775       mmax = PetscMax(mmax,rowners[i]);
2776     }
2777     mmax*=bs;
2778   } else mmax = m;
2779 
2780   rowners[0] = 0;
2781   for (i=2; i<=size; i++)  rowners[i] += rowners[i-1];
2782   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2783   rstart = rowners[rank];
2784   rend   = rowners[rank+1];
2785 
2786   /* distribute row lengths to all processors */
2787   ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr);
2788   if (!rank) {
2789     mend = m;
2790     if (size == 1) mend = mend - extra_rows;
2791     ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr);
2792     for (j=mend; j<m; j++) locrowlens[j] = 1;
2793     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2794     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2795     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2796     for (j=0; j<m; j++) {
2797       procsnz[0] += locrowlens[j];
2798     }
2799     for (i=1; i<size; i++) {
2800       mend = browners[i+1] - browners[i];
2801       if (i == size-1) mend = mend - extra_rows;
2802       ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr);
2803       for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
2804       /* calculate the number of nonzeros on each processor */
2805       for (j=0; j<browners[i+1]-browners[i]; j++) {
2806         procsnz[i] += rowlengths[j];
2807       }
2808       ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2809     }
2810     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2811   } else {
2812     ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2813   }
2814 
2815   if (!rank) {
2816     /* determine max buffer needed and allocate it */
2817     maxnz = procsnz[0];
2818     for (i=1; i<size; i++) {
2819       maxnz = PetscMax(maxnz,procsnz[i]);
2820     }
2821     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2822 
2823     /* read in my part of the matrix column indices  */
2824     nz     = procsnz[0];
2825     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2826     mycols = ibuf;
2827     if (size == 1)  nz -= extra_rows;
2828     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2829     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2830 
2831     /* read in every ones (except the last) and ship off */
2832     for (i=1; i<size-1; i++) {
2833       nz   = procsnz[i];
2834       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2835       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2836     }
2837     /* read in the stuff for the last proc */
2838     if (size != 1) {
2839       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2840       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2841       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2842       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2843     }
2844     ierr = PetscFree(cols);CHKERRQ(ierr);
2845   } else {
2846     /* determine buffer space needed for message */
2847     nz = 0;
2848     for (i=0; i<m; i++) {
2849       nz += locrowlens[i];
2850     }
2851     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2852     mycols = ibuf;
2853     /* receive message of column indices*/
2854     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2855     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2856     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2857   }
2858 
2859   /* loop over local rows, determining number of off diagonal entries */
2860   ierr     = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
2861   ierr     = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
2862   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2863   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2864   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2865   rowcount = 0; nzcount = 0;
2866   for (i=0; i<mbs; i++) {
2867     dcount  = 0;
2868     odcount = 0;
2869     for (j=0; j<bs; j++) {
2870       kmax = locrowlens[rowcount];
2871       for (k=0; k<kmax; k++) {
2872         tmp = mycols[nzcount++]/bs;
2873         if (!mask[tmp]) {
2874           mask[tmp] = 1;
2875           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
2876           else masked1[dcount++] = tmp;
2877         }
2878       }
2879       rowcount++;
2880     }
2881 
2882     dlens[i]  = dcount;
2883     odlens[i] = odcount;
2884 
2885     /* zero out the mask elements we set */
2886     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2887     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2888   }
2889 
2890   /* create our matrix */
2891   ierr = MatCreate(comm,m,m,M+extra_rows,N+extra_rows,&A);CHKERRQ(ierr);
2892   ierr = MatSetType(A,type);CHKERRQ(ierr)
2893   ierr = MatMPIBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2894 
2895   /* Why doesn't this called using ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); */
2896   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2897 
2898   if (!rank) {
2899     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2900     /* read in my part of the matrix numerical values  */
2901     nz = procsnz[0];
2902     vals = buf;
2903     mycols = ibuf;
2904     if (size == 1)  nz -= extra_rows;
2905     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2906     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2907 
2908     /* insert into matrix */
2909     jj      = rstart*bs;
2910     for (i=0; i<m; i++) {
2911       ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2912       mycols += locrowlens[i];
2913       vals   += locrowlens[i];
2914       jj++;
2915     }
2916     /* read in other processors (except the last one) and ship out */
2917     for (i=1; i<size-1; i++) {
2918       nz   = procsnz[i];
2919       vals = buf;
2920       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2921       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2922     }
2923     /* the last proc */
2924     if (size != 1){
2925       nz   = procsnz[i] - extra_rows;
2926       vals = buf;
2927       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2928       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2929       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
2930     }
2931     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2932   } else {
2933     /* receive numeric values */
2934     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2935 
2936     /* receive message of values*/
2937     vals   = buf;
2938     mycols = ibuf;
2939     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2940     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2941     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2942 
2943     /* insert into matrix */
2944     jj      = rstart*bs;
2945     for (i=0; i<m; i++) {
2946       ierr    = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2947       mycols += locrowlens[i];
2948       vals   += locrowlens[i];
2949       jj++;
2950     }
2951   }
2952   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2953   ierr = PetscFree(buf);CHKERRQ(ierr);
2954   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2955   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
2956   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
2957   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
2958   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2959   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2960 
2961   *newmat = A;
2962   PetscFunctionReturn(0);
2963 }
2964 
2965 #undef __FUNCT__
2966 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
2967 /*@
2968    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2969 
2970    Input Parameters:
2971 .  mat  - the matrix
2972 .  fact - factor
2973 
2974    Collective on Mat
2975 
2976    Level: advanced
2977 
2978   Notes:
2979    This can also be set by the command line option: -mat_use_hash_table fact
2980 
2981 .keywords: matrix, hashtable, factor, HT
2982 
2983 .seealso: MatSetOption()
2984 @*/
2985 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
2986 {
2987   PetscErrorCode ierr,(*f)(Mat,PetscReal);
2988 
2989   PetscFunctionBegin;
2990   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);CHKERRQ(ierr);
2991   if (f) {
2992     ierr = (*f)(mat,fact);CHKERRQ(ierr);
2993   }
2994   PetscFunctionReturn(0);
2995 }
2996 
2997 EXTERN_C_BEGIN
2998 #undef __FUNCT__
2999 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
3000 PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
3001 {
3002   Mat_MPIBAIJ *baij;
3003 
3004   PetscFunctionBegin;
3005   baij = (Mat_MPIBAIJ*)mat->data;
3006   baij->ht_fact = fact;
3007   PetscFunctionReturn(0);
3008 }
3009 EXTERN_C_END
3010 
3011 #undef __FUNCT__
3012 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
3013 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3014 {
3015   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
3016   PetscFunctionBegin;
3017   *Ad     = a->A;
3018   *Ao     = a->B;
3019   *colmap = a->garray;
3020   PetscFunctionReturn(0);
3021 }
3022