#define PETSCMAT_DLL

#include "src/mat/impls/baij/mpi/mpibaij.h"   /*I  "petscmat.h"  I*/

EXTERN PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
EXTERN PetscErrorCode DisAssemble_MPIBAIJ(Mat);
EXTERN PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat,PetscInt,IS[],PetscInt);
EXTERN PetscErrorCode MatGetSubMatrices_MPIBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat *[]);
EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []);
EXTERN PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
EXTERN PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar);
/*  UGLY, ugly, ugly
   When MatScalar == PetscScalar the ..._MatScalar() versions of the functions
   below do not exist; the #defines alias them to the regular versions. Otherwise
   the ..._MatScalar() functions take matrix elements that are already in single
   precision and insert them into the single-precision data structures, while
   MatSetValues_MPIBAIJ() etc. first convert the entries to single precision and
   then call the corresponding ..._MatScalar() function.
*/
#if defined(PETSC_USE_MAT_SINGLE)
EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
EXTERN PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
EXTERN PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
#else
#define MatSetValuesBlocked_SeqBAIJ_MatScalar      MatSetValuesBlocked_SeqBAIJ
#define MatSetValues_MPIBAIJ_MatScalar             MatSetValues_MPIBAIJ
#define MatSetValuesBlocked_MPIBAIJ_MatScalar      MatSetValuesBlocked_MPIBAIJ
#define MatSetValues_MPIBAIJ_HT_MatScalar          MatSetValues_MPIBAIJ_HT
#define MatSetValuesBlocked_MPIBAIJ_HT_MatScalar   MatSetValuesBlocked_MPIBAIJ_HT
#endif
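
/*
   A minimal sketch of what the aliasing above means for a (hypothetical)
   caller: the application always passes PetscScalar values; when
   PETSC_USE_MAT_SINGLE is defined the wrappers below copy those values into
   the MatScalar (single precision) buffer b->setvaluescopy before insertion,
   otherwise the ..._MatScalar names simply are the insertion routines.

     PetscScalar vals[4] = {1.0,2.0,3.0,4.0};
     PetscInt    rows[2] = {0,1},cols[2] = {0,1};
     ierr = MatSetValues(mat,2,rows,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
*/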

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ"
PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;

  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap.n; i++) {if (PetscAbsScalar(va[i])) idx[i] += A->cmap.rstart;}
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap.n,&vtmp);CHKERRQ(ierr);
  if (idx) {ierr = PetscMalloc(A->rmap.n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);}
  ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap.n; i++){
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {va[i] = vb[i]; if (idx) idx[i] = A->cmap.bs*a->garray[idxb[i]/A->cmap.bs] + (idxb[i] % A->cmap.bs);}
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  if (idxb) {ierr = PetscFree(idxb);CHKERRQ(ierr);}
  ierr = VecDestroy(vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
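
/*
   Usage sketch (hypothetical caller, assuming an assembled parallel matrix A
   and m local rows): v must be a parallel vector whose layout matches the
   rows of A, and idx, if not PETSC_NULL, receives the global column index of
   each row's maximum.

     Vec      v;
     PetscInt *idx;
     ierr = VecCreateMPI(A->comm,m,PETSC_DETERMINE,&v);CHKERRQ(ierr);
     ierr = PetscMalloc(m*sizeof(PetscInt),&idx);CHKERRQ(ierr);
     ierr = MatGetRowMaxAbs(A,v,idx);CHKERRQ(ierr);
*/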

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPIBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*
     Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  This is done in a non-scalable way (except in the
   PETSC_USE_CTABLE case), since the length of colmap equals the global
   number of block columns.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIBAIJ_Private"
PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       nbs = B->nbs,i,bs=mat->rmap.bs;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr);
  for (i=0; i<nbs; i++){
    ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}
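
/*
   How colmap is consulted, in sketch form (this mirrors the lookups later in
   this file; gbcol is a hypothetical global block column index): the stored
   value is 1 + the first local point column of that block, so a result of -1
   after the subtraction below signals "not present".

     PetscInt lcol;
   #if defined (PETSC_USE_CTABLE)
     ierr = PetscTableFind(baij->colmap,gbcol+1,&lcol);CHKERRQ(ierr);
     lcol = lcol - 1;
   #else
     lcol = baij->colmap[gbcol] - 1;
   #endif
*/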

#define CHUNKSIZE  10

#define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto a_noinsert; \
        } \
      } \
      if (a->nonew == 1) goto a_noinsert; \
      if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
      MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      a_noinsert:; \
    ailen[brow] = nrow; \
}

#define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto b_noinsert; \
        } \
      } \
      if (b->nonew == 1) goto b_noinsert; \
      if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
      MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
      CHKMEMQ;\
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      b_noinsert:; \
    bilen[brow] = nrow; \
}
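
/*
   What the two macros above do, on a small example: suppose block row brow
   currently holds block columns rp[] = {1,4,7,9} and the point (row,col)
   falls in block column bcol = 5.  The short binary search narrows [low,high)
   to [0,2) (since rp[2] == 7 > 5), the linear scan over that range finds no
   match and leaves _i = 2, and since block 5 is absent a new block is opened:
   rp[] and the bs2-sized value blocks in ap[] from position 2 on are shifted
   up one slot, rp[2] = 5, the fresh block is zeroed, and the entry lands at
   ap[bs2*2 + bs*cidx + ridx] (the blocks are stored column-major).  With
   nonew == 1 the insertion is silently dropped; with nonew == -1 it is an
   error.
*/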

#if defined(PETSC_USE_MAT_SINGLE)
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ"
PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,N = m*n;
  MatScalar      *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen  = N;
  }
  vsingle = b->setvaluescopy;

  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValues_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,N = m*n*b->bs2;
  MatScalar      *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen  = N;
  }
  vsingle = b->setvaluescopy;
  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,N = m*n;
  MatScalar      *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen  = N;
  }
  vsingle = b->setvaluescopy;
  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValues_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,N = m*n*b->bs2;
  MatScalar      *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen  = N;
  }
  vsingle = b->setvaluescopy;
  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar"
PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  MatScalar      value;
  PetscTruth     roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap.rstart;
  PetscInt       rend_orig=mat->rmap.rend,cstart_orig=mat->cmap.rstart;
  PetscInt       cend_orig=mat->cmap.rend,bs=mat->rmap.bs;

  /* Some variables required by the macros */
  Mat            A = baij->A;
  Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)(A)->data;
  PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar      *aa=a->a;

  Mat            B = baij->B;
  Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(B)->data;
  PetscInt       *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar      *ba=b->a;

  PetscInt       *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt       low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar      *ap,*bap;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      row = im[i] - rstart_orig;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig){
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B = baij->B;
              b = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba=b->a;
            } else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_MatScalar"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
  const MatScalar *value;
  MatScalar       *barray=baij->barray;
  PetscTruth      roworiented = baij->roworiented;
  PetscErrorCode  ierr;
  PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
  PetscInt        cend=baij->cendbs,bs=mat->rmap.bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If there is only one block column (row oriented) or one block row
           (column oriented) then v is already a contiguous block and no copy
           is required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++  = *value++;
            }
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend){
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
        else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined (PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
            }
          }
          else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#define HASH_KEY 0.6180339887
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
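
/*
   The multiplicative hash above, worked through once: for size = 1000 and
   key = 42, tmp = 42*0.6180339887 = 25.957..., subtracting the integer part
   leaves the fraction 0.957..., and the slot is (PetscInt)(1000*0.957...)
   = 957.  Collisions are resolved by the linear (wrap-around) probing in the
   routines below.
*/
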
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_HT_MatScalar"
PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscTruth     roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap.rstart;
  PetscInt       rend_orig=mat->rmap.rend,Nbs=baij->Nbs;
  PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap.bs,*HT=baij->ht,idx;
  PetscReal      tmp;
  MatScalar      **HD = baij->hd,value;
#if defined(PETSC_USE_DEBUG)
  PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
#endif
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j=0; j<n; j++) {
        col = in[j];
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        /* Look up the block (row/bs,col/bs) in the hash table */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#endif
        /* A hash table entry was found, so insert the value at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT_MatScalar"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
  PetscTruth      roworiented = baij->roworiented;
  PetscErrorCode  ierr;
  PetscInt        i,j,ii,jj,row,col;
  PetscInt        rstart=baij->rstartbs;
  PetscInt        rend=mat->rmap.rend,stepval,bs=mat->rmap.bs,bs2=baij->bs2,nbs2=n*bs2;
  PetscInt        h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  PetscReal       tmp;
  MatScalar       **HD = baij->hd,*baij_a;
  const MatScalar *v_t,*value;
#if defined(PETSC_USE_DEBUG)
  PetscInt        total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
#endif
    row   = im[i];
    v_t   = v + i*nbs2;
    if (row >= rstart && row < rend) {
      for (j=0; j<n; j++) {
        col = in[j];

        /* Look up the block (row,col) in the hash table */
        key = row*Nbs+col+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        total_ct++;
        insert_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#endif
        baij_a = HD[idx];
        if (roworiented) {
          /* value = v + i*(stepval+bs)*bs + j*bs; */
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t  += bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj]  += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj]  = *value++;
              }
            }
          }
        } else {
          value = v + j*(stepval+bs)*bs + i*bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj]  += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj]  = *value++;
              }
            }
          }
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIBAIJ"
PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs=mat->rmap.bs,i,j,bsrstart = mat->rmap.rstart,bsrend = mat->rmap.rend;
  PetscInt       bscstart = mat->cmap.rstart,bscend = mat->cmap.rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap.N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend){
          col = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIBAIJ"
PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap.bs,nz,row,col;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr =  MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      nz = amat->nz*bs2;
      for (i=0; i<nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      nz = bmat->nz*bs2;
      for (i=0; i<nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *nrm = sqrt(*nrm);
    } else if (type == NORM_1) { /* max column sum */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
      ierr = PetscMalloc((2*mat->cmap.N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      tmp2 = tmp + mat->cmap.N;
      ierr = PetscMemzero(tmp,mat->cmap.N*sizeof(PetscReal));CHKERRQ(ierr);
      v = amat->a; jj = amat->j;
      for (i=0; i<amat->nz; i++) {
        for (j=0; j<bs; j++){
          col = bs*(cstart + *jj) + j; /* column index */
          for (row=0; row<bs; row++){
            tmp[col] += PetscAbsScalar(*v);  v++;
          }
        }
        jj++;
      }
      v = bmat->a; jj = bmat->j;
      for (i=0; i<bmat->nz; i++) {
        for (j=0; j<bs; j++){
          col = bs*garray[*jj] + j;
          for (row=0; row<bs; row++){
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->cmap.N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *nrm = 0.0;
      for (j=0; j<mat->cmap.N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row sum */
      PetscReal *sums;
      ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr);
      sum = 0.0;
      for (j=0; j<amat->mbs; j++) {
        for (row=0; row<bs; row++) sums[row] = 0.0;
        v = amat->a + bs2*amat->i[j];
        nz = amat->i[j+1]-amat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++){
            for (row=0; row<bs; row++){
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        v = bmat->a + bs2*bmat->i[j];
        nz = bmat->i[j+1]-bmat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++){
            for (row=0; row<bs; row++){
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        for (row=0; row<bs; row++){
          if (sums[row] > sum) sum = sums[row];
        }
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
      ierr = PetscFree(sums);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
    }
  }
  PetscFunctionReturn(0);
}
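
/*
   Usage sketch (hypothetical caller): all of the norms handled above are
   collective on the matrix communicator, so every process must make the same
   call.

     PetscReal nrm;
     ierr = MatNorm(A,NORM_FROBENIUS,&nrm);CHKERRQ(ierr);
     ierr = MatNorm(A,NORM_1,&nrm);CHKERRQ(ierr);
     ierr = MatNorm(A,NORM_INFINITY,&nrm);CHKERRQ(ierr);
*/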

/*
  Creates and populates the hash table.
  The table is created only once; if new entries need to be added to the
  matrix then the hash table has to be destroyed and recreated.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            A = baij->A,B=baij->B;
  Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
  PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
  PetscErrorCode ierr;
  PetscInt       size,bs2=baij->bs2,rstart=baij->rstartbs;
  PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
  PetscInt       *HT,key;
  MatScalar      **HD;
  PetscReal      tmp;
#if defined(PETSC_USE_INFO)
  PetscInt       ct=0,max=0;
#endif

  PetscFunctionBegin;
  baij->ht_size = (PetscInt)(factor*nz);
  size = baij->ht_size;

  if (baij->ht) {
    PetscFunctionReturn(0);
  }

  /* Allocate memory for the hash table */
  ierr     = PetscMalloc((size)*(sizeof(PetscInt)+sizeof(MatScalar*))+1,&baij->hd);CHKERRQ(ierr);
  baij->ht = (PetscInt*)(baij->hd + size);
  HD       = baij->hd;
  HT       = baij->ht;

  ierr = PetscMemzero(HD,size*(sizeof(PetscInt)+sizeof(MatScalar*)));CHKERRQ(ierr);

  /* Loop over A */
  for (i=0; i<a->mbs; i++) {
    for (j=ai[i]; j<ai[i+1]; j++) {
      row = i+rstart;
      col = aj[j]+cstart;

      key = row*Nbs + col + 1;
      h1  = HASH(size,key,tmp);
      for (k=0; k<size; k++){
        if (!HT[(h1+k)%size]) {
          HT[(h1+k)%size] = key;
          HD[(h1+k)%size] = a->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }
  /* Loop over B */
  for (i=0; i<b->mbs; i++) {
    for (j=bi[i]; j<bi[i+1]; j++) {
      row = i+rstart;
      col = garray[bj[j]];
      key = row*Nbs + col + 1;
      h1  = HASH(size,key,tmp);
      for (k=0; k<size; k++){
        if (!HT[(h1+k)%size]) {
          HT[(h1+k)%size] = key;
          HD[(h1+k)%size] = b->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }

  /* Print summary */
#if defined(PETSC_USE_INFO)
  for (i=0,j=0; i<size; i++) {
    if (HT[i]) {j++;}
  }
  ierr = PetscInfo2(0,"Average search = %5.2f, max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (baij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are in either INSERT_VALUES or ADD_VALUES mode */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted while others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,mat->rmap.range);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rangebs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(0,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(0,"Block-stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
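
/*
   The calling sequence this routine participates in, from the point of view
   of a hypothetical application: values destined for rows owned by other
   processes are stashed during MatSetValues() and communicated between
   MatAssemblyBegin() and MatAssemblyEnd().

     ierr = MatSetValues(mat,1,&i,1,&j,&value,ADD_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/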

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col,other_disassembled;
  PetscTruth     r1,r2,r3;
  MatScalar      *val;
  InsertMode     addv = mat->insertmode;
  PetscMPIInt    n;

  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!baij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flags to PETSC_FALSE, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;
    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = PetscInfo1(0,"Average hash table search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
  baij->rowvalues = 0;
  PetscFunctionReturn(0);
}
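
/*
   Note on the fast path installed above: once the hash table exists, later
   MatSetValues()/MatSetValuesBlocked() calls go through the _HT routines,
   which can only modify blocks already present in the nonzero structure.
   A sketch of enabling it from an application (the option name is an
   assumption for this revision; the factor can also be set through the
   MatSetHashTableFactor_C method composed in MatDestroy_MPIBAIJ() below):

     ierr = PetscOptionsSetValue("-mat_use_hash_table","true");CHKERRQ(ierr);
*/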

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = baij->size,rank = baij->rank;
  PetscInt          bs = mat->rmap.bs;
  PetscTruth        iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
              rank,mat->rmap.N,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs,
              mat->rmap.bs,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr);
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto the first processor. */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    PetscInt    M = mat->rmap.N,N = mat->cmap.N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar   *a;

    /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(A,mat->rmap.bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ*)baij->B->data;
    ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ"
PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  if (iascii || isdraw || issocket || isbinary) {
    ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIBAIJ"
PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap.N,mat->cmap.N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = MatDestroy(baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(baij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (baij->colmap) {ierr = PetscTableDestroy(baij->colmap);CHKERRQ(ierr);}
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
  if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
  ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree(baij->hd);CHKERRQ(ierr);
#if defined(PETSC_USE_MAT_SINGLE)
  ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(baij);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIBAIJ"
PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap.n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  }
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->rmap.n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
  }
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIBAIJ"
PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIBAIJ"
PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscTruth     merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually
       inserted in yy until the next line */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually
     inserted in zz until the next line, which is true for this implementation
     but may not hold in general */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
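
/*
   The pattern shared by the four Mult routines above, in sketch form: start
   the scatter, do the work that needs only local data, then finish the
   scatter, so that communication overlaps the local computation.

     ierr = VecScatterBegin(ctx,x,xlocal,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
     (local computation that does not touch xlocal)
     ierr = VecScatterEnd(ctx,x,xlocal,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
     (computation that consumes xlocal)
*/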

/*
  This only works correctly for square matrices where the subblock A->A is the
   diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIBAIJ"
PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1376 
1377 #undef __FUNCT__
1378 #define __FUNCT__ "MatGetRow_MPIBAIJ"
1379 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1380 {
1381   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
1382   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1383   PetscErrorCode ierr;
1384   PetscInt       bs = matin->rmap.bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1385   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap.rstart,brend = matin->rmap.rend;
1386   PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;
1387 
1388   PetscFunctionBegin;
1389   if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1390   mat->getrowactive = PETSC_TRUE;
1391 
1392   if (!mat->rowvalues && (idx || v)) {
1393     /*
1394         allocate enough space to hold information from the longest row.
1395     */
1396     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1397     PetscInt     max = 1,mbs = mat->mbs,tmp;
1398     for (i=0; i<mbs; i++) {
1399       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1400       if (max < tmp) { max = tmp; }
1401     }
1402     ierr = PetscMalloc(max*bs2*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1403     mat->rowindices = (PetscInt*)(mat->rowvalues + max*bs2);
1404   }
1405 
1406   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows");
1407   lrow = row - brstart;
1408 
1409   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1410   if (!v)   {pvA = 0; pvB = 0;}
1411   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1412   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1413   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1414   nztot = nzA + nzB;
1415 
1416   cmap  = mat->garray;
1417   if (v  || idx) {
1418     if (nztot) {
1419       /* Sort by increasing column numbers, assuming A and B already sorted */
1420       PetscInt imark = -1;
1421       if (v) {
1422         *v = v_p = mat->rowvalues;
1423         for (i=0; i<nzB; i++) {
1424           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1425           else break;
1426         }
1427         imark = i;
1428         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1429         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1430       }
1431       if (idx) {
1432         *idx = idx_p = mat->rowindices;
1433         if (imark > -1) {
1434           for (i=0; i<imark; i++) {
1435             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1436           }
1437         } else {
1438           for (i=0; i<nzB; i++) {
1439             if (cmap[cworkB[i]/bs] < cstart)
1440               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1441             else break;
1442           }
1443           imark = i;
1444         }
1445         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1446         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1447       }
1448     } else {
1449       if (idx) *idx = 0;
1450       if (v)   *v   = 0;
1451     }
1452   }
1453   *nz = nztot;
1454   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1455   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1456   PetscFunctionReturn(0);
1457 }
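
/*
   Worked example for the merge above (hypothetical, bs=1): if this process owns
   global columns [4,8) and the row has diagonal-part columns {4,6} and
   off-diagonal-part columns {1,9} (after mapping through cmap[]), the row is
   returned as {1,4,6,9}: B entries below cstart first, then all of A, then the
   remaining B entries, so the whole row comes out in increasing global column order.
*/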
1458 
1459 #undef __FUNCT__
1460 #define __FUNCT__ "MatRestoreRow_MPIBAIJ"
1461 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1462 {
1463   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1464 
1465   PetscFunctionBegin;
1466   if (!baij->getrowactive) {
1467     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1468   }
1469   baij->getrowactive = PETSC_FALSE;
1470   PetscFunctionReturn(0);
1471 }
1472 
1473 #undef __FUNCT__
1474 #define __FUNCT__ "MatZeroEntries_MPIBAIJ"
1475 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1476 {
1477   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1478   PetscErrorCode ierr;
1479 
1480   PetscFunctionBegin;
1481   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1482   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1483   PetscFunctionReturn(0);
1484 }
1485 
1486 #undef __FUNCT__
1487 #define __FUNCT__ "MatGetInfo_MPIBAIJ"
1488 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1489 {
1490   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
1491   Mat            A = a->A,B = a->B;
1492   PetscErrorCode ierr;
1493   PetscReal      isend[5],irecv[5];
1494 
1495   PetscFunctionBegin;
1496   info->block_size     = (PetscReal)matin->rmap.bs;
1497   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1498   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1499   isend[3] = info->memory;  isend[4] = info->mallocs;
1500   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1501   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1502   isend[3] += info->memory;  isend[4] += info->mallocs;
1503   if (flag == MAT_LOCAL) {
1504     info->nz_used      = isend[0];
1505     info->nz_allocated = isend[1];
1506     info->nz_unneeded  = isend[2];
1507     info->memory       = isend[3];
1508     info->mallocs      = isend[4];
1509   } else if (flag == MAT_GLOBAL_MAX) {
1510     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1511     info->nz_used      = irecv[0];
1512     info->nz_allocated = irecv[1];
1513     info->nz_unneeded  = irecv[2];
1514     info->memory       = irecv[3];
1515     info->mallocs      = irecv[4];
1516   } else if (flag == MAT_GLOBAL_SUM) {
1517     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1518     info->nz_used      = irecv[0];
1519     info->nz_allocated = irecv[1];
1520     info->nz_unneeded  = irecv[2];
1521     info->memory       = irecv[3];
1522     info->mallocs      = irecv[4];
1523   } else {
1524     SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1525   }
1526   info->rows_global       = (PetscReal)matin->rmap.N;
1527   info->columns_global    = (PetscReal)matin->cmap.N;
1528   info->rows_local        = (PetscReal)matin->rmap.n;
1529   info->columns_local     = (PetscReal)matin->cmap.n;
1530   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1531   info->fill_ratio_needed = 0;
1532   info->factor_mallocs    = 0;
1533   PetscFunctionReturn(0);
1534 }
1535 
1536 #undef __FUNCT__
1537 #define __FUNCT__ "MatSetOption_MPIBAIJ"
1538 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscTruth flg)
1539 {
1540   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1541   PetscErrorCode ierr;
1542 
1543   PetscFunctionBegin;
1544   switch (op) {
1545   case MAT_NO_NEW_NONZERO_LOCATIONS:
1546   case MAT_COLUMNS_SORTED:
1547   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1548   case MAT_KEEP_ZEROED_ROWS:
1549   case MAT_NEW_NONZERO_LOCATION_ERR:
1550     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1551     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1552     break;
1553   case MAT_ROW_ORIENTED:
1554     a->roworiented = flg;
1555     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1556     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1557     break;
1558   case MAT_ROWS_SORTED:
1559   case MAT_NEW_DIAGONALS:
1560     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1561     break;
1562   case MAT_IGNORE_OFF_PROC_ENTRIES:
1563     a->donotstash = flg;
1564     break;
1565   case MAT_USE_HASH_TABLE:
1566     a->ht_flag = flg;
1567     break;
1568   case MAT_SYMMETRIC:
1569   case MAT_STRUCTURALLY_SYMMETRIC:
1570   case MAT_HERMITIAN:
1571   case MAT_SYMMETRY_ETERNAL:
1572     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1573     break;
1574   default:
1575     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1576   }
1577   PetscFunctionReturn(0);
1578 }
1579 
1580 #undef __FUNCT__
1581 #define __FUNCT__ "MatTranspose_MPIBAIJ"
1582 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,Mat *matout)
1583 {
1584   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
1585   Mat_SeqBAIJ    *Aloc;
1586   Mat            B;
1587   PetscErrorCode ierr;
1588   PetscInt       M=A->rmap.N,N=A->cmap.N,*ai,*aj,i,*rvals,j,k,col;
1589   PetscInt       bs=A->rmap.bs,mbs=baij->mbs;
1590   MatScalar      *a;
1591 
1592   PetscFunctionBegin;
1593   if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1594   ierr = MatCreate(A->comm,&B);CHKERRQ(ierr);
1595   ierr = MatSetSizes(B,A->cmap.n,A->rmap.n,N,M);CHKERRQ(ierr);
1596   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
1597   ierr = MatMPIBAIJSetPreallocation(B,A->rmap.bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1598 
1599   /* copy over the A part */
1600   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1601   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1602   ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
1603 
1604   for (i=0; i<mbs; i++) {
1605     rvals[0] = bs*(baij->rstartbs + i);
1606     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1607     for (j=ai[i]; j<ai[i+1]; j++) {
1608       col = (baij->cstartbs+aj[j])*bs;
1609       for (k=0; k<bs; k++) {
1610         ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1611         col++; a += bs;
1612       }
1613     }
1614   }
1615   /* copy over the B part */
1616   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1617   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1618   for (i=0; i<mbs; i++) {
1619     rvals[0] = bs*(baij->rstartbs + i);
1620     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1621     for (j=ai[i]; j<ai[i+1]; j++) {
1622       col = baij->garray[aj[j]]*bs;
1623       for (k=0; k<bs; k++) {
1624         ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1625         col++; a += bs;
1626       }
1627     }
1628   }
1629   ierr = PetscFree(rvals);CHKERRQ(ierr);
1630   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1631   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1632 
1633   if (matout) {
1634     *matout = B;
1635   } else {
1636     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1637   }
1638   PetscFunctionReturn(0);
1639 }
1640 
1641 #undef __FUNCT__
1642 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
1643 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1644 {
1645   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1646   Mat            a = baij->A,b = baij->B;
1647   PetscErrorCode ierr;
1648   PetscInt       s1,s2,s3;
1649 
1650   PetscFunctionBegin;
1651   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1652   if (rr) {
1653     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1654     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1655     /* Overlap communication with computation. */
1656     ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1657   }
1658   if (ll) {
1659     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1660     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1661     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1662   }
1663   /* scale the diagonal block */
1664   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1665 
1666   if (rr) {
1667     /* Do a scatter end and then right scale the off-diagonal block */
1668     ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1669     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1670   }
1671 
1672   PetscFunctionReturn(0);
1673 }
1674 
1675 #undef __FUNCT__
1676 #define __FUNCT__ "MatZeroRows_MPIBAIJ"
1677 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
1678 {
1679   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1680   PetscErrorCode ierr;
1681   PetscMPIInt    imdex,size = l->size,n,rank = l->rank;
1682   PetscInt       i,*owners = A->rmap.range;
1683   PetscInt       *nprocs,j,idx,nsends,row;
1684   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
1685   PetscInt       *rvalues,tag = A->tag,count,base,slen,*source,lastidx = -1;
1686   PetscInt       *lens,*lrows,*values,rstart_bs=A->rmap.rstart;
1687   MPI_Comm       comm = A->comm;
1688   MPI_Request    *send_waits,*recv_waits;
1689   MPI_Status     recv_status,*send_status;
1690 #if defined(PETSC_DEBUG)
1691   PetscTruth     found = PETSC_FALSE;
1692 #endif
1693 
1694   PetscFunctionBegin;
1695   /*  first count number of contributors to each processor */
1696   ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
1697   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
1698   ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
1699   j = 0;
1700   for (i=0; i<N; i++) {
1701     if (lastidx > (idx = rows[i])) j = 0;
1702     lastidx = idx;
1703     for (; j<size; j++) {
1704       if (idx >= owners[j] && idx < owners[j+1]) {
1705         nprocs[2*j]++;
1706         nprocs[2*j+1] = 1;
1707         owner[i] = j;
1708 #if defined(PETSC_DEBUG)
1709         found = PETSC_TRUE;
1710 #endif
1711         break;
1712       }
1713     }
1714 #if defined(PETSC_DEBUG)
1715     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
1716     found = PETSC_FALSE;
1717 #endif
1718   }
1719   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
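  /* worked example (hypothetical): with size=3 and nprocs = {4,1, 0,0, 2,1} this
     process sends 4 row indices to process 0 and 2 to process 2, so nsends = 2;
     nprocs[2*j] counts the indices destined for process j and nprocs[2*j+1]
     flags whether any message is sent to it */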
1720 
1721   /* inform other processors of number of messages and max length*/
1722   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
1723 
1724   /* post receives:   */
1725   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
1726   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
1727   for (i=0; i<nrecvs; i++) {
1728     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
1729   }
1730 
1731   /* do sends:
1732      1) starts[i] gives the starting index in svalues for stuff going to
1733      the ith processor
1734   */
1735   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
1736   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
1737   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
1738   starts[0]  = 0;
1739   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1740   for (i=0; i<N; i++) {
1741     svalues[starts[owner[i]]++] = rows[i];
1742   }
1743 
1744   starts[0] = 0;
1745   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1746   count = 0;
1747   for (i=0; i<size; i++) {
1748     if (nprocs[2*i+1]) {
1749       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
1750     }
1751   }
1752   ierr = PetscFree(starts);CHKERRQ(ierr);
1753 
1754   base = owners[rank];
1755 
1756   /*  wait on receives */
1757   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
1758   source = lens + nrecvs;
1759   count  = nrecvs; slen = 0;
1760   while (count) {
1761     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
1762     /* unpack receives into our local space */
1763     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
1764     source[imdex]  = recv_status.MPI_SOURCE;
1765     lens[imdex]    = n;
1766     slen          += n;
1767     count--;
1768   }
1769   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1770 
1771   /* move the data into the send scatter */
1772   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
1773   count = 0;
1774   for (i=0; i<nrecvs; i++) {
1775     values = rvalues + i*nmax;
1776     for (j=0; j<lens[i]; j++) {
1777       lrows[count++] = values[j] - base;
1778     }
1779   }
1780   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1781   ierr = PetscFree(lens);CHKERRQ(ierr);
1782   ierr = PetscFree(owner);CHKERRQ(ierr);
1783   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1784 
1785   /* actually zap the local rows */
1786   /*
1787         Zero the required rows. If the "diagonal block" of the matrix
1788      is square and the user wishes to set the diagonal, we use separate
1789      code so that MatSetValues() is not called for each diagonal entry;
1790      that would allocate new memory each time, causing many mallocs and slowing things down.
1791 
1792        Contributed by: Matthew Knepley
1793   */
1794   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1795   ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0);CHKERRQ(ierr);
1796   if ((diag != 0.0) && (l->A->rmap.N == l->A->cmap.N)) {
1797     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag);CHKERRQ(ierr);
1798   } else if (diag != 0.0) {
1799     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr);
1800     if (((Mat_SeqBAIJ*)l->A->data)->nonew) {
1801       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1802 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1803     }
1804     for (i=0; i<slen; i++) {
1805       row  = lrows[i] + rstart_bs;
1806       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
1807     }
1808     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1809     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1810   } else {
1811     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr);
1812   }
1813 
1814   ierr = PetscFree(lrows);CHKERRQ(ierr);
1815 
1816   /* wait on sends */
1817   if (nsends) {
1818     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1819     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1820     ierr = PetscFree(send_status);CHKERRQ(ierr);
1821   }
1822   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1823   ierr = PetscFree(svalues);CHKERRQ(ierr);
1824 
1825   PetscFunctionReturn(0);
1826 }
1827 
1828 #undef __FUNCT__
1829 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
1830 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1831 {
1832   Mat_MPIBAIJ    *a   = (Mat_MPIBAIJ*)A->data;
1833   PetscErrorCode ierr;
1834 
1835   PetscFunctionBegin;
1836   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1837   PetscFunctionReturn(0);
1838 }
1839 
1840 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
1841 
1842 #undef __FUNCT__
1843 #define __FUNCT__ "MatEqual_MPIBAIJ"
1844 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag)
1845 {
1846   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1847   Mat            a,b,c,d;
1848   PetscTruth     flg;
1849   PetscErrorCode ierr;
1850 
1851   PetscFunctionBegin;
1852   a = matA->A; b = matA->B;
1853   c = matB->A; d = matB->B;
1854 
1855   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1856   if (flg) {
1857     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1858   }
1859   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1860   PetscFunctionReturn(0);
1861 }
1862 
1863 #undef __FUNCT__
1864 #define __FUNCT__ "MatCopy_MPIBAIJ"
1865 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
1866 {
1867   PetscErrorCode ierr;
1868   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ *)A->data;
1869   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ *)B->data;
1870 
1871   PetscFunctionBegin;
1872   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1873   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1874     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1875   } else {
1876     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1877     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1878   }
1879   PetscFunctionReturn(0);
1880 }
1881 
1882 #undef __FUNCT__
1883 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ"
1884 PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A)
1885 {
1886   PetscErrorCode ierr;
1887 
1888   PetscFunctionBegin;
1889   ierr =  MatMPIBAIJSetPreallocation(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1890   PetscFunctionReturn(0);
1891 }
1892 
1893 #include "petscblaslapack.h"
1894 #undef __FUNCT__
1895 #define __FUNCT__ "MatAXPY_MPIBAIJ"
1896 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1897 {
1898   PetscErrorCode ierr;
1899   Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data;
1900   PetscBLASInt   bnz,one=1;
1901   Mat_SeqBAIJ    *x,*y;
1902 
1903   PetscFunctionBegin;
1904   if (str == SAME_NONZERO_PATTERN) {
1905     PetscScalar alpha = a;
1906     x = (Mat_SeqBAIJ *)xx->A->data;
1907     y = (Mat_SeqBAIJ *)yy->A->data;
1908     bnz = (PetscBLASInt)(x->nz*Y->rmap.bs*Y->rmap.bs); /* nz counts bs x bs blocks, each holding bs*bs scalars */
1909     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1910     x = (Mat_SeqBAIJ *)xx->B->data;
1911     y = (Mat_SeqBAIJ *)yy->B->data;
1912     bnz = (PetscBLASInt)(x->nz*Y->rmap.bs*Y->rmap.bs);
1913     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1914   } else {
1915     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1916   }
1917   PetscFunctionReturn(0);
1918 }
1919 
1920 #undef __FUNCT__
1921 #define __FUNCT__ "MatRealPart_MPIBAIJ"
1922 PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
1923 {
1924   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ*)A->data;
1925   PetscErrorCode ierr;
1926 
1927   PetscFunctionBegin;
1928   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1929   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1930   PetscFunctionReturn(0);
1931 }
1932 
1933 #undef __FUNCT__
1934 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ"
1935 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
1936 {
1937   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ*)A->data;
1938   PetscErrorCode ierr;
1939 
1940   PetscFunctionBegin;
1941   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1942   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1943   PetscFunctionReturn(0);
1944 }
1945 
1946 /* -------------------------------------------------------------------*/
1947 static struct _MatOps MatOps_Values = {
1948        MatSetValues_MPIBAIJ,
1949        MatGetRow_MPIBAIJ,
1950        MatRestoreRow_MPIBAIJ,
1951        MatMult_MPIBAIJ,
1952 /* 4*/ MatMultAdd_MPIBAIJ,
1953        MatMultTranspose_MPIBAIJ,
1954        MatMultTransposeAdd_MPIBAIJ,
1955        0,
1956        0,
1957        0,
1958 /*10*/ 0,
1959        0,
1960        0,
1961        0,
1962        MatTranspose_MPIBAIJ,
1963 /*15*/ MatGetInfo_MPIBAIJ,
1964        MatEqual_MPIBAIJ,
1965        MatGetDiagonal_MPIBAIJ,
1966        MatDiagonalScale_MPIBAIJ,
1967        MatNorm_MPIBAIJ,
1968 /*20*/ MatAssemblyBegin_MPIBAIJ,
1969        MatAssemblyEnd_MPIBAIJ,
1970        0,
1971        MatSetOption_MPIBAIJ,
1972        MatZeroEntries_MPIBAIJ,
1973 /*25*/ MatZeroRows_MPIBAIJ,
1974        0,
1975        0,
1976        0,
1977        0,
1978 /*30*/ MatSetUpPreallocation_MPIBAIJ,
1979        0,
1980        0,
1981        0,
1982        0,
1983 /*35*/ MatDuplicate_MPIBAIJ,
1984        0,
1985        0,
1986        0,
1987        0,
1988 /*40*/ MatAXPY_MPIBAIJ,
1989        MatGetSubMatrices_MPIBAIJ,
1990        MatIncreaseOverlap_MPIBAIJ,
1991        MatGetValues_MPIBAIJ,
1992        MatCopy_MPIBAIJ,
1993 /*45*/ 0,
1994        MatScale_MPIBAIJ,
1995        0,
1996        0,
1997        0,
1998 /*50*/ 0,
1999        0,
2000        0,
2001        0,
2002        0,
2003 /*55*/ 0,
2004        0,
2005        MatSetUnfactored_MPIBAIJ,
2006        0,
2007        MatSetValuesBlocked_MPIBAIJ,
2008 /*60*/ 0,
2009        MatDestroy_MPIBAIJ,
2010        MatView_MPIBAIJ,
2011        0,
2012        0,
2013 /*65*/ 0,
2014        0,
2015        0,
2016        0,
2017        0,
2018 /*70*/ MatGetRowMaxAbs_MPIBAIJ,
2019        0,
2020        0,
2021        0,
2022        0,
2023 /*75*/ 0,
2024        0,
2025        0,
2026        0,
2027        0,
2028 /*80*/ 0,
2029        0,
2030        0,
2031        0,
2032        MatLoad_MPIBAIJ,
2033 /*85*/ 0,
2034        0,
2035        0,
2036        0,
2037        0,
2038 /*90*/ 0,
2039        0,
2040        0,
2041        0,
2042        0,
2043 /*95*/ 0,
2044        0,
2045        0,
2046        0,
2047        0,
2048 /*100*/0,
2049        0,
2050        0,
2051        0,
2052        0,
2053 /*105*/0,
2054        MatRealPart_MPIBAIJ,
2055        MatImaginaryPart_MPIBAIJ};
2056 
2057 
2058 EXTERN_C_BEGIN
2059 #undef __FUNCT__
2060 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
2061 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
2062 {
2063   PetscFunctionBegin;
2064   *a      = ((Mat_MPIBAIJ *)A->data)->A;
2065   *iscopy = PETSC_FALSE;
2066   PetscFunctionReturn(0);
2067 }
2068 EXTERN_C_END
2069 
2070 EXTERN_C_BEGIN
2071 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*);
2072 EXTERN_C_END
2073 
2074 #undef __FUNCT__
2075 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ"
2076 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
2077 {
2078   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)B->data;
2079   PetscInt       m = B->rmap.n/bs,cstart = baij->cstartbs, cend = baij->cendbs,j,nnz,i,d;
2080   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart = baij->rstartbs,ii;
2081   const PetscInt *JJ;
2082   PetscScalar    *values;
2083   PetscErrorCode ierr;
2084 
2085   PetscFunctionBegin;
2086 #if defined(PETSC_OPT_g)
2087   if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_RANGE,"Ii[0] must be 0, it is %D",Ii[0]);
2088 #endif
2089   ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2090   o_nnz = d_nnz + m;
2091 
2092   for (i=0; i<m; i++) {
2093     nnz     = Ii[i+1]- Ii[i];
2094     JJ      = J + Ii[i];
2095     nnz_max = PetscMax(nnz_max,nnz);
2096 #if defined(PETSC_OPT_g)
2097     if (nnz < 0) SETERRQ1(PETSC_ERR_ARG_RANGE,"Local row %D has a negative number of columns %D",i,nnz);
2098 #endif
2099     for (j=0; j<nnz; j++) {
2100       if (*JJ >= cstart) break;
2101       JJ++;
2102     }
2103     d = 0;
2104     for (; j<nnz; j++) {
2105       if (*JJ++ >= cend) break;
2106       d++;
2107     }
2108     d_nnz[i] = d;
2109     o_nnz[i] = nnz - d;
2110   }
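  /* worked example (hypothetical row): with cstart=2, cend=5 and sorted block
     columns {0,3,4,7}, the first scan stops at 3 (>= cstart), the second scan
     counts {3,4} before reaching 7 (>= cend), giving d_nnz[i] = 2, o_nnz[i] = 4-2 = 2 */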
2111   ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2112   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2113 
2114   if (v) values = (PetscScalar*)v;
2115   else {
2116     ierr = PetscMalloc(bs*bs*(nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2117     ierr = PetscMemzero(values,bs*bs*nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2118   }
2119 
2120   ierr = MatSetOption(B,MAT_COLUMNS_SORTED,PETSC_TRUE);CHKERRQ(ierr);
2121   for (i=0; i<m; i++) {
2122     ii   = i + rstart;
2123     nnz  = Ii[i+1]- Ii[i];
2124     ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
2125   }
2126   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2127   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2128   ierr = MatSetOption(B,MAT_COLUMNS_SORTED,PETSC_FALSE);CHKERRQ(ierr);
2129 
2130   if (!v) {
2131     ierr = PetscFree(values);CHKERRQ(ierr);
2132   }
2133   PetscFunctionReturn(0);
2134 }
2135 
2136 #undef __FUNCT__
2137 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR"
2138 /*@C
2139    MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format
2140    (block compressed row).
2141 
2142    Collective on MPI_Comm
2143 
2144    Input Parameters:
2145 +  B - the matrix
2146 .  bs - the block size
2147 .  i - the indices into j for the start of each local block row (starts with zero)
2148 .  j - the block column indices for each local block row (starts with zero); these must be sorted for each row
2149 -  v - optional values in the matrix
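
   Example (a sketch; ii and jj are hypothetical local arrays for a process that
   owns two block rows whose diagonal portion covers block columns 0 and 1):
.vb
   PetscInt ii[] = {0,2,3};   /* ii[0] must be 0 */
   PetscInt jj[] = {0,1,1};   /* sorted block column indices for each block row */
   ierr = MatMPIBAIJSetPreallocationCSR(B,bs,ii,jj,PETSC_NULL);CHKERRQ(ierr); /* v may be PETSC_NULL */
.ve
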
2150    Level: developer
2151 
2152 .keywords: matrix, block, aij, compressed row, sparse, parallel
2153 
2154 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIBAIJ(), MPIBAIJ
2155 @*/
2156 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2157 {
2158   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]);
2159 
2160   PetscFunctionBegin;
2161   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
2162   if (f) {
2163     ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr);
2164   }
2165   PetscFunctionReturn(0);
2166 }
2167 
2168 EXTERN_C_BEGIN
2169 #undef __FUNCT__
2170 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
2171 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
2172 {
2173   Mat_MPIBAIJ    *b;
2174   PetscErrorCode ierr;
2175   PetscInt       i;
2176 
2177   PetscFunctionBegin;
2178   B->preallocated = PETSC_TRUE;
2179   ierr = PetscOptionsBegin(B->comm,B->prefix,"Options for MPIBAIJ matrix","Mat");CHKERRQ(ierr);
2180     ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
2181   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2182 
2183   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
2184   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2185   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2186   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2187   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2188 
2189   B->rmap.bs  = bs;
2190   B->cmap.bs  = bs;
2191   ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr);
2192   ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr);
2193 
2194   if (d_nnz) {
2195     for (i=0; i<B->rmap.n/bs; i++) {
2196       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
2197     }
2198   }
2199   if (o_nnz) {
2200     for (i=0; i<B->rmap.n/bs; i++) {
2201       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
2202     }
2203   }
2204 
2205   b = (Mat_MPIBAIJ*)B->data;
2206   b->bs2 = bs*bs;
2207   b->mbs = B->rmap.n/bs;
2208   b->nbs = B->cmap.n/bs;
2209   b->Mbs = B->rmap.N/bs;
2210   b->Nbs = B->cmap.N/bs;
2211 
2212   for (i=0; i<=b->size; i++) {
2213     b->rangebs[i] = B->rmap.range[i]/bs;
2214   }
2215   b->rstartbs = B->rmap.rstart/bs;
2216   b->rendbs   = B->rmap.rend/bs;
2217   b->cstartbs = B->cmap.rstart/bs;
2218   b->cendbs   = B->cmap.rend/bs;
2219 
2220   ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2221   ierr = MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);CHKERRQ(ierr);
2222   ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
2223   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2224   ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
2225   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2226   ierr = MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);CHKERRQ(ierr);
2227   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
2228   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
2229   ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
2230 
2231   ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);
2232 
2233   PetscFunctionReturn(0);
2234 }
2235 EXTERN_C_END
2236 
2237 EXTERN_C_BEGIN
2238 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
2239 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
2240 EXTERN_C_END
2241 
2242 /*MC
2243    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
2244 
2245    Options Database Keys:
2246 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
2247 . -mat_block_size <bs> - set the blocksize used to store the matrix
2248 - -mat_use_hash_table <fact> - use a hash table (with expansion factor fact) to save memory during matrix assembly
2249 
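   A matrix of this type is typically built with, e.g. (a sketch; comm and the
   sizes/preallocation counts are placeholders):
.vb
   MatCreate(comm,&A);
   MatSetSizes(A,m,n,M,N);
   MatSetType(A,MATMPIBAIJ);
   MatMPIBAIJSetPreallocation(A,bs,d_nz,d_nnz,o_nz,o_nnz);
.ve
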
2250   Level: beginner
2251 
2252 .seealso: MatCreateMPIBAIJ()
2253 M*/
2254 
2255 EXTERN_C_BEGIN
2256 #undef __FUNCT__
2257 #define __FUNCT__ "MatCreate_MPIBAIJ"
2258 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIBAIJ(Mat B)
2259 {
2260   Mat_MPIBAIJ    *b;
2261   PetscErrorCode ierr;
2262   PetscTruth     flg;
2263 
2264   PetscFunctionBegin;
2265   ierr = PetscNewLog(B,Mat_MPIBAIJ,&b);CHKERRQ(ierr);
2266   B->data = (void*)b;
2267 
2268 
2269   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2270   B->mapping    = 0;
2271   B->factor     = 0;
2272   B->assembled  = PETSC_FALSE;
2273 
2274   B->insertmode = NOT_SET_VALUES;
2275   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
2276   ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr);
2277 
2278   /* build local table of row and column ownerships */
2279   ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);
2280 
2281   /* build cache for off array entries formed */
2282   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
2283   b->donotstash  = PETSC_FALSE;
2284   b->colmap      = PETSC_NULL;
2285   b->garray      = PETSC_NULL;
2286   b->roworiented = PETSC_TRUE;
2287 
2288 #if defined(PETSC_USE_MAT_SINGLE)
2289   /* stuff for MatSetValues_XXX in single precision */
2290   b->setvalueslen     = 0;
2291   b->setvaluescopy    = PETSC_NULL;
2292 #endif
2293 
2294   /* stuff used in block assembly */
2295   b->barray       = 0;
2296 
2297   /* stuff used for matrix vector multiply */
2298   b->lvec         = 0;
2299   b->Mvctx        = 0;
2300 
2301   /* stuff for MatGetRow() */
2302   b->rowindices   = 0;
2303   b->rowvalues    = 0;
2304   b->getrowactive = PETSC_FALSE;
2305 
2306   /* hash table stuff */
2307   b->ht           = 0;
2308   b->hd           = 0;
2309   b->ht_size      = 0;
2310   b->ht_flag      = PETSC_FALSE;
2311   b->ht_fact      = 0;
2312   b->ht_total_ct  = 0;
2313   b->ht_insert_ct = 0;
2314 
2315   ierr = PetscOptionsBegin(B->comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr);
2316     ierr = PetscOptionsTruth("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
2317     if (flg) {
2318       PetscReal fact = 1.39;
2319       ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
2320       ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr);
2321       if (fact <= 1.0) fact = 1.39;
2322       ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
2323       ierr = PetscInfo1(0,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
2324     }
2325   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2326 
2327   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2328                                      "MatStoreValues_MPIBAIJ",
2329                                      MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
2330   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2331                                      "MatRetrieveValues_MPIBAIJ",
2332                                      MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
2333   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
2334                                      "MatGetDiagonalBlock_MPIBAIJ",
2335                                      MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
2336   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C",
2337                                      "MatMPIBAIJSetPreallocation_MPIBAIJ",
2338                                      MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
2339   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",
2340 				     "MatMPIBAIJSetPreallocationCSR_MPIBAIJ",
2341 				     MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
2342   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
2343                                      "MatDiagonalScaleLocal_MPIBAIJ",
2344                                      MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
2345   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C",
2346                                      "MatSetHashTableFactor_MPIBAIJ",
2347                                      MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
2348   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);
2349   PetscFunctionReturn(0);
2350 }
2351 EXTERN_C_END
2352 
2353 /*MC
2354    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
2355 
2356    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
2357    and MATMPIBAIJ otherwise.
2358 
2359    Options Database Keys:
2360 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
2361 
2362   Level: beginner
2363 
2364 .seealso: MatCreateMPIBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
2365 M*/
2366 
2367 EXTERN_C_BEGIN
2368 #undef __FUNCT__
2369 #define __FUNCT__ "MatCreate_BAIJ"
2370 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_BAIJ(Mat A)
2371 {
2372   PetscErrorCode ierr;
2373   PetscMPIInt    size;
2374 
2375   PetscFunctionBegin;
2376   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
2377   if (size == 1) {
2378     ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr);
2379   } else {
2380     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
2381   }
2382   PetscFunctionReturn(0);
2383 }
2384 EXTERN_C_END
2385 
2386 #undef __FUNCT__
2387 #define __FUNCT__ "MatMPIBAIJSetPreallocation"
2388 /*@C
2389    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
2390    (block compressed row).  For good matrix assembly performance
2391    the user should preallocate the matrix storage by setting the parameters
2392    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2393    performance can be increased by more than a factor of 50.
2394 
2395    Collective on Mat
2396 
2397    Input Parameters:
2398 +  A - the matrix
2399 .  bs   - size of block
2400 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2401            submatrix  (same for all local rows)
2402 .  d_nnz - array containing the number of block nonzeros in the various block rows
2403            of the diagonal portion of the local submatrix (possibly different for each
2404            block row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
2405 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2406            submatrix (same for all local rows).
2407 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2408            off-diagonal portion of the local submatrix (possibly different for
2409            each block row) or PETSC_NULL.
2410 
2411    If the *_nnz parameter is given then the *_nz parameter is ignored
2412 
2413    Options Database Keys:
2414 +   -mat_block_size - size of the blocks to use
2415 -   -mat_use_hash_table <fact> - use a hash table (with expansion factor fact) to save memory during matrix assembly
2416 
2417    Notes:
2418    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
2419    then it must be used on all processors that share the object for that argument.
2420 
2421    Storage Information:
2422    For a square global matrix we define each processor's diagonal portion
2423    to be its local rows and the corresponding columns (a square submatrix);
2424    each processor's off-diagonal portion encompasses the remainder of the
2425    local matrix (a rectangular submatrix).
2426 
2427    The user can specify preallocated storage for the diagonal part of
2428    the local submatrix with either d_nz or d_nnz (not both).  Set
2429    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2430    memory allocation.  Likewise, specify preallocated storage for the
2431    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2432 
2433    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2434    the figure below we depict these three local rows and all columns (0-11).
2435 
2436 .vb
2437            0 1 2 3 4 5 6 7 8 9 10 11
2438           -------------------
2439    row 3  |  o o o d d d o o o o o o
2440    row 4  |  o o o d d d o o o o o o
2441    row 5  |  o o o d d d o o o o o o
2442           -------------------
2443 .ve
2444 
2445    Thus, any entries in the d locations are stored in the d (diagonal)
2446    submatrix, and any entries in the o locations are stored in the
2447    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
2448    stored simply in the MATSEQBAIJ format for compressed row storage.
2449 
2450    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2451    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2452    In general, for PDE problems in which most nonzeros are near the diagonal,
2453    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2454    or you will get TERRIBLE performance; see the users' manual chapter on
2455    matrices.
2456 
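   Example (a sketch; M, N and the preallocation counts are placeholders):
.vb
   MatCreate(comm,&A);
   MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);
   MatSetType(A,MATMPIBAIJ);
   MatMPIBAIJSetPreallocation(A,2,3,PETSC_NULL,1,PETSC_NULL);  /* bs=2: 3 diagonal and 1 off-diagonal blocks per block row */
.ve
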
2457    Level: intermediate
2458 
2459 .keywords: matrix, block, aij, compressed row, sparse, parallel
2460 
2461 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR()
2462 @*/
2463 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2464 {
2465   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
2466 
2467   PetscFunctionBegin;
2468   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2469   if (f) {
2470     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2471   }
2472   PetscFunctionReturn(0);
2473 }
2474 
2475 #undef __FUNCT__
2476 #define __FUNCT__ "MatCreateMPIBAIJ"
2477 /*@C
2478    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
2479    (block compressed row).  For good matrix assembly performance
2480    the user should preallocate the matrix storage by setting the parameters
2481    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2482    performance can be increased by more than a factor of 50.
2483 
2484    Collective on MPI_Comm
2485 
2486    Input Parameters:
2487 +  comm - MPI communicator
2488 .  bs   - size of block
2489 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
2490            This value should be the same as the local size used in creating the
2491            y vector for the matrix-vector product y = Ax.
2492 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
2493            This value should be the same as the local size used in creating the
2494            x vector for the matrix-vector product y = Ax.
2495 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
2496 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
2497 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
2498            submatrix  (same for all local rows)
2499 .  d_nnz - array containing the number of nonzero blocks in the various block rows
2500            of the diagonal portion of the local submatrix (possibly different for each
2501            block row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
2502 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
2503            submatrix (same for all local rows).
2504 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
2505            off-diagonal portion of the local submatrix (possibly different for
2506            each block row) or PETSC_NULL.
2507 
2508    Output Parameter:
2509 .  A - the matrix
2510 
2511    Options Database Keys:
2512 +   -mat_block_size - size of the blocks to use
2513 -   -mat_use_hash_table <fact> - use a hash table (with expansion factor fact) to save memory during matrix assembly
2514 
2515    Notes:
2516    If the *_nnz parameter is given then the *_nz parameter is ignored
2517 
2518    A nonzero block is any block that has 1 or more nonzeros in it
2519 
2520    The user MUST specify either the local or global matrix dimensions
2521    (possibly both).
2522 
2523    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
2524    then it must be used on all processors that share the object for that argument.
2525 
2526    Storage Information:
2527    For a square global matrix we define each processor's diagonal portion
2528    to be its local rows and the corresponding columns (a square submatrix);
2529    each processor's off-diagonal portion encompasses the remainder of the
2530    local matrix (a rectangular submatrix).
2531 
2532    The user can specify preallocated storage for the diagonal part of
2533    the local submatrix with either d_nz or d_nnz (not both).  Set
2534    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2535    memory allocation.  Likewise, specify preallocated storage for the
2536    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2537 
2538    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2539    the figure below we depict these three local rows and all columns (0-11).
2540 
2541 .vb
2542            0 1 2 3 4 5 6 7 8 9 10 11
2543           -------------------
2544    row 3  |  o o o d d d o o o o o o
2545    row 4  |  o o o d d d o o o o o o
2546    row 5  |  o o o d d d o o o o o o
2547           -------------------
2548 .ve
2549 
2550    Thus, any entries in the d locations are stored in the d (diagonal)
2551    submatrix, and any entries in the o locations are stored in the
2552    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
2553    stored simply in the MATSEQBAIJ format for compressed row storage.
2554 
2555    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2556    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2557    In general, for PDE problems in which most nonzeros are near the diagonal,
2558    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2559    or you will get TERRIBLE performance; see the users' manual chapter on
2560    matrices.
2561 
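   Example (a sketch; M is a placeholder global size):
.vb
   Mat A;
   ierr = MatCreateMPIBAIJ(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DECIDE,M,M,3,PETSC_NULL,1,PETSC_NULL,&A);CHKERRQ(ierr);
.ve
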
2562    Level: intermediate
2563 
2564 .keywords: matrix, block, aij, compressed row, sparse, parallel
2565 
2566 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
2567 @*/
2568 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2569 {
2570   PetscErrorCode ierr;
2571   PetscMPIInt    size;
2572 
2573   PetscFunctionBegin;
2574   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2575   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2576   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2577   if (size > 1) {
2578     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
2579     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2580   } else {
2581     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
2582     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2583   }
2584   PetscFunctionReturn(0);
2585 }
2586 
2587 #undef __FUNCT__
2588 #define __FUNCT__ "MatDuplicate_MPIBAIJ"
2589 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2590 {
2591   Mat            mat;
2592   Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
2593   PetscErrorCode ierr;
2594   PetscInt       len=0;
2595 
2596   PetscFunctionBegin;
2597   *newmat       = 0;
2598   ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr);
2599   ierr = MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);CHKERRQ(ierr);
2600   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
2601   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2602 
2603   mat->factor       = matin->factor;
2604   mat->preallocated = PETSC_TRUE;
2605   mat->assembled    = PETSC_TRUE;
2606   mat->insertmode   = NOT_SET_VALUES;
2607 
2608   a      = (Mat_MPIBAIJ*)mat->data;
2609   mat->rmap.bs  = matin->rmap.bs;
2610   a->bs2   = oldmat->bs2;
2611   a->mbs   = oldmat->mbs;
2612   a->nbs   = oldmat->nbs;
2613   a->Mbs   = oldmat->Mbs;
2614   a->Nbs   = oldmat->Nbs;
2615 
2616   ierr = PetscMapCopy(matin->comm,&matin->rmap,&mat->rmap);CHKERRQ(ierr);
2617   ierr = PetscMapCopy(matin->comm,&matin->cmap,&mat->cmap);CHKERRQ(ierr);
2618 
2619   a->size         = oldmat->size;
2620   a->rank         = oldmat->rank;
2621   a->donotstash   = oldmat->donotstash;
2622   a->roworiented  = oldmat->roworiented;
2623   a->rowindices   = 0;
2624   a->rowvalues    = 0;
2625   a->getrowactive = PETSC_FALSE;
2626   a->barray       = 0;
2627   a->rstartbs     = oldmat->rstartbs;
2628   a->rendbs       = oldmat->rendbs;
2629   a->cstartbs     = oldmat->cstartbs;
2630   a->cendbs       = oldmat->cendbs;
2631 
2632   /* hash table stuff */
2633   a->ht           = 0;
2634   a->hd           = 0;
2635   a->ht_size      = 0;
2636   a->ht_flag      = oldmat->ht_flag;
2637   a->ht_fact      = oldmat->ht_fact;
2638   a->ht_total_ct  = 0;
2639   a->ht_insert_ct = 0;
2640 
2641   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
2642   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
2643   ierr = MatStashCreate_Private(matin->comm,matin->rmap.bs,&mat->bstash);CHKERRQ(ierr);
2644   if (oldmat->colmap) {
2645 #if defined (PETSC_USE_CTABLE)
2646   ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2647 #else
2648   ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2649   ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2650   ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2651 #endif
2652   } else a->colmap = 0;
2653 
2654   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2655     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2656     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2657     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
2658   } else a->garray = 0;
2659 
2660   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2661   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2662   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2663   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2664 
2665   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2666   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2667   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2668   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2669   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
2670   *newmat = mat;
2671 
2672   PetscFunctionReturn(0);
2673 }
2674 
2675 #include "petscsys.h"
2676 
2677 #undef __FUNCT__
2678 #define __FUNCT__ "MatLoad_MPIBAIJ"
2679 PetscErrorCode MatLoad_MPIBAIJ(PetscViewer viewer, MatType type,Mat *newmat)
2680 {
2681   Mat            A;
2682   PetscErrorCode ierr;
2683   int            fd;
2684   PetscInt       i,nz,j,rstart,rend;
2685   PetscScalar    *vals,*buf;
2686   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2687   MPI_Status     status;
2688   PetscMPIInt    rank,size,maxnz;
2689   PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
2690   PetscInt       *locrowlens = PETSC_NULL,*procsnz = PETSC_NULL,*browners = PETSC_NULL;
2691   PetscInt       jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax;
2692   PetscMPIInt    tag = ((PetscObject)viewer)->tag;
2693   PetscInt       *dlens = PETSC_NULL,*odlens = PETSC_NULL,*mask = PETSC_NULL,*masked1 = PETSC_NULL,*masked2 = PETSC_NULL,rowcount,odcount;
2694   PetscInt       dcount,kmax,k,nzcount,tmp,mend;
2695 
2696   PetscFunctionBegin;
2697   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr);
2698     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
2699   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2700 
2701   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2702   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2703   if (!rank) {
2704     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2705     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2706     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
2707   }
2708 
2709   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2710   M = header[1]; N = header[2];
2711 
2712   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2713 
2714   /*
2715      This code adds extra rows to make sure the number of rows is
2716      divisible by the blocksize
2717   */
2718   Mbs        = M/bs;
2719   extra_rows = bs - M + bs*Mbs;
2720   if (extra_rows == bs) extra_rows = 0;
2721   else                  Mbs++;
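  /* worked example: M=10, bs=4 gives Mbs=2 and extra_rows = 4-10+8 = 2, so Mbs
     becomes 3 and two dummy rows pad the matrix to 12 rows; M=8, bs=4 gives
     extra_rows = 4, which is reset to 0 since no padding is needed */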
2722   if (extra_rows && !rank) {
2723     ierr = PetscInfo(0,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2724   }
2725 
2726   /* determine ownership of all rows */
2727   mbs        = Mbs/size + ((Mbs % size) > rank);
2728   m          = mbs*bs;
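  /* worked example: Mbs=10, size=4 gives mbs = 2 + (10%4 > rank), so ranks 0 and 1
     own 3 block rows each while ranks 2 and 3 own 2; the block rows sum to Mbs */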
2729   ierr       = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr);
2730   ierr       = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2731 
2732   /* process 0 needs enough room for process with most rows */
2733   if (!rank) {
2734     mmax = rowners[1];
2735     for (i=2; i<size; i++) {
2736       mmax = PetscMax(mmax,rowners[i]);
2737     }
2738     mmax*=bs;
2739   } else mmax = m;
2740 
2741   rowners[0] = 0;
2742   for (i=2; i<=size; i++)  rowners[i] += rowners[i-1];
2743   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2744   rstart = rowners[rank];
2745   rend   = rowners[rank+1];
2746 
2747   /* distribute row lengths to all processors */
2748   ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr);
2749   if (!rank) {
2750     mend = m;
2751     if (size == 1) mend = mend - extra_rows;
2752     ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr);
2753     for (j=mend; j<m; j++) locrowlens[j] = 1;
2754     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2755     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2756     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2757     for (j=0; j<m; j++) {
2758       procsnz[0] += locrowlens[j];
2759     }
2760     for (i=1; i<size; i++) {
2761       mend = browners[i+1] - browners[i];
2762       if (i == size-1) mend = mend - extra_rows;
2763       ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr);
2764       for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
2765       /* calculate the number of nonzeros on each processor */
2766       for (j=0; j<browners[i+1]-browners[i]; j++) {
2767         procsnz[i] += rowlengths[j];
2768       }
2769       ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2770     }
2771     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2772   } else {
2773     ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2774   }
2775 
2776   if (!rank) {
2777     /* determine max buffer needed and allocate it */
2778     maxnz = procsnz[0];
2779     for (i=1; i<size; i++) {
2780       maxnz = PetscMax(maxnz,procsnz[i]);
2781     }
2782     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2783 
2784     /* read in my part of the matrix column indices  */
2785     nz     = procsnz[0];
2786     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2787     mycols = ibuf;
2788     if (size == 1)  nz -= extra_rows;
2789     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2790     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2791 
2792     /* read in the parts for all the other processes (except the last) and ship them off */
2793     for (i=1; i<size-1; i++) {
2794       nz   = procsnz[i];
2795       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2796       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2797     }
2798     /* read in the stuff for the last proc */
2799     if (size != 1) {
2800       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2801       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2802       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2803       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2804     }
2805     ierr = PetscFree(cols);CHKERRQ(ierr);
2806   } else {
2807     /* determine buffer space needed for message */
2808     nz = 0;
2809     for (i=0; i<m; i++) {
2810       nz += locrowlens[i];
2811     }
2812     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2813     mycols = ibuf;
2814     /* receive message of column indices*/
2815     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2816     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2817     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2818   }
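
  /*
     The read-and-ship pattern above is reused for the row lengths, the
     column indices, and (below) the numerical values: only process 0
     touches the file, reads each process's slice in order, and ships it
     with a point-to-point send.  A minimal sketch of the pattern, with
     hypothetical names (nslices, slice_len[], buf, mylen):

         if (!rank) {
           for (i=1; i<nslices; i++) {
             ierr = PetscBinaryRead(fd,buf,slice_len[i],PETSC_INT);CHKERRQ(ierr);   <- read slice i from disk
             ierr = MPI_Send(buf,slice_len[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);   <- ship it to process i
           }
         } else {
           ierr = MPI_Recv(buf,mylen,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);    <- everyone else receives
         }

     Because only process 0 reads, the file layout stays independent of
     the number of processes used to load the matrix.
  */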

  /* loop over local block rows, counting the diagonal and off-diagonal block entries */
  ierr     = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
  ierr     = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
  ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
  rowcount = 0; nzcount = 0;
  for (i=0; i<mbs; i++) {
    dcount  = 0;
    odcount = 0;
    for (j=0; j<bs; j++) {
      kmax = locrowlens[rowcount];
      for (k=0; k<kmax; k++) {
        tmp = mycols[nzcount++]/bs;   /* block column of this entry */
        if (!mask[tmp]) {
          mask[tmp] = 1;
          if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
          else masked1[dcount++] = tmp;
        }
      }
      rowcount++;
    }

    dlens[i]  = dcount;
    odlens[i] = odcount;

    /* zero out the mask elements we set */
    for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
    for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
  }
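
  /*
     The mask[] loop above counts each block column at most once per block
     row without sorting: mark a block column on first touch, remember
     where the mark went (masked1 for diagonal-block columns, masked2 for
     off-diagonal ones), then undo only the marks that were set, which is
     O(dcount+odcount) rather than O(Mbs) per block row.  A self-contained
     sketch of the same technique (editor's illustration; cols[], ncols,
     nd, no are hypothetical names):

         PetscInt nd = 0,no = 0;
         for (k=0; k<ncols; k++) {
           PetscInt bcol = cols[k]/bs;                     <- block column of entry k
           if (!mask[bcol]) {                              <- first touch of this block column
             mask[bcol] = 1;
             if (bcol >= rstart && bcol < rend) masked1[nd++] = bcol;
             else                               masked2[no++] = bcol;
           }
         }
         for (k=0; k<nd; k++) mask[masked1[k]] = 0;        <- cheap, targeted cleanup
         for (k=0; k<no; k++) mask[masked2[k]] = 0;
  */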

  /* create our matrix */
  ierr = MatCreate(comm,&A);CHKERRQ(ierr);
  ierr = MatSetSizes(A,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
  ierr = MatSetType(A,type);CHKERRQ(ierr);
  ierr = MatMPIBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
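
  /*
     The preallocation arrays are in block terms: dlens[i] is the number of
     block columns of local block row i that land in the diagonal block,
     odlens[i] the number that land off it.  A hedged sketch of the same
     call with explicit (made-up) counts, for a process owning two block
     rows:

         PetscInt d_nnz[2] = {3,2},o_nnz[2] = {1,0};
         ierr = MatMPIBAIJSetPreallocation(A,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);

     Exact counts make the MatSetValues() calls below allocation-free.
  */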

  /* Why can't this be called as ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); ? */
  ierr = MatSetOption(A,MAT_COLUMNS_SORTED,PETSC_TRUE);CHKERRQ(ierr);

  if (!rank) {
    ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
    /* read in my part of the matrix numerical values */
    nz     = procsnz[0];
    vals   = buf;
    mycols = ibuf;
    if (size == 1) nz -= extra_rows;
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
    if (size == 1) for (i=0; i<extra_rows; i++) { vals[nz+i] = 1.0; }

    /* insert into matrix */
    jj = rstart*bs;   /* global index of our first point row */
    for (i=0; i<m; i++) {
      ierr    = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
    /* read in the other processes' values (except the last one's) and ship them out */
    for (i=1; i<size-1; i++) {
      nz   = procsnz[i];
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    /* the last process */
    if (size != 1) {
      nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
      ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);

    /* receive the message of values */
    vals   = buf;
    mycols = ibuf;
    ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Received fewer or more values than expected; file may be corrupt");

    /* insert into matrix */
    jj = rstart*bs;   /* global index of our first point row */
    for (i=0; i<m; i++) {
      ierr    = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
  }
  ierr = PetscFree(locrowlens);CHKERRQ(ierr);
  ierr = PetscFree(buf);CHKERRQ(ierr);
  ierr = PetscFree(ibuf);CHKERRQ(ierr);
  ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
  ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
  ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  *newmat = A;
  PetscFunctionReturn(0);
}

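/*
   Typical user-level route into the loader above (a hedged sketch; in this
   PETSc generation MatLoad() dispatches on the requested type and the
   viewer carries the binary file descriptor):

       PetscViewer viewer;
       Mat         A;
       ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
       ierr = MatLoad(viewer,MATMPIBAIJ,&A);CHKERRQ(ierr);
       ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);

   The block size used for the load can be chosen at run time with
   -matload_block_size <bs>.
*/
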
#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
/*@
   MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the hash table.

   Collective on Mat

   Input Parameters:
+  mat  - the matrix
-  fact - factor used in computing the hash table size

   Level: advanced

   Notes:
   This can also be set by the command line option: -mat_use_hash_table <fact>

.keywords: matrix, hashtable, factor, HT

.seealso: MatSetOption()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
{
  PetscErrorCode ierr,(*f)(Mat,PetscReal);

  PetscFunctionBegin;
  ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(mat,fact);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

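/*
   Example usage (a hedged sketch): turn on the hash-table insertion path
   and enlarge the table before setting any values:

       ierr = MatCreate(PETSC_COMM_WORLD,&mat);CHKERRQ(ierr);
       ierr = MatSetSizes(mat,m,n,M,N);CHKERRQ(ierr);
       ierr = MatSetType(mat,MATMPIBAIJ);CHKERRQ(ierr);
       ierr = MatSetOption(mat,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
       ierr = MatMPIBAIJSetHashTableFactor(mat,1.6);CHKERRQ(ierr);

   Because the factor is attached via PetscObjectQueryFunction(), the call
   is a harmless no-op on matrix types that do not provide
   "MatSetHashTableFactor_C".
*/
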
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
{
  Mat_MPIBAIJ *baij;

  PetscFunctionBegin;
  baij = (Mat_MPIBAIJ*)mat->data;
  baij->ht_fact = fact;
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
/* returns the on-process diagonal (Ad) and off-diagonal (Ao) SeqBAIJ pieces,
   plus the map from Ao's local block columns to global block columns */
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
  PetscFunctionBegin;
  *Ad     = a->A;
  *Ao     = a->B;
  *colmap = a->garray;
  PetscFunctionReturn(0);
}
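
/*
   Example usage (a hedged sketch): inspect the two sequential pieces of an
   assembled MPIBAIJ matrix:

       Mat      Ad,Ao;
       PetscInt *colmap;
       ierr = MatMPIBAIJGetSeqBAIJ(A,&Ad,&Ao,&colmap);CHKERRQ(ierr);

   Ad couples locally owned rows with locally owned columns; Ao holds the
   remaining entries, and local block column j of Ao corresponds to global
   block column colmap[j].  The returned matrices are references to A's
   internal data, so the caller must not destroy them.
*/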