#define PETSCMAT_DLL

#include "src/mat/impls/baij/mpi/mpibaij.h"   /*I  "petscmat.h"  I*/

EXTERN PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
EXTERN PetscErrorCode DisAssemble_MPIBAIJ(Mat);
EXTERN PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat,PetscInt,IS[],PetscInt);
EXTERN PetscErrorCode MatGetSubMatrices_MPIBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat *[]);
EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []);
EXTERN PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
EXTERN PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar);

/*  UGLY, ugly, ugly
   When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does
   not exist; the ..._MatScalar() names are simply #defined to the regular routines below.
   Otherwise ..._MatScalar() takes matrix elements that are already in single precision and
   inserts them into the single precision data structures, while MatSetValuesBlocked_MPIBAIJ()
   first converts the entries to single precision and then calls ..._MatScalar() to store
   them.
*/
#if defined(PETSC_USE_MAT_SINGLE)
EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
EXTERN PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
EXTERN PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
#else
#define MatSetValuesBlocked_SeqBAIJ_MatScalar      MatSetValuesBlocked_SeqBAIJ
#define MatSetValues_MPIBAIJ_MatScalar             MatSetValues_MPIBAIJ
#define MatSetValuesBlocked_MPIBAIJ_MatScalar      MatSetValuesBlocked_MPIBAIJ
#define MatSetValues_MPIBAIJ_HT_MatScalar          MatSetValues_MPIBAIJ_HT
#define MatSetValuesBlocked_MPIBAIJ_HT_MatScalar   MatSetValuesBlocked_MPIBAIJ_HT
#endif

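/*
   MatGetRowMaxAbs_MPIBAIJ() below takes the row maxima of the diagonal block A
   and the off-diagonal block B separately, keeps the larger of the two for
   each local row, and translates B's local column indices back to global point
   columns through garray.
*/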
#undef __FUNCT__
#define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ"
PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;

  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap.n; i++) {if (PetscAbsScalar(va[i])) idx[i] += A->cmap.rstart;}
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap.n,&vtmp);CHKERRQ(ierr);
  if (idx) {ierr = PetscMalloc(A->rmap.n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);}
  ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap.n; i++){
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {va[i] = vb[i]; if (idx) idx[i] = A->cmap.bs*a->garray[idxb[i]/A->cmap.bs] + (idxb[i] % A->cmap.bs);}
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  if (idxb) {ierr = PetscFree(idxb);CHKERRQ(ierr);}
  ierr = VecDestroy(vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPIBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*
     Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  This is done in a non-scalable way, since the
   length of colmap equals the number of global block columns.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIBAIJ_Private"
PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       nbs = B->nbs,i,bs=mat->rmap.bs;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr);
  for (i=0; i<nbs; i++){
    ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}
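
/*
   For illustration (numbers not from the source): with bs = 2 and off-diagonal
   global block columns garray[] = {3,7,9}, the loop above yields
   colmap[3] = 0*2+1 = 1, colmap[7] = 1*2+1 = 3, colmap[9] = 2*2+1 = 5, and 0
   everywhere else.  A lookup thus returns (local block offset)*bs + 1, so the
   callers below subtract 1 and add in[j]%bs to recover the local point column,
   while 0 (i.e. -1 after the subtraction) signals "column not present".
*/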

#define CHUNKSIZE  10

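/*
   The two macros below inline the insertion path of MatSetValues_SeqBAIJ() for
   a single (row,col,value) triple: locate the block row, find the block column
   with a short bisection followed by a linear scan, and either update the
   existing bs x bs block in place or shift the later blocks up and insert a
   new one, reallocating with MatSeqXAIJReallocateAIJ() when the row is full.
*/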
#define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto a_noinsert; \
        } \
      } \
      if (a->nonew == 1) goto a_noinsert; \
      if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
      MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      a_noinsert:; \
    ailen[brow] = nrow; \
}

#define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto b_noinsert; \
        } \
      } \
      if (b->nonew == 1) goto b_noinsert; \
      if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
      MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
      CHKMEMQ;\
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      b_noinsert:; \
    bilen[brow] = nrow; \
}

#if defined(PETSC_USE_MAT_SINGLE)
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ"
PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,N = m*n;
  MatScalar      *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen  = N;
  }
  vsingle = b->setvaluescopy;

  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValues_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,N = m*n*b->bs2;
  MatScalar      *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen  = N;
  }
  vsingle = b->setvaluescopy;
  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,N = m*n;
  MatScalar      *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen  = N;
  }
  vsingle = b->setvaluescopy;
  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValues_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,N = m*n*b->bs2;
  MatScalar      *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen  = N;
  }
  vsingle = b->setvaluescopy;
  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif

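/*
   Point-wise insertion: locally owned rows are split column by column between
   the diagonal block A and the off-diagonal block B (translating global to
   local column numbers through colmap once the matrix has been assembled);
   rows owned by other processes are put in the stash and communicated during
   assembly.
*/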
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar"
PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  MatScalar      value;
  PetscTruth     roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap.rstart;
  PetscInt       rend_orig=mat->rmap.rend,cstart_orig=mat->cmap.rstart;
  PetscInt       cend_orig=mat->cmap.rend,bs=mat->rmap.bs;

  /* Some variables required in the macro */
  Mat            A = baij->A;
  Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)(A)->data;
  PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar      *aa=a->a;

  Mat            B = baij->B;
  Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(B)->data;
  PetscInt       *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar      *ba=b->a;

  PetscInt       *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt       low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar      *ap,*bap;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      row = im[i] - rstart_orig;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig){
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B = baij->B;
              b = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba=b->a;
            } else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

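/*
   Blocked insertion: each bs x bs block passed by the caller is first gathered
   into the contiguous scratch buffer barray (except when n == 1 row-oriented
   or m == 1 column-oriented, where v is already contiguous) and then routed to
   the diagonal block A, the off-diagonal block B, or the stash for off-process
   rows.
*/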
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_MatScalar"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
  const MatScalar *value;
  MatScalar       *barray=baij->barray;
  PetscTruth      roworiented = baij->roworiented;
  PetscErrorCode  ierr;
  PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
  PetscInt        cend=baij->cendbs,bs=mat->rmap.bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++  = *value++;
            }
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend){
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
        else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined (PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
            }
          }
          else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#define HASH_KEY 0.6180339887
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
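/*
   HASH() is the classical multiplicative hash with the reciprocal of the
   golden ratio as the multiplier: h = floor(size * frac(key*HASH_KEY)).
   For illustration (numbers not from the source): key = 10 gives
   10*0.6180339887 = 6.180339887, whose fractional part is 0.180339887, so
   with size = 100 the slot is 18.  Collisions are resolved by the linear
   probing in the routines below.
*/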
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_HT_MatScalar"
PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscTruth     roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap.rstart;
  PetscInt       rend_orig=mat->rmap.rend,Nbs=baij->Nbs;
  PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap.bs,*HT=baij->ht,idx;
  PetscReal      tmp;
  MatScalar      **HD = baij->hd,value;
#if defined(PETSC_USE_DEBUG)
  PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
#endif
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j=0; j<n; j++) {
        col = in[j];
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        /* Look up in the hash table */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);


        idx = h1;
#if defined(PETSC_USE_DEBUG)
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#endif
        /* A hash table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT_MatScalar"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
  PetscTruth      roworiented = baij->roworiented;
  PetscErrorCode  ierr;
  PetscInt        i,j,ii,jj,row,col;
  PetscInt        rstart=baij->rstartbs;
  PetscInt        rend=mat->rmap.rend,stepval,bs=mat->rmap.bs,bs2=baij->bs2;
  PetscInt        h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  PetscReal       tmp;
  MatScalar       **HD = baij->hd,*baij_a;
  const MatScalar *v_t,*value;
#if defined(PETSC_USE_DEBUG)
  PetscInt        total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
#endif
    row   = im[i];
    v_t   = v + i*bs2;
    if (row >= rstart && row < rend) {
      for (j=0; j<n; j++) {
        col = in[j];

        /* Look up in the hash table */
        key = row*Nbs+col+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        total_ct++;
        insert_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#endif
        baij_a = HD[idx];
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t  += bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj]  += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj]  = *value++;
              }
            }
          }
        } else {
          value = v + j*(stepval+bs)*bs + i*bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj]  += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj]  = *value++;
              }
            }
          }
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIBAIJ"
PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs=mat->rmap.bs,i,j,bsrstart = mat->rmap.rstart,bsrend = mat->rmap.rend;
  PetscInt       bscstart = mat->cmap.rstart,bscend = mat->cmap.rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
    if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap.N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]);
        if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend){
          col = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

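/*
   Norms: the Frobenius norm sums |a_ij|^2 over both local blocks and reduces
   with MPI_SUM before taking the square root; the 1-norm accumulates
   per-global-column sums into a work array of length cmap.N and reduces with
   MPI_SUM; the infinity norm accumulates per-row sums block row by block row
   and reduces with MPI_MAX.
*/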
#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIBAIJ"
PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap.bs,nz,row,col;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr =  MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      nz = amat->nz*bs2;
      for (i=0; i<nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      nz = bmat->nz*bs2;
      for (i=0; i<nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *nrm = sqrt(*nrm);
    } else if (type == NORM_1) { /* max column sum */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
      ierr = PetscMalloc((2*mat->cmap.N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      tmp2 = tmp + mat->cmap.N;
      ierr = PetscMemzero(tmp,mat->cmap.N*sizeof(PetscReal));CHKERRQ(ierr);
      v = amat->a; jj = amat->j;
      for (i=0; i<amat->nz; i++) {
        for (j=0; j<bs; j++){
          col = bs*(cstart + *jj) + j; /* column index */
          for (row=0; row<bs; row++){
            tmp[col] += PetscAbsScalar(*v);  v++;
          }
        }
        jj++;
      }
      v = bmat->a; jj = bmat->j;
      for (i=0; i<bmat->nz; i++) {
        for (j=0; j<bs; j++){
          col = bs*garray[*jj] + j;
          for (row=0; row<bs; row++){
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->cmap.N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *nrm = 0.0;
      for (j=0; j<mat->cmap.N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row sum */
      PetscReal *sums;
      ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr);
      sum = 0.0;
      for (j=0; j<amat->mbs; j++) {
        for (row=0; row<bs; row++) sums[row] = 0.0;
        v = amat->a + bs2*amat->i[j];
        nz = amat->i[j+1]-amat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++){
            for (row=0; row<bs; row++){
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        v = bmat->a + bs2*bmat->i[j];
        nz = bmat->i[j+1]-bmat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++){
            for (row=0; row<bs; row++){
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        for (row=0; row<bs; row++){
          if (sums[row] > sum) sum = sums[row];
        }
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
      ierr = PetscFree(sums);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
    }
  }
  PetscFunctionReturn(0);
}

/*
  Creates and fills the hash table.
  The table is created only once; if new entries need to be
  added to the matrix, the hash table has to be destroyed
  and recreated.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            A = baij->A,B=baij->B;
  Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
  PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
  PetscErrorCode ierr;
  PetscInt       size,bs2=baij->bs2,rstart=baij->rstartbs;
  PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
  PetscInt       *HT,key;
  MatScalar      **HD;
  PetscReal      tmp;
#if defined(PETSC_USE_INFO)
  PetscInt       ct=0,max=0;
#endif

  PetscFunctionBegin;
  baij->ht_size=(PetscInt)(factor*nz);
  size = baij->ht_size;

  if (baij->ht) {
    PetscFunctionReturn(0);
  }

  /* Allocate Memory for Hash Table */
  ierr     = PetscMalloc((size)*(sizeof(PetscInt)+sizeof(MatScalar*))+1,&baij->hd);CHKERRQ(ierr);
  baij->ht = (PetscInt*)(baij->hd + size);
  HD       = baij->hd;
  HT       = baij->ht;


  ierr = PetscMemzero(HD,size*(sizeof(PetscInt)+sizeof(MatScalar*)));CHKERRQ(ierr);


  /* Loop Over A */
  for (i=0; i<a->mbs; i++) {
    for (j=ai[i]; j<ai[i+1]; j++) {
      row = i+rstart;
      col = aj[j]+cstart;

      key = row*Nbs + col + 1;
      h1  = HASH(size,key,tmp);
      for (k=0; k<size; k++){
        if (!HT[(h1+k)%size]) {
          HT[(h1+k)%size] = key;
          HD[(h1+k)%size] = a->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }
  /* Loop Over B */
  for (i=0; i<b->mbs; i++) {
    for (j=bi[i]; j<bi[i+1]; j++) {
      row = i+rstart;
      col = garray[bj[j]];
      key = row*Nbs + col + 1;
      h1  = HASH(size,key,tmp);
      for (k=0; k<size; k++){
        if (!HT[(h1+k)%size]) {
          HT[(h1+k)%size] = key;
          HD[(h1+k)%size] = b->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }

  /* Print Summary */
#if defined(PETSC_USE_INFO)
  for (i=0,j=0; i<size; i++) {
    if (HT[i]) {j++;}
  }
  ierr = PetscInfo2(0,"Average Search = %5.2f, max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}
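
/*
   A load-factor sketch (the 1.99 value is an assumption about the usual
   default for -mat_use_hash_table, not taken from this file): with
   factor = 1.99 and nz = 1000 nonzero blocks the table gets
   size = (PetscInt)(1.99*1000) = 1990 slots, so roughly half the slots stay
   empty and the linear probes above stay short.  The summary printed under
   PETSC_USE_INFO reports the resulting average and maximum probe lengths.
*/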

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (baij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,mat->rmap.range);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rangebs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(0,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(0,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col,other_disassembled;
  PetscTruth     r1,r2,r3;
  MatScalar      *val;
  InsertMode     addv = mat->insertmode;
  PetscMPIInt    n;

  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!baij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column-oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;
    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = PetscInfo1(0,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
  baij->rowvalues = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = baij->size,rank = baij->rank;
  PetscInt          bs = mat->rmap.bs;
  PetscTruth        iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
              rank,mat->rmap.N,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs,
              mat->rmap.bs,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr);
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    PetscInt    M = mat->rmap.N,N = mat->cmap.N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar   *a;

    /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(A,mat->rmap.bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ*)baij->B->data;
    ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ"
PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  if (iascii || isdraw || issocket || isbinary) {
    ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIBAIJ"
PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap.N,mat->cmap.N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = MatDestroy(baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(baij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
  if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
  ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree(baij->hd);CHKERRQ(ierr);
#if defined(PETSC_USE_MAT_SINGLE)
  ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(baij);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

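/*
   y = A*x with communication/computation overlap: start the scatter of the
   needed off-process entries of x into lvec, multiply by the diagonal block
   while the messages are in flight, finish the scatter, and then add in the
   off-diagonal contribution B*lvec.
*/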
#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIBAIJ"
PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap.n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  }
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->rmap.n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
  }
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIBAIJ"
PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIBAIJ"
PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscTruth     merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually */
    /* inserted in yy until the next line */
    ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in zz until the next line, which is true for my implementation */
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIBAIJ"
PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

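/*
   MatGetRow assembles one global row from the two local pieces: the
   off-diagonal part B is stored sorted by global column, so its entries whose
   columns lie below the diagonal block come first, then all of A's entries,
   then B's remaining entries; B's local column indices are translated back to
   global numbering through garray.
*/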
#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIBAIJ"
PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       bs = matin->rmap.bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap.rstart,brend = matin->rmap.rend;
  PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;

  PetscFunctionBegin;
  if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
    PetscInt     max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc(max*bs2*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
    mat->rowindices = (PetscInt*)(mat->rowvalues + max*bs2);
  }

1405   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows");
1406   lrow = row - brstart;
1407 
1408   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1409   if (!v)   {pvA = 0; pvB = 0;}
1410   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1411   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1412   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1413   nztot = nzA + nzB;
1414 
1415   cmap  = mat->garray;
1416   if (v  || idx) {
1417     if (nztot) {
1418       /* Sort by increasing column numbers, assuming A and B already sorted */
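           /* Editor's illustration (hypothetical values): with bs=1 and cstart=4,
              suppose A holds local columns {0,2} (global columns {4,6}) and B's
              columns map through cmap to global columns {1,9}. The merge below
              emits 1 first (below cstart), then 4 and 6 from A, then 9, so the
              returned row is globally sorted. */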
1419       PetscInt imark = -1;
1420       if (v) {
1421         *v = v_p = mat->rowvalues;
1422         for (i=0; i<nzB; i++) {
1423           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1424           else break;
1425         }
1426         imark = i;
1427         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1428         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1429       }
1430       if (idx) {
1431         *idx = idx_p = mat->rowindices;
1432         if (imark > -1) {
1433           for (i=0; i<imark; i++) {
1434             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1435           }
1436         } else {
1437           for (i=0; i<nzB; i++) {
1438             if (cmap[cworkB[i]/bs] < cstart)
1439               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1440             else break;
1441           }
1442           imark = i;
1443         }
1444         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1445         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1446       }
1447     } else {
1448       if (idx) *idx = 0;
1449       if (v)   *v   = 0;
1450     }
1451   }
1452   *nz = nztot;
1453   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1454   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1455   PetscFunctionReturn(0);
1456 }
1457 
1458 #undef __FUNCT__
1459 #define __FUNCT__ "MatRestoreRow_MPIBAIJ"
1460 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1461 {
1462   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1463 
1464   PetscFunctionBegin;
1465   if (!baij->getrowactive) {
1466     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1467   }
1468   baij->getrowactive = PETSC_FALSE;
1469   PetscFunctionReturn(0);
1470 }
1471 
1472 #undef __FUNCT__
1473 #define __FUNCT__ "MatZeroEntries_MPIBAIJ"
1474 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1475 {
1476   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1477   PetscErrorCode ierr;
1478 
1479   PetscFunctionBegin;
1480   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1481   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1482   PetscFunctionReturn(0);
1483 }
1484 
1485 #undef __FUNCT__
1486 #define __FUNCT__ "MatGetInfo_MPIBAIJ"
1487 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1488 {
1489   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
1490   Mat            A = a->A,B = a->B;
1491   PetscErrorCode ierr;
1492   PetscReal      isend[5],irecv[5];
1493 
1494   PetscFunctionBegin;
1495   info->block_size     = (PetscReal)matin->rmap.bs;
1496   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1497   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1498   isend[3] = info->memory;  isend[4] = info->mallocs;
1499   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1500   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1501   isend[3] += info->memory;  isend[4] += info->mallocs;
1502   if (flag == MAT_LOCAL) {
1503     info->nz_used      = isend[0];
1504     info->nz_allocated = isend[1];
1505     info->nz_unneeded  = isend[2];
1506     info->memory       = isend[3];
1507     info->mallocs      = isend[4];
1508   } else if (flag == MAT_GLOBAL_MAX) {
1509     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1510     info->nz_used      = irecv[0];
1511     info->nz_allocated = irecv[1];
1512     info->nz_unneeded  = irecv[2];
1513     info->memory       = irecv[3];
1514     info->mallocs      = irecv[4];
1515   } else if (flag == MAT_GLOBAL_SUM) {
1516     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1517     info->nz_used      = irecv[0];
1518     info->nz_allocated = irecv[1];
1519     info->nz_unneeded  = irecv[2];
1520     info->memory       = irecv[3];
1521     info->mallocs      = irecv[4];
1522   } else {
1523     SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1524   }
1525   info->rows_global       = (PetscReal)matin->rmap.N;
1526   info->columns_global    = (PetscReal)matin->cmap.N;
1527   info->rows_local        = (PetscReal)matin->rmap.n;
1528   info->columns_local     = (PetscReal)matin->cmap.n;
1529   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1530   info->fill_ratio_needed = 0;
1531   info->factor_mallocs    = 0;
1532   PetscFunctionReturn(0);
1533 }
1534 
1535 #undef __FUNCT__
1536 #define __FUNCT__ "MatSetOption_MPIBAIJ"
1537 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op)
1538 {
1539   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1540   PetscErrorCode ierr;
1541 
1542   PetscFunctionBegin;
1543   switch (op) {
1544   case MAT_NO_NEW_NONZERO_LOCATIONS:
1545   case MAT_YES_NEW_NONZERO_LOCATIONS:
1546   case MAT_COLUMNS_UNSORTED:
1547   case MAT_COLUMNS_SORTED:
1548   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1549   case MAT_KEEP_ZEROED_ROWS:
1550   case MAT_NEW_NONZERO_LOCATION_ERR:
1551     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1552     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1553     break;
1554   case MAT_ROW_ORIENTED:
1555     a->roworiented = PETSC_TRUE;
1556     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1557     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1558     break;
1559   case MAT_ROWS_SORTED:
1560   case MAT_ROWS_UNSORTED:
1561   case MAT_YES_NEW_DIAGONALS:
1562     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1563     break;
1564   case MAT_COLUMN_ORIENTED:
1565     a->roworiented = PETSC_FALSE;
1566     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1567     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1568     break;
1569   case MAT_IGNORE_OFF_PROC_ENTRIES:
1570     a->donotstash = PETSC_TRUE;
1571     break;
1572   case MAT_NO_NEW_DIAGONALS:
1573     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1574   case MAT_USE_HASH_TABLE:
1575     a->ht_flag = PETSC_TRUE;
1576     break;
1577   case MAT_SYMMETRIC:
1578   case MAT_STRUCTURALLY_SYMMETRIC:
1579   case MAT_HERMITIAN:
1580   case MAT_SYMMETRY_ETERNAL:
1581     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1582     break;
1583   case MAT_NOT_SYMMETRIC:
1584   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1585   case MAT_NOT_HERMITIAN:
1586   case MAT_NOT_SYMMETRY_ETERNAL:
1587     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1588     break;
1589   default:
1590     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1591   }
1592   PetscFunctionReturn(0);
1593 }
1594 
1595 #undef __FUNCT__
1596 #define __FUNCT__ "MatTranspose_MPIBAIJ"
1597 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,Mat *matout)
1598 {
1599   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
1600   Mat_SeqBAIJ    *Aloc;
1601   Mat            B;
1602   PetscErrorCode ierr;
1603   PetscInt       M=A->rmap.N,N=A->cmap.N,*ai,*aj,i,*rvals,j,k,col;
1604   PetscInt       bs=A->rmap.bs,mbs=baij->mbs;
1605   MatScalar      *a;
1606 
1607   PetscFunctionBegin;
1608   if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1609   ierr = MatCreate(A->comm,&B);CHKERRQ(ierr);
1610   ierr = MatSetSizes(B,A->cmap.n,A->rmap.n,N,M);CHKERRQ(ierr);
1611   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
1612   ierr = MatMPIBAIJSetPreallocation(B,A->rmap.bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1613 
1614   /* copy over the A part */
1615   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1616   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1617   ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
1618 
1619   for (i=0; i<mbs; i++) {
1620     rvals[0] = bs*(baij->rstartbs + i);
1621     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1622     for (j=ai[i]; j<ai[i+1]; j++) {
1623       col = (baij->cstartbs+aj[j])*bs;
1624       for (k=0; k<bs; k++) {
1625         ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1626         col++; a += bs;
1627       }
1628     }
1629   }
1630   /* copy over the B part */
1631   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1632   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1633   for (i=0; i<mbs; i++) {
1634     rvals[0] = bs*(baij->rstartbs + i);
1635     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1636     for (j=ai[i]; j<ai[i+1]; j++) {
1637       col = baij->garray[aj[j]]*bs;
1638       for (k=0; k<bs; k++) {
1639         ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1640         col++; a += bs;
1641       }
1642     }
1643   }
1644   ierr = PetscFree(rvals);CHKERRQ(ierr);
1645   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1646   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1647 
1648   if (matout) {
1649     *matout = B;
1650   } else {
1651     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1652   }
1653   PetscFunctionReturn(0);
1654 }
1655 
1656 #undef __FUNCT__
1657 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
1658 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1659 {
1660   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1661   Mat            a = baij->A,b = baij->B;
1662   PetscErrorCode ierr;
1663   PetscInt       s1,s2,s3;
1664 
1665   PetscFunctionBegin;
1666   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1667   if (rr) {
1668     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1669     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1670     /* Overlap communication with computation. */
1671     ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1672   }
1673   if (ll) {
1674     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1675     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1676     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1677   }
1678   /* scale  the diagonal block */
1679   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1680 
1681   if (rr) {
1682     /* Do a scatter end and then right scale the off-diagonal block */
1683     ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1684     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1685   }
1686 
1687   PetscFunctionReturn(0);
1688 }
1689 
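     /*
        Editor's summary of the algorithm below: (1) each process bins the
        global row indices it was given by owning process (using A->rmap.range);
        (2) counts and indices are exchanged with nonblocking MPI_Irecv/MPI_Isend;
        (3) the received indices are shifted to local numbering and zeroed in the
        sequential blocks l->B and l->A (optionally inserting diag); (4) the
        outstanding sends are completed and the temporary buffers are freed.
     */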
1690 #undef __FUNCT__
1691 #define __FUNCT__ "MatZeroRows_MPIBAIJ"
1692 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
1693 {
1694   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1695   PetscErrorCode ierr;
1696   PetscMPIInt    imdex,size = l->size,n,rank = l->rank;
1697   PetscInt       i,*owners = A->rmap.range;
1698   PetscInt       *nprocs,j,idx,nsends,row;
1699   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
1700   PetscInt       *rvalues,tag = A->tag,count,base,slen,*source,lastidx = -1;
1701   PetscInt       *lens,*lrows,*values,rstart_bs=A->rmap.rstart;
1702   MPI_Comm       comm = A->comm;
1703   MPI_Request    *send_waits,*recv_waits;
1704   MPI_Status     recv_status,*send_status;
1705 #if defined(PETSC_DEBUG)
1706   PetscTruth     found = PETSC_FALSE;
1707 #endif
1708 
1709   PetscFunctionBegin;
1710   /*  first count number of contributors to each processor */
1711   ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
1712   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
1713   ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
1714   j = 0;
1715   for (i=0; i<N; i++) {
1716     if (lastidx > (idx = rows[i])) j = 0;
1717     lastidx = idx;
1718     for (; j<size; j++) {
1719       if (idx >= owners[j] && idx < owners[j+1]) {
1720         nprocs[2*j]++;
1721         nprocs[2*j+1] = 1;
1722         owner[i] = j;
1723 #if defined(PETSC_DEBUG)
1724         found = PETSC_TRUE;
1725 #endif
1726         break;
1727       }
1728     }
1729 #if defined(PETSC_DEBUG)
1730     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
1731     found = PETSC_FALSE;
1732 #endif
1733   }
1734   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
1735 
1736   /* inform other processors of number of messages and max length*/
1737   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
1738 
1739   /* post receives:   */
1740   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
1741   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
1742   for (i=0; i<nrecvs; i++) {
1743     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
1744   }
1745 
1746   /* do sends:
1747      1) starts[i] gives the starting index in svalues for stuff going to
1748      the ith processor
1749   */
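       /* Editor's illustration (hypothetical counts): with 3 processes and
          nprocs[0],nprocs[2],nprocs[4] = 2,0,3, the prefix sums below give
          starts = {0,2,2}, so rows owned by process 0 are packed into
          svalues[0..1] and rows owned by process 2 into svalues[2..4]. */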
1750   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
1751   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
1752   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
1753   starts[0]  = 0;
1754   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1755   for (i=0; i<N; i++) {
1756     svalues[starts[owner[i]]++] = rows[i];
1757   }
1758 
1759   starts[0] = 0;
1760   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1761   count = 0;
1762   for (i=0; i<size; i++) {
1763     if (nprocs[2*i+1]) {
1764       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
1765     }
1766   }
1767   ierr = PetscFree(starts);CHKERRQ(ierr);
1768 
1769   base = owners[rank];
1770 
1771   /*  wait on receives */
1772   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
1773   source = lens + nrecvs;
1774   count  = nrecvs; slen = 0;
1775   while (count) {
1776     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
1777     /* unpack receives into our local space */
1778     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
1779     source[imdex]  = recv_status.MPI_SOURCE;
1780     lens[imdex]    = n;
1781     slen          += n;
1782     count--;
1783   }
1784   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1785 
1786   /* move the data into the send scatter */
1787   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
1788   count = 0;
1789   for (i=0; i<nrecvs; i++) {
1790     values = rvalues + i*nmax;
1791     for (j=0; j<lens[i]; j++) {
1792       lrows[count++] = values[j] - base;
1793     }
1794   }
1795   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1796   ierr = PetscFree(lens);CHKERRQ(ierr);
1797   ierr = PetscFree(owner);CHKERRQ(ierr);
1798   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1799 
1800   /* actually zap the local rows */
1801   /*
1802         Zero the required rows. If the "diagonal block" of the matrix
1803      is square and the user wishes to set the diagonal we use separate
1804      code so that MatSetValues() is not called for each diagonal allocating
1805      new memory, thus calling lots of mallocs and slowing things down.
1806 
1807        Contributed by: Matthew Knepley
1808   */
1809   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
1810   ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0);CHKERRQ(ierr);
1811   if ((diag != 0.0) && (l->A->rmap.N == l->A->cmap.N)) {
1812     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag);CHKERRQ(ierr);
1813   } else if (diag != 0.0) {
1814     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr);
1815     if (((Mat_SeqBAIJ*)l->A->data)->nonew) {
1816       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1817 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1818     }
1819     for (i=0; i<slen; i++) {
1820       row  = lrows[i] + rstart_bs;
1821       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
1822     }
1823     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1824     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1825   } else {
1826     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr);
1827   }
1828 
1829   ierr = PetscFree(lrows);CHKERRQ(ierr);
1830 
1831   /* wait on sends */
1832   if (nsends) {
1833     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1834     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1835     ierr = PetscFree(send_status);CHKERRQ(ierr);
1836   }
1837   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1838   ierr = PetscFree(svalues);CHKERRQ(ierr);
1839 
1840   PetscFunctionReturn(0);
1841 }
1842 
1843 #undef __FUNCT__
1844 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
1845 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1846 {
1847   Mat_MPIBAIJ    *a   = (Mat_MPIBAIJ*)A->data;
1848   PetscErrorCode ierr;
1849 
1850   PetscFunctionBegin;
1851   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1852   PetscFunctionReturn(0);
1853 }
1854 
1855 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
1856 
1857 #undef __FUNCT__
1858 #define __FUNCT__ "MatEqual_MPIBAIJ"
1859 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag)
1860 {
1861   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1862   Mat            a,b,c,d;
1863   PetscTruth     flg;
1864   PetscErrorCode ierr;
1865 
1866   PetscFunctionBegin;
1867   a = matA->A; b = matA->B;
1868   c = matB->A; d = matB->B;
1869 
1870   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1871   if (flg) {
1872     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1873   }
1874   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1875   PetscFunctionReturn(0);
1876 }
1877 
1878 #undef __FUNCT__
1879 #define __FUNCT__ "MatCopy_MPIBAIJ"
1880 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
1881 {
1882   PetscErrorCode ierr;
1883   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ *)A->data;
1884   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ *)B->data;
1885 
1886   PetscFunctionBegin;
1887   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1888   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1889     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1890   } else {
1891     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1892     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1893   }
1894   PetscFunctionReturn(0);
1895 }
1896 
1897 #undef __FUNCT__
1898 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ"
1899 PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A)
1900 {
1901   PetscErrorCode ierr;
1902 
1903   PetscFunctionBegin;
1904   ierr =  MatMPIBAIJSetPreallocation(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1905   PetscFunctionReturn(0);
1906 }
1907 
1908 #include "petscblaslapack.h"
1909 #undef __FUNCT__
1910 #define __FUNCT__ "MatAXPY_MPIBAIJ"
1911 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1912 {
1913   PetscErrorCode ierr;
1914   Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data;
1915   PetscBLASInt   bnz,one=1;
1916   Mat_SeqBAIJ    *x,*y;
1917 
1918   PetscFunctionBegin;
1919   if (str == SAME_NONZERO_PATTERN) {
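         /* Editor's note: with identical nonzero patterns both matrices store
            their block entries in flat arrays of equal length and layout, so
            Y += a*X reduces to one BLAS axpy over the diagonal part and one
            over the off-diagonal part. */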
1920     PetscScalar alpha = a;
1921     x = (Mat_SeqBAIJ *)xx->A->data;
1922     y = (Mat_SeqBAIJ *)yy->A->data;
1923     bnz = (PetscBLASInt)x->nz;
1924     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1925     x = (Mat_SeqBAIJ *)xx->B->data;
1926     y = (Mat_SeqBAIJ *)yy->B->data;
1927     bnz = (PetscBLASInt)x->nz;
1928     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1929   } else {
1930     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1931   }
1932   PetscFunctionReturn(0);
1933 }
1934 
1935 #undef __FUNCT__
1936 #define __FUNCT__ "MatRealPart_MPIBAIJ"
1937 PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
1938 {
1939   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ*)A->data;
1940   PetscErrorCode ierr;
1941 
1942   PetscFunctionBegin;
1943   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1944   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1945   PetscFunctionReturn(0);
1946 }
1947 
1948 #undef __FUNCT__
1949 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ"
1950 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
1951 {
1952   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ*)A->data;
1953   PetscErrorCode ierr;
1954 
1955   PetscFunctionBegin;
1956   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1957   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1958   PetscFunctionReturn(0);
1959 }
1960 
1961 /* -------------------------------------------------------------------*/
1962 static struct _MatOps MatOps_Values = {
1963        MatSetValues_MPIBAIJ,
1964        MatGetRow_MPIBAIJ,
1965        MatRestoreRow_MPIBAIJ,
1966        MatMult_MPIBAIJ,
1967 /* 4*/ MatMultAdd_MPIBAIJ,
1968        MatMultTranspose_MPIBAIJ,
1969        MatMultTransposeAdd_MPIBAIJ,
1970        0,
1971        0,
1972        0,
1973 /*10*/ 0,
1974        0,
1975        0,
1976        0,
1977        MatTranspose_MPIBAIJ,
1978 /*15*/ MatGetInfo_MPIBAIJ,
1979        MatEqual_MPIBAIJ,
1980        MatGetDiagonal_MPIBAIJ,
1981        MatDiagonalScale_MPIBAIJ,
1982        MatNorm_MPIBAIJ,
1983 /*20*/ MatAssemblyBegin_MPIBAIJ,
1984        MatAssemblyEnd_MPIBAIJ,
1985        0,
1986        MatSetOption_MPIBAIJ,
1987        MatZeroEntries_MPIBAIJ,
1988 /*25*/ MatZeroRows_MPIBAIJ,
1989        0,
1990        0,
1991        0,
1992        0,
1993 /*30*/ MatSetUpPreallocation_MPIBAIJ,
1994        0,
1995        0,
1996        0,
1997        0,
1998 /*35*/ MatDuplicate_MPIBAIJ,
1999        0,
2000        0,
2001        0,
2002        0,
2003 /*40*/ MatAXPY_MPIBAIJ,
2004        MatGetSubMatrices_MPIBAIJ,
2005        MatIncreaseOverlap_MPIBAIJ,
2006        MatGetValues_MPIBAIJ,
2007        MatCopy_MPIBAIJ,
2008 /*45*/ 0,
2009        MatScale_MPIBAIJ,
2010        0,
2011        0,
2012        0,
2013 /*50*/ 0,
2014        0,
2015        0,
2016        0,
2017        0,
2018 /*55*/ 0,
2019        0,
2020        MatSetUnfactored_MPIBAIJ,
2021        0,
2022        MatSetValuesBlocked_MPIBAIJ,
2023 /*60*/ 0,
2024        MatDestroy_MPIBAIJ,
2025        MatView_MPIBAIJ,
2026        0,
2027        0,
2028 /*65*/ 0,
2029        0,
2030        0,
2031        0,
2032        0,
2033 /*70*/ MatGetRowMaxAbs_MPIBAIJ,
2034        0,
2035        0,
2036        0,
2037        0,
2038 /*75*/ 0,
2039        0,
2040        0,
2041        0,
2042        0,
2043 /*80*/ 0,
2044        0,
2045        0,
2046        0,
2047        MatLoad_MPIBAIJ,
2048 /*85*/ 0,
2049        0,
2050        0,
2051        0,
2052        0,
2053 /*90*/ 0,
2054        0,
2055        0,
2056        0,
2057        0,
2058 /*95*/ 0,
2059        0,
2060        0,
2061        0,
2062        0,
2063 /*100*/0,
2064        0,
2065        0,
2066        0,
2067        0,
2068 /*105*/0,
2069        MatRealPart_MPIBAIJ,
2070        MatImaginaryPart_MPIBAIJ};
2071 
2072 
2073 EXTERN_C_BEGIN
2074 #undef __FUNCT__
2075 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
2076 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
2077 {
2078   PetscFunctionBegin;
2079   *a      = ((Mat_MPIBAIJ *)A->data)->A;
2080   *iscopy = PETSC_FALSE;
2081   PetscFunctionReturn(0);
2082 }
2083 EXTERN_C_END
2084 
2085 EXTERN_C_BEGIN
2086 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*);
2087 EXTERN_C_END
2088 
2089 #undef __FUNCT__
2090 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ"
2091 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
2092 {
2093   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)B->data;
2094   PetscInt       m = B->rmap.n/bs,cstart = baij->cstartbs, cend = baij->cendbs,j,nnz,i,d;
2095   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart = baij->rstartbs,ii;
2096   const PetscInt *JJ;
2097   PetscScalar    *values;
2098   PetscErrorCode ierr;
2099 
2100   PetscFunctionBegin;
2101 #if defined(PETSC_OPT_g)
2102   if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_RANGE,"Ii[0] must be 0 but it is %D",Ii[0]);
2103 #endif
2104   ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2105   o_nnz = d_nnz + m;
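       /* Editor's note: block columns in [cstart,cend) belong to the diagonal
          block and the rest to the off-diagonal block. For example, with
          cstart=2, cend=4 and a sorted row {0,2,3,7}, the loop below yields
          d_nnz[i] = 2 and o_nnz[i] = 2. */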
2106 
2107   for (i=0; i<m; i++) {
2108     nnz     = Ii[i+1]- Ii[i];
2109     JJ      = J + Ii[i];
2110     nnz_max = PetscMax(nnz_max,nnz);
2111 #if defined(PETSC_OPT_g)
2112     if (nnz < 0) SETERRQ2(PETSC_ERR_ARG_RANGE,"Local row %D has a negative number of columns %D",i,nnz);
2113 #endif
2114     for (j=0; j<nnz; j++) {
2115       if (*JJ >= cstart) break;
2116       JJ++;
2117     }
2118     d = 0;
2119     for (; j<nnz; j++) {
2120       if (*JJ++ >= cend) break;
2121       d++;
2122     }
2123     d_nnz[i] = d;
2124     o_nnz[i] = nnz - d;
2125   }
2126   ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2127   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2128 
2129   if (v) values = (PetscScalar*)v;
2130   else {
2131     ierr = PetscMalloc(bs*bs*(nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2132     ierr = PetscMemzero(values,bs*bs*nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2133   }
2134 
2135   ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2136   for (i=0; i<m; i++) {
2137     ii   = i + rstart;
2138     nnz  = Ii[i+1]- Ii[i];
2139     ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? bs*bs*Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
2140   }
2141   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2142   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2143   ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr);
2144 
2145   if (!v) {
2146     ierr = PetscFree(values);CHKERRQ(ierr);
2147   }
2148   PetscFunctionReturn(0);
2149 }
2150 
2151 #undef __FUNCT__
2152 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR"
2153 /*@C
2154    MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in block AIJ
2155    (BAIJ) format, given a compressed block-row description of the local rows.
2156 
2157    Collective on Mat
2158 
2159    Input Parameters:
2160 +  B - the matrix
     .  bs - the block size
2161 .  i - the indices into j for the start of each local block row (starts with zero)
2162 .  j - the block column indices for each local block row (starts with zero); these must be sorted within each row
2163 -  v - optional values in the matrix
2164 
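        Example (editor's sketch, not part of the original manual page): a process
        owning two local block rows whose block columns are {0,2} and {1} would pass

     .vb
        PetscInt ii[] = {0,2,3};   /* start of each local block row in jj */
        PetscInt jj[] = {0,2,1};   /* block column indices, sorted within each row */
        ierr = MatMPIBAIJSetPreallocationCSR(B,bs,ii,jj,PETSC_NULL);CHKERRQ(ierr);
     .ve
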
2165    Level: developer
2166 
2167 .keywords: matrix, block, aij, compressed row, sparse, parallel
2168 
2169 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIBAIJ(), MATMPIBAIJ
2170 @*/
2171 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2172 {
2173   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]);
2174 
2175   PetscFunctionBegin;
2176   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
2177   if (f) {
2178     ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr);
2179   }
2180   PetscFunctionReturn(0);
2181 }
2182 
2183 EXTERN_C_BEGIN
2184 #undef __FUNCT__
2185 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
2186 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
2187 {
2188   Mat_MPIBAIJ    *b;
2189   PetscErrorCode ierr;
2190   PetscInt       i;
2191 
2192   PetscFunctionBegin;
2193   B->preallocated = PETSC_TRUE;
2194   ierr = PetscOptionsBegin(B->comm,B->prefix,"Options for MPIBAIJ matrix","Mat");CHKERRQ(ierr);
2195     ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
2196   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2197 
2198   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
2199   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2200   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2201   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2202   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2203 
2204   B->rmap.bs  = bs;
2205   B->cmap.bs  = bs;
2206   ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr);
2207   ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr);
2208 
2209   if (d_nnz) {
2210     for (i=0; i<B->rmap.n/bs; i++) {
2211       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
2212     }
2213   }
2214   if (o_nnz) {
2215     for (i=0; i<B->rmap.n/bs; i++) {
2216       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
2217     }
2218   }
2219 
2220   b = (Mat_MPIBAIJ*)B->data;
2221   b->bs2 = bs*bs;
2222   b->mbs = B->rmap.n/bs;
2223   b->nbs = B->cmap.n/bs;
2224   b->Mbs = B->rmap.N/bs;
2225   b->Nbs = B->cmap.N/bs;
2226 
2227   for (i=0; i<=b->size; i++) {
2228     b->rangebs[i] = B->rmap.range[i]/bs;
2229   }
2230   b->rstartbs = B->rmap.rstart/bs;
2231   b->rendbs   = B->rmap.rend/bs;
2232   b->cstartbs = B->cmap.rstart/bs;
2233   b->cendbs   = B->cmap.rend/bs;
2234 
2235   ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2236   ierr = MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);CHKERRQ(ierr);
2237   ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
2238   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2239   ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
2240   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2241   ierr = MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);CHKERRQ(ierr);
2242   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
2243   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
2244   ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
2245 
2246   ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);
2247 
2248   PetscFunctionReturn(0);
2249 }
2250 EXTERN_C_END
2251 
2252 EXTERN_C_BEGIN
2253 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
2254 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
2255 EXTERN_C_END
2256 
2257 /*MC
2258    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
2259 
2260    Options Database Keys:
2261 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
2262 . -mat_block_size <bs> - set the blocksize used to store the matrix
2263 - -mat_use_hash_table <fact> - use a hash table during matrix assembly, sized by the factor fact
2264 
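       Example (editor's sketch): select this type and a block size of 3 at runtime with

     .vb
        -mat_type mpibaij -mat_block_size 3
     .ve
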
2265   Level: beginner
2266 
2267 .seealso: MatCreateMPIBAIJ()
2268 M*/
2269 
2270 EXTERN_C_BEGIN
2271 #undef __FUNCT__
2272 #define __FUNCT__ "MatCreate_MPIBAIJ"
2273 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIBAIJ(Mat B)
2274 {
2275   Mat_MPIBAIJ    *b;
2276   PetscErrorCode ierr;
2277   PetscTruth     flg;
2278 
2279   PetscFunctionBegin;
2280   ierr = PetscNew(Mat_MPIBAIJ,&b);CHKERRQ(ierr);
2281   B->data = (void*)b;
2282 
2283 
2284   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2285   B->mapping    = 0;
2286   B->factor     = 0;
2287   B->assembled  = PETSC_FALSE;
2288 
2289   B->insertmode = NOT_SET_VALUES;
2290   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
2291   ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr);
2292 
2293   /* build local table of row and column ownerships */
2294   ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);
2295 
2296   /* build cache for off array entries formed */
2297   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
2298   b->donotstash  = PETSC_FALSE;
2299   b->colmap      = PETSC_NULL;
2300   b->garray      = PETSC_NULL;
2301   b->roworiented = PETSC_TRUE;
2302 
2303 #if defined(PETSC_USE_MAT_SINGLE)
2304   /* stuff for MatSetValues_XXX in single precision */
2305   b->setvalueslen     = 0;
2306   b->setvaluescopy    = PETSC_NULL;
2307 #endif
2308 
2309   /* stuff used in block assembly */
2310   b->barray       = 0;
2311 
2312   /* stuff used for matrix vector multiply */
2313   b->lvec         = 0;
2314   b->Mvctx        = 0;
2315 
2316   /* stuff for MatGetRow() */
2317   b->rowindices   = 0;
2318   b->rowvalues    = 0;
2319   b->getrowactive = PETSC_FALSE;
2320 
2321   /* hash table stuff */
2322   b->ht           = 0;
2323   b->hd           = 0;
2324   b->ht_size      = 0;
2325   b->ht_flag      = PETSC_FALSE;
2326   b->ht_fact      = 0;
2327   b->ht_total_ct  = 0;
2328   b->ht_insert_ct = 0;
2329 
2330   ierr = PetscOptionsBegin(B->comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr);
2331     ierr = PetscOptionsTruth("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
2332     if (flg) {
2333       PetscReal fact = 1.39;
2334       ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
2335       ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr);
2336       if (fact <= 1.0) fact = 1.39;
2337       ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
2338       ierr = PetscInfo1(0,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
2339     }
2340   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2341 
2342   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2343                                      "MatStoreValues_MPIBAIJ",
2344                                      MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
2345   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2346                                      "MatRetrieveValues_MPIBAIJ",
2347                                      MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
2348   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
2349                                      "MatGetDiagonalBlock_MPIBAIJ",
2350                                      MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
2351   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C",
2352                                      "MatMPIBAIJSetPreallocation_MPIBAIJ",
2353                                      MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
2354   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",
2355                                      "MatMPIBAIJSetPreallocationCSR_MPIBAIJ",
2356                                      MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
2357   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
2358                                      "MatDiagonalScaleLocal_MPIBAIJ",
2359                                      MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
2360   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C",
2361                                      "MatSetHashTableFactor_MPIBAIJ",
2362                                      MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
2363   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);
2364   PetscFunctionReturn(0);
2365 }
2366 EXTERN_C_END
2367 
2368 /*MC
2369    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
2370 
2371    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
2372    and MATMPIBAIJ otherwise.
2373 
2374    Options Database Keys:
2375 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
2376 
2377   Level: beginner
2378 
2379 .seealso: MatCreateMPIBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
2380 M*/
2381 
2382 EXTERN_C_BEGIN
2383 #undef __FUNCT__
2384 #define __FUNCT__ "MatCreate_BAIJ"
2385 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_BAIJ(Mat A)
2386 {
2387   PetscErrorCode ierr;
2388   PetscMPIInt    size;
2389 
2390   PetscFunctionBegin;
2391   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
2392   if (size == 1) {
2393     ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr);
2394   } else {
2395     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
2396   }
2397   PetscFunctionReturn(0);
2398 }
2399 EXTERN_C_END
2400 
2401 #undef __FUNCT__
2402 #define __FUNCT__ "MatMPIBAIJSetPreallocation"
2403 /*@C
2404    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
2405    (block compressed row).  For good matrix assembly performance
2406    the user should preallocate the matrix storage by setting the parameters
2407    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2408    performance can be increased by more than a factor of 50.
2409 
2410    Collective on Mat
2411 
2412    Input Parameters:
2413 +  B - the matrix
2414 .  bs   - size of block
2415 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2416            submatrix  (same for all local rows)
2417 .  d_nnz - array containing the number of block nonzeros in the various block rows
2418            in the diagonal portion of the local submatrix (possibly different for each block
2419            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
2420 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2421            submatrix (same for all local rows).
2422 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2423            off-diagonal portion of the local submatrix (possibly different for
2424            each block row) or PETSC_NULL.
2425 
2426    If the *_nnz parameter is given then the *_nz parameter is ignored
2427 
2428    Options Database Keys:
2429 +   -mat_block_size - size of the blocks to use
2430 -   -mat_use_hash_table <fact> - use a hash table during matrix assembly, sized by the factor fact
2431 
2432    Notes:
2433    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
2434    then it must be used on all processors that share the object for that argument.
2435 
2436    Storage Information:
2437    For a square global matrix we define each processor's diagonal portion
2438    to be its local rows and the corresponding columns (a square submatrix);
2439    each processor's off-diagonal portion encompasses the remainder of the
2440    local matrix (a rectangular submatrix).
2441 
2442    The user can specify preallocated storage for the diagonal part of
2443    the local submatrix with either d_nz or d_nnz (not both).  Set
2444    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2445    memory allocation.  Likewise, specify preallocated storage for the
2446    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2447 
2448    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2449    the figure below we depict these three local rows and all columns (0-11).
2450 
2451 .vb
2452            0 1 2 3 4 5 6 7 8 9 10 11
2453           -------------------
2454    row 3  |  o o o d d d o o o o o o
2455    row 4  |  o o o d d d o o o o o o
2456    row 5  |  o o o d d d o o o o o o
2457           -------------------
2458 .ve
2459 
2460    Thus, any entries in the d locations are stored in the d (diagonal)
2461    submatrix, and any entries in the o locations are stored in the
2462    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
2463    stored simply in the MATSEQBAIJ format for compressed row storage.
2464 
2465    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2466    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2467    In general, for PDE problems in which most nonzeros are near the diagonal,
2468    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2469    or you will get TERRIBLE performance; see the users' manual chapter on
2470    matrices.
2471 
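        Sample usage (editor's sketch; the variable names are illustrative only):

     .vb
        ierr = MatCreate(comm,&A);CHKERRQ(ierr);
        ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
        ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
        ierr = MatMPIBAIJSetPreallocation(A,bs,d_nz,PETSC_NULL,o_nz,PETSC_NULL);CHKERRQ(ierr);
     .ve
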
2472    Level: intermediate
2473 
2474 .keywords: matrix, block, aij, compressed row, sparse, parallel
2475 
2476 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR()
2477 @*/
2478 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2479 {
2480   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
2481 
2482   PetscFunctionBegin;
2483   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2484   if (f) {
2485     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2486   }
2487   PetscFunctionReturn(0);
2488 }
2489 
2490 #undef __FUNCT__
2491 #define __FUNCT__ "MatCreateMPIBAIJ"
2492 /*@C
2493    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
2494    (block compressed row).  For good matrix assembly performance
2495    the user should preallocate the matrix storage by setting the parameters
2496    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2497    performance can be increased by more than a factor of 50.
2498 
2499    Collective on MPI_Comm
2500 
2501    Input Parameters:
2502 +  comm - MPI communicator
2503 .  bs   - size of block
2504 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2505            This value should be the same as the local size used in creating the
2506            y vector for the matrix-vector product y = Ax.
2507 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2508            This value should be the same as the local size used in creating the
2509            x vector for the matrix-vector product y = Ax.
2510 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2511 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2512 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
2513            submatrix  (same for all local rows)
2514 .  d_nnz - array containing the number of nonzero blocks in the various block rows
2515            in the diagonal portion of the local submatrix (possibly different for each block
2516            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
2517 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
2518            submatrix (same for all local rows).
2519 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
2520            off-diagonal portion of the local submatrix (possibly different for
2521            each block row) or PETSC_NULL.
2522 
2523    Output Parameter:
2524 .  A - the matrix
2525 
2526    Options Database Keys:
2527 +   -mat_block_size - size of the blocks to use
2528 -   -mat_use_hash_table <fact> - use a hash table during matrix assembly, sized by the factor fact
2529 
2530    Notes:
2531    If the *_nnz parameter is given then the *_nz parameter is ignored
2532 
2533    A nonzero block is any block that has 1 or more nonzeros in it
2534 
2535    The user MUST specify either the local or global matrix dimensions
2536    (possibly both).
2537 
2538    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
2539    then it must be used on all processors that share the object for that argument.
2540 
2541    Storage Information:
2542    For a square global matrix we define each processor's diagonal portion
2543    to be its local rows and the corresponding columns (a square submatrix);
2544    each processor's off-diagonal portion encompasses the remainder of the
2545    local matrix (a rectangular submatrix).
2546 
2547    The user can specify preallocated storage for the diagonal part of
2548    the local submatrix with either d_nz or d_nnz (not both).  Set
2549    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2550    memory allocation.  Likewise, specify preallocated storage for the
2551    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2552 
2553    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2554    the figure below we depict these three local rows and all columns (0-11).
2555 
2556 .vb
2557            0 1 2 3 4 5 6 7 8 9 10 11
2558           -------------------
2559    row 3  |  o o o d d d o o o o o o
2560    row 4  |  o o o d d d o o o o o o
2561    row 5  |  o o o d d d o o o o o o
2562           -------------------
2563 .ve
2564 
2565    Thus, any entries in the d locations are stored in the d (diagonal)
2566    submatrix, and any entries in the o locations are stored in the
2567    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
2568    stored simply in the MATSEQBAIJ format for compressed row storage.
2569 
2570    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2571    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2572    In general, for PDE problems in which most nonzeros are near the diagonal,
2573    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2574    or you will get TERRIBLE performance; see the users' manual chapter on
2575    matrices.
2576 
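        Sample usage (editor's sketch): a matrix of 2x2 blocks with at most 5 block
        nonzeros per block row in the diagonal part and 2 in the off-diagonal part:

     .vb
        ierr = MatCreateMPIBAIJ(comm,2,m,n,M,N,5,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
     .ve
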
2577    Level: intermediate
2578 
2579 .keywords: matrix, block, aij, compressed row, sparse, parallel
2580 
2581 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
2582 @*/
2583 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2584 {
2585   PetscErrorCode ierr;
2586   PetscMPIInt    size;
2587 
2588   PetscFunctionBegin;
2589   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2590   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2591   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2592   if (size > 1) {
2593     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
2594     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2595   } else {
2596     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
2597     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2598   }
2599   PetscFunctionReturn(0);
2600 }
2601 
2602 #undef __FUNCT__
2603 #define __FUNCT__ "MatDuplicate_MPIBAIJ"
2604 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2605 {
2606   Mat            mat;
2607   Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
2608   PetscErrorCode ierr;
2609   PetscInt       len=0;
2610 
2611   PetscFunctionBegin;
2612   *newmat       = 0;
2613   ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr);
2614   ierr = MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);CHKERRQ(ierr);
2615   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
2616   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2617 
2618   mat->factor       = matin->factor;
2619   mat->preallocated = PETSC_TRUE;
2620   mat->assembled    = PETSC_TRUE;
2621   mat->insertmode   = NOT_SET_VALUES;
2622 
2623   a      = (Mat_MPIBAIJ*)mat->data;
2624   mat->rmap.bs  = matin->rmap.bs;
2625   a->bs2   = oldmat->bs2;
2626   a->mbs   = oldmat->mbs;
2627   a->nbs   = oldmat->nbs;
2628   a->Mbs   = oldmat->Mbs;
2629   a->Nbs   = oldmat->Nbs;
2630 
2631   ierr = PetscMapCopy(matin->comm,&matin->rmap,&mat->rmap);CHKERRQ(ierr);
2632   ierr = PetscMapCopy(matin->comm,&matin->cmap,&mat->cmap);CHKERRQ(ierr);
2633 
2634   a->size         = oldmat->size;
2635   a->rank         = oldmat->rank;
2636   a->donotstash   = oldmat->donotstash;
2637   a->roworiented  = oldmat->roworiented;
2638   a->rowindices   = 0;
2639   a->rowvalues    = 0;
2640   a->getrowactive = PETSC_FALSE;
2641   a->barray       = 0;
2642   a->rstartbs     = oldmat->rstartbs;
2643   a->rendbs       = oldmat->rendbs;
2644   a->cstartbs     = oldmat->cstartbs;
2645   a->cendbs       = oldmat->cendbs;
2646 
2647   /* hash table stuff */
2648   a->ht           = 0;
2649   a->hd           = 0;
2650   a->ht_size      = 0;
2651   a->ht_flag      = oldmat->ht_flag;
2652   a->ht_fact      = oldmat->ht_fact;
2653   a->ht_total_ct  = 0;
2654   a->ht_insert_ct = 0;
2655 
2656   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
2657   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
2658   ierr = MatStashCreate_Private(matin->comm,matin->rmap.bs,&mat->bstash);CHKERRQ(ierr);
2659   if (oldmat->colmap) {
2660 #if defined (PETSC_USE_CTABLE)
2661   ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2662 #else
2663   ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2664   ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2665   ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2666 #endif
2667   } else a->colmap = 0;
2668 
2669   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2670     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2671     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2672     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
2673   } else a->garray = 0;
2674 
2675   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2676   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2677   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2678   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2679 
2680   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2681   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2682   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2683   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2684   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
2685   *newmat = mat;
2686 
2687   PetscFunctionReturn(0);
2688 }
2689 
2690 #include "petscsys.h"
2691 
2692 #undef __FUNCT__
2693 #define __FUNCT__ "MatLoad_MPIBAIJ"
2694 PetscErrorCode MatLoad_MPIBAIJ(PetscViewer viewer, MatType type,Mat *newmat)
2695 {
2696   Mat            A;
2697   PetscErrorCode ierr;
2698   int            fd;
2699   PetscInt       i,nz,j,rstart,rend;
2700   PetscScalar    *vals,*buf;
2701   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2702   MPI_Status     status;
2703   PetscMPIInt    rank,size,maxnz;
2704   PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
2705   PetscInt       *locrowlens = PETSC_NULL,*procsnz = PETSC_NULL,*browners = PETSC_NULL;
2706   PetscInt       jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax;
2707   PetscMPIInt    tag = ((PetscObject)viewer)->tag;
2708   PetscInt       *dlens = PETSC_NULL,*odlens = PETSC_NULL,*mask = PETSC_NULL,*masked1 = PETSC_NULL,*masked2 = PETSC_NULL,rowcount,odcount;
2709   PetscInt       dcount,kmax,k,nzcount,tmp,mend;
2710 
2711   PetscFunctionBegin;
2712   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr);
2713     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
2714   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2715 
2716   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2717   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2718   if (!rank) {
2719     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2720     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2721     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
2722   }
2723 
2724   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2725   M = header[1]; N = header[2];
2726 
2727   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2728 
2729   /*
2730      This code adds extra rows to make sure the number of rows is
2731      divisible by the blocksize
2732   */
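       /* Editor's worked example: M=10, bs=4 gives Mbs=2 and
          extra_rows = 4 - 10 + 8 = 2, so Mbs becomes 3 and two padding rows
          (each with a single diagonal entry) are appended. */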
2733   Mbs        = M/bs;
2734   extra_rows = bs - M + bs*Mbs;
2735   if (extra_rows == bs) extra_rows = 0;
2736   else                  Mbs++;
2737   if (extra_rows && !rank) {
2738     ierr = PetscInfo(0,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2739   }
2740 
2741   /* determine ownership of all rows */
2742   mbs        = Mbs/size + ((Mbs % size) > rank);
2743   m          = mbs*bs;
2744   ierr       = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr);
2745   ierr       = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2746 
2747   /* process 0 needs enough room for the process with the most rows */
2748   if (!rank) {
2749     mmax = rowners[1];
2750     for (i=2; i<=size; i++) {
2751       mmax = PetscMax(mmax,rowners[i]);
2752     }
2753     mmax*=bs;
2754   } else mmax = m;
2755 
2756   rowners[0] = 0;
2757   for (i=2; i<=size; i++)  rowners[i] += rowners[i-1];
2758   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2759   rstart = rowners[rank];
2760   rend   = rowners[rank+1];
2761 
2762   /* distribute row lengths to all processors */
2763   ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr);
2764   if (!rank) {
2765     mend = m;
2766     if (size == 1) mend = mend - extra_rows;
2767     ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr);
2768     for (j=mend; j<m; j++) locrowlens[j] = 1;
2769     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2770     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2771     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2772     for (j=0; j<m; j++) {
2773       procsnz[0] += locrowlens[j];
2774     }
2775     for (i=1; i<size; i++) {
2776       mend = browners[i+1] - browners[i];
2777       if (i == size-1) mend = mend - extra_rows;
2778       ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr);
2779       for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
2780       /* calculate the number of nonzeros on each processor */
2781       for (j=0; j<browners[i+1]-browners[i]; j++) {
2782         procsnz[i] += rowlengths[j];
2783       }
2784       ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2785     }
2786     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2787   } else {
2788     ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2789   }
2790 
2791   if (!rank) {
2792     /* determine max buffer needed and allocate it */
2793     maxnz = procsnz[0];
2794     for (i=1; i<size; i++) {
2795       maxnz = PetscMax(maxnz,procsnz[i]);
2796     }
2797     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2798 
2799     /* read in my part of the matrix column indices  */
2800     nz     = procsnz[0];
2801     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2802     mycols = ibuf;
2803     if (size == 1)  nz -= extra_rows;
2804     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2805     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2806 
2807     /* read in every other processor's column indices (except the last one's) and ship them off */
2808     for (i=1; i<size-1; i++) {
2809       nz   = procsnz[i];
2810       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2811       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2812     }
2813     /* read in the column indices for the last processor */
2814     if (size != 1) {
2815       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2816       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2817       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2818       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2819     }
2820     ierr = PetscFree(cols);CHKERRQ(ierr);
2821   } else {
2822     /* determine buffer space needed for message */
2823     nz = 0;
2824     for (i=0; i<m; i++) {
2825       nz += locrowlens[i];
2826     }
2827     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2828     mycols = ibuf;
2829     /* receive message of column indices */
2830     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2831     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2832     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2833   }
2834 
2835   /* loop over local rows, determining the number of off-diagonal entries */
2836   ierr     = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
2837   ierr     = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
2838   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2839   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2840   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
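       /* mask[] flags the block columns already seen in the current block row, while
          masked1[]/masked2[] record which flags were set (diagonal vs. off-diagonal
          block respectively) so that only those entries are cleared again afterwards,
          keeping the reset proportional to the row length instead of Mbs */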
2841   rowcount = 0; nzcount = 0;
2842   for (i=0; i<mbs; i++) {
2843     dcount  = 0;
2844     odcount = 0;
2845     for (j=0; j<bs; j++) {
2846       kmax = locrowlens[rowcount];
2847       for (k=0; k<kmax; k++) {
2848         tmp = mycols[nzcount++]/bs;
2849         if (!mask[tmp]) {
2850           mask[tmp] = 1;
2851           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
2852           else masked1[dcount++] = tmp;
2853         }
2854       }
2855       rowcount++;
2856     }
2857 
2858     dlens[i]  = dcount;
2859     odlens[i] = odcount;
2860 
2861     /* zero out the mask elements we set */
2862     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2863     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2864   }
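       /* dlens[]/odlens[] now hold, for each local block row, the number of nonzero
          blocks in the diagonal and off-diagonal parts; they feed directly into the
          preallocation below */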
2865 
2866   /* create our matrix */
2867   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
2868   ierr = MatSetSizes(A,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
2869   ierr = MatSetType(A,type);CHKERRQ(ierr);
2870   ierr = MatMPIBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2871 
2872   /* the column indices in the file are sorted within each row, so tell the matrix and let insertion skip the sort */
2873   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2874 
2875   if (!rank) {
2876     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2877     /* read in my part of the matrix numerical values  */
2878     nz = procsnz[0];
2879     vals = buf;
2880     mycols = ibuf;
2881     if (size == 1)  nz -= extra_rows;
2882     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2883     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2884 
2885     /* insert into matrix */
2886     jj      = rstart*bs;
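         /* jj is the global point (not block) row index; mycols and vals advance
            through the flat column-index/value buffers one local row at a time */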
2887     for (i=0; i<m; i++) {
2888       ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2889       mycols += locrowlens[i];
2890       vals   += locrowlens[i];
2891       jj++;
2892     }
2893     /* read in the other processors' values (except the last one's) and ship them out */
2894     for (i=1; i<size-1; i++) {
2895       nz   = procsnz[i];
2896       vals = buf;
2897       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2898       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2899     }
2900     /* the last processor */
2901     if (size != 1) {
2902       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2903       vals = buf;
2904       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2905       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2906       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
2907     }
2908     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2909   } else {
2910     /* receive numeric values */
2911     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2912 
2913     /* receive message of values */
2914     vals   = buf;
2915     mycols = ibuf;
2916     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2917     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2918     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2919 
2920     /* insert into matrix */
2921     jj      = rstart*bs;
2922     for (i=0; i<m; i++) {
2923       ierr    = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2924       mycols += locrowlens[i];
2925       vals   += locrowlens[i];
2926       jj++;
2927     }
2928   }
2929   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2930   ierr = PetscFree(buf);CHKERRQ(ierr);
2931   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2932   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
2933   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
2934   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
2935   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2936   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2937 
2938   *newmat = A;
2939   PetscFunctionReturn(0);
2940 }
2941 
2942 #undef __FUNCT__
2943 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
2944 /*@
2945    MatMPIBAIJSetHashTableFactor - Sets the factor used to compute the size of the hash table.
2946 
2947    Collective on Mat
2948 
2949    Input Parameters:
2950 +  mat  - the matrix
2951 -  fact - factor used in computing the size of the hash table
2952 
2953    Level: advanced
2954 
2955    Notes:
2956    This can also be set by the command line option: -mat_use_hash_table <fact>
2957 
2958 .keywords: matrix, hashtable, factor, HT
2959 
2960 .seealso: MatSetOption()
2961 @*/
2962 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
2963 {
2964   PetscErrorCode ierr,(*f)(Mat,PetscReal);
2965 
2966   PetscFunctionBegin;
2967   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);CHKERRQ(ierr);
2968   if (f) {
2969     ierr = (*f)(mat,fact);CHKERRQ(ierr);
2970   }
2971   PetscFunctionReturn(0);
2972 }
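     /* Typical usage (a sketch; the factor 1.6 is only an illustrative value, and
        comm and A are assumed to be an MPI communicator and a Mat declared elsewhere):

          ierr = MatCreate(comm,&A);CHKERRQ(ierr);
          ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
          ierr = MatMPIBAIJSetHashTableFactor(A,1.6);CHKERRQ(ierr);

        Because of the function query above, the call is silently ignored for
        matrix types that do not provide MatSetHashTableFactor_C. */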
2973 
2974 EXTERN_C_BEGIN
2975 #undef __FUNCT__
2976 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
2977 PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
2978 {
2979   Mat_MPIBAIJ *baij;
2980 
2981   PetscFunctionBegin;
2982   baij = (Mat_MPIBAIJ*)mat->data;
2983   baij->ht_fact = fact;
2984   PetscFunctionReturn(0);
2985 }
2986 EXTERN_C_END
2987 
2988 #undef __FUNCT__
2989 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
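     /*
        MatMPIBAIJGetSeqBAIJ - Gives access to the local pieces of a MATMPIBAIJ
        matrix: Ad is the "diagonal" block (rows and columns owned by this process),
        Ao is the off-diagonal part stored with a compressed column numbering, and
        colmap (the garray) translates Ao's compressed column indices back to global
        block columns. Not collective; no new storage is allocated.
     */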
2990 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
2991 {
2992   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2993   PetscFunctionBegin;
2994   *Ad     = a->A;
2995   *Ao     = a->B;
2996   *colmap = a->garray;
2997   PetscFunctionReturn(0);
2998 }
2999