xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision fd705b320d8d44969be9ca25a36dbdd35fbe8e12)
1 #define PETSCMAT_DLL
2 
3 #include "src/mat/impls/baij/mpi/mpibaij.h"   /*I  "petscmat.h"  I*/
4 
5 EXTERN PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
6 EXTERN PetscErrorCode DisAssemble_MPIBAIJ(Mat);
7 EXTERN PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat,PetscInt,IS[],PetscInt);
8 EXTERN PetscErrorCode MatGetSubMatrices_MPIBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat *[]);
9 EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []);
10 EXTERN PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
11 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
12 EXTERN PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
13 EXTERN PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
14 EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar);
15 
16 /*  UGLY, ugly, ugly
17    When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does
18    not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and
19    inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ()
20    converts the entries into single precision and then calls ..._MatScalar() to put them
21    into the single precision data structures.
22 */
23 #if defined(PETSC_USE_MAT_SINGLE)
24 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
25 EXTERN PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
26 EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
27 EXTERN PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
28 EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
29 #else
30 #define MatSetValuesBlocked_SeqBAIJ_MatScalar      MatSetValuesBlocked_SeqBAIJ
31 #define MatSetValues_MPIBAIJ_MatScalar             MatSetValues_MPIBAIJ
32 #define MatSetValuesBlocked_MPIBAIJ_MatScalar      MatSetValuesBlocked_MPIBAIJ
33 #define MatSetValues_MPIBAIJ_HT_MatScalar          MatSetValues_MPIBAIJ_HT
34 #define MatSetValuesBlocked_MPIBAIJ_HT_MatScalar   MatSetValuesBlocked_MPIBAIJ_HT
35 #endif
36 
37 #undef __FUNCT__
38 #define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ"
39 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
40 {
41   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
42   PetscErrorCode ierr;
43   PetscInt       i,*idxb = 0;
44   PetscScalar    *va,*vb;
45   Vec            vtmp;
46 
47   PetscFunctionBegin;
48 
49   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
50   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
51   if (idx) {
52     for (i=0; i<A->cmap.n; i++) {if (PetscAbsScalar(va[i])) idx[i] += A->cmap.rstart;}
53   }
54 
55   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap.n,&vtmp);CHKERRQ(ierr);
56   if (idx) {ierr = PetscMalloc(A->rmap.n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);}
57   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
58   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
59 
60   for (i=0; i<A->rmap.n; i++){
61     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {va[i] = vb[i]; if (idx) idx[i] = A->cmap.bs*a->garray[idxb[i]/A->cmap.bs] + (idxb[i] % A->cmap.bs);}
62   }
63 
64   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
65   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
66   if (idxb) {ierr = PetscFree(idxb);CHKERRQ(ierr);}
67   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
68   PetscFunctionReturn(0);
69 }
70 
71 EXTERN_C_BEGIN
72 #undef __FUNCT__
73 #define __FUNCT__ "MatStoreValues_MPIBAIJ"
74 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIBAIJ(Mat mat)
75 {
76   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
77   PetscErrorCode ierr;
78 
79   PetscFunctionBegin;
80   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
81   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
82   PetscFunctionReturn(0);
83 }
84 EXTERN_C_END
85 
86 EXTERN_C_BEGIN
87 #undef __FUNCT__
88 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
89 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIBAIJ(Mat mat)
90 {
91   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
92   PetscErrorCode ierr;
93 
94   PetscFunctionBegin;
95   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
96   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
97   PetscFunctionReturn(0);
98 }
99 EXTERN_C_END
100 
101 /*
102      Local utility routine that creates a mapping from the global column
103    number to the local number in the off-diagonal part of the local
104    storage of the matrix.  This is done in a non scable way since the
105    length of colmap equals the global matrix length.
106 */
107 #undef __FUNCT__
108 #define __FUNCT__ "CreateColmap_MPIBAIJ_Private"
/*
   CreateColmap_MPIBAIJ_Private - Builds baij->colmap, which maps a global
   block-column number to 1 + bs*(local block column) in the off-diagonal
   part baij->B.  The stored values are shifted by one so that 0 (or a
   failed table lookup) unambiguously means "block column not present on
   this process".
*/
PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       nbs = B->nbs,i,bs=mat->rmap.bs;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  /* scalable variant: hash table keyed by (global block column + 1) */
  ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr);
  for (i=0; i<nbs; i++){
    ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
  }
#else
  /* non-scalable variant: dense array of length Nbs (TOTAL block columns) */
  ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}
130 
131 #define CHUNKSIZE  10
132 
/*
   MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) - inlined insertion of
   one point value into the DIAGONAL block A.  row/col are process-local
   point indices; addv selects ADD_VALUES vs INSERT_VALUES.  Relies on the
   caller having the working variables (rp,ap,bap,nrow,rmax,brow,bcol,
   ridx,cidx,low,high,t,_i,ii,N) and the Mat_SeqBAIJ arrays of A
   (aa,ai,aj,ailen,aimax) plus bs,bs2 in scope.  The row is searched with a
   binary search narrowed to a short linear scan; if the block column is
   absent and new nonzeros are permitted, the row is reallocated and later
   entries are shifted up to make space.
*/
#define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto a_noinsert; \
        } \
      } \
      if (a->nonew == 1) goto a_noinsert; \
      if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
      MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      a_noinsert:; \
    ailen[brow] = nrow; \
}
171 
/*
   MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) - same insertion
   logic as MatSetValues_SeqBAIJ_A_Private() but operating on the
   OFF-DIAGONAL block B (arrays ba,bi,bj,bilen,bimax and struct b).
   NOTE(review): the CHKMEMQ below has no counterpart in the A macro and
   looks like leftover debugging instrumentation — consider removing for
   consistency.
*/
#define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto b_noinsert; \
        } \
      } \
      if (b->nonew == 1) goto b_noinsert; \
      if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
      MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
      CHKMEMQ;\
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      b_noinsert:; \
    bilen[brow] = nrow; \
}
210 
211 #if defined(PETSC_USE_MAT_SINGLE)
212 #undef __FUNCT__
213 #define __FUNCT__ "MatSetValues_MPIBAIJ"
214 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
215 {
216   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
217   PetscErrorCode ierr;
218   PetscInt       i,N = m*n;
219   MatScalar      *vsingle;
220 
221   PetscFunctionBegin;
222   if (N > b->setvalueslen) {
223     ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);
224     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
225     b->setvalueslen  = N;
226   }
227   vsingle = b->setvaluescopy;
228 
229   for (i=0; i<N; i++) {
230     vsingle[i] = v[i];
231   }
232   ierr = MatSetValues_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
233   PetscFunctionReturn(0);
234 }
235 
236 #undef __FUNCT__
237 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
238 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
239 {
240   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
241   PetscErrorCode ierr;
242   PetscInt       i,N = m*n*b->bs2;
243   MatScalar      *vsingle;
244 
245   PetscFunctionBegin;
246   if (N > b->setvalueslen) {
247     ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);
248     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
249     b->setvalueslen  = N;
250   }
251   vsingle = b->setvaluescopy;
252   for (i=0; i<N; i++) {
253     vsingle[i] = v[i];
254   }
255   ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
256   PetscFunctionReturn(0);
257 }
258 
259 #undef __FUNCT__
260 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
261 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
262 {
263   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
264   PetscErrorCode ierr;
265   PetscInt       i,N = m*n;
266   MatScalar      *vsingle;
267 
268   PetscFunctionBegin;
269   if (N > b->setvalueslen) {
270     ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);
271     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
272     b->setvalueslen  = N;
273   }
274   vsingle = b->setvaluescopy;
275   for (i=0; i<N; i++) {
276     vsingle[i] = v[i];
277   }
278   ierr = MatSetValues_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
279   PetscFunctionReturn(0);
280 }
281 
282 #undef __FUNCT__
283 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
284 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
285 {
286   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
287   PetscErrorCode ierr;
288   PetscInt       i,N = m*n*b->bs2;
289   MatScalar      *vsingle;
290 
291   PetscFunctionBegin;
292   if (N > b->setvalueslen) {
293     ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);
294     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
295     b->setvalueslen  = N;
296   }
297   vsingle = b->setvaluescopy;
298   for (i=0; i<N; i++) {
299     vsingle[i] = v[i];
300   }
301   ierr = MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
302   PetscFunctionReturn(0);
303 }
304 #endif
305 
306 #undef __FUNCT__
307 #define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar"
308 PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
309 {
310   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
311   MatScalar      value;
312   PetscTruth     roworiented = baij->roworiented;
313   PetscErrorCode ierr;
314   PetscInt       i,j,row,col;
315   PetscInt       rstart_orig=mat->rmap.rstart;
316   PetscInt       rend_orig=mat->rmap.rend,cstart_orig=mat->cmap.rstart;
317   PetscInt       cend_orig=mat->cmap.rend,bs=mat->rmap.bs;
318 
319   /* Some Variables required in the macro */
320   Mat            A = baij->A;
321   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)(A)->data;
322   PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
323   MatScalar      *aa=a->a;
324 
325   Mat            B = baij->B;
326   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(B)->data;
327   PetscInt       *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
328   MatScalar      *ba=b->a;
329 
330   PetscInt       *rp,ii,nrow,_i,rmax,N,brow,bcol;
331   PetscInt       low,high,t,ridx,cidx,bs2=a->bs2;
332   MatScalar      *ap,*bap;
333 
334   PetscFunctionBegin;
335   for (i=0; i<m; i++) {
336     if (im[i] < 0) continue;
337 #if defined(PETSC_USE_DEBUG)
338     if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
339 #endif
340     if (im[i] >= rstart_orig && im[i] < rend_orig) {
341       row = im[i] - rstart_orig;
342       for (j=0; j<n; j++) {
343         if (in[j] >= cstart_orig && in[j] < cend_orig){
344           col = in[j] - cstart_orig;
345           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
346           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
347           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
348         } else if (in[j] < 0) continue;
349 #if defined(PETSC_USE_DEBUG)
350         else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[i],mat->cmap.N-1);}
351 #endif
352         else {
353           if (mat->was_assembled) {
354             if (!baij->colmap) {
355               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
356             }
357 #if defined (PETSC_USE_CTABLE)
358             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
359             col  = col - 1;
360 #else
361             col = baij->colmap[in[j]/bs] - 1;
362 #endif
363             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
364               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
365               col =  in[j];
366               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
367               B = baij->B;
368               b = (Mat_SeqBAIJ*)(B)->data;
369               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
370               ba=b->a;
371             } else col += in[j]%bs;
372           } else col = in[j];
373           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
374           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
375           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
376         }
377       }
378     } else {
379       if (!baij->donotstash) {
380         if (roworiented) {
381           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
382         } else {
383           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
384         }
385       }
386     }
387   }
388   PetscFunctionReturn(0);
389 }
390 
391 #undef __FUNCT__
392 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_MatScalar"
/*
   MatSetValuesBlocked_MPIBAIJ_MatScalar - Inserts/adds values one bs-by-bs
   block at a time.  im[]/in[] are BLOCK row/column indices.  Each block is
   repacked (if necessary) into the column-major scratch buffer
   baij->barray before being handed to the sequential blocked routines;
   off-process rows are stashed for assembly.
*/
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
  const MatScalar *value;
  MatScalar       *barray=baij->barray;
  PetscTruth      roworiented = baij->roworiented;
  PetscErrorCode  ierr;
  PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
  PetscInt        cend=baij->cendbs,bs=mat->rmap.bs,bs2=baij->bs2;

  PetscFunctionBegin;
  /* lazily allocate the one-block (bs2-entry) repacking buffer */
  if(!barray) {
    ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  /* distance (in scalars) between consecutive rows of one block inside v */
  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;    /* negative block-row indices are ignored */
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If NumCol = 1 then a copy is not required */
        /* (barray then aliases directly into v — the cast drops const) */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++  = *value++;
            }
          }
          /* rewind to the start of the repacked block */
          barray -=bs2;
        }

        if (in[j] >= cstart && in[j] < cend){
          /* block column owned locally: diagonal block */
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
        else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
#endif
        else {
          /* off-process block column: off-diagonal block */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
            /* sanity check: colmap entries must be 1 + multiple of bs */
#if defined (PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined (PETSC_USE_CTABLE)
	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              /* new off-diagonal block column: revert to global numbering */
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
            }
          }
          else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      /* block row owned by another process: stash for assembly */
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
493 
494 #define HASH_KEY 0.6180339887
495 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
496 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
497 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
498 #undef __FUNCT__
499 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT_MatScalar"
/*
   MatSetValues_MPIBAIJ_HT_MatScalar - Point-wise insertion using the
   precomputed hash table (baij->ht maps a block-position key to an index
   whose entry in baij->hd points at the block's storage).  Every (row,col)
   must already have an entry in the table; a missing entry is an error.
   Off-process rows are stashed for assembly.  In debug builds the probe
   counts are accumulated for hash-efficiency reporting.
*/
PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscTruth     roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap.rstart;
  PetscInt       rend_orig=mat->rmap.rend,Nbs=baij->Nbs;
  PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap.bs,*HT=baij->ht,idx;
  PetscReal      tmp;
  MatScalar      **HD = baij->hd,value;
#if defined(PETSC_USE_DEBUG)
  PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
#endif
      row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j=0; j<n; j++) {
        col = in[j];
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        /* Look up into the Hash Table: key encodes the global block position */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);


        idx = h1;
        /* linear probing from h1, wrapping once around the table */
#if defined(PETSC_USE_DEBUG)
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#endif
        /* A HASH table entry is found, so insert the values at the correct address */
        /* (blocks are stored column-major: offset = (col%bs)*bs + row%bs) */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else {
      /* row owned by another process: stash for assembly-time exchange */
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}
576 
577 #undef __FUNCT__
578 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT_MatScalar"
579 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
580 {
581   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
582   PetscTruth      roworiented = baij->roworiented;
583   PetscErrorCode  ierr;
584   PetscInt        i,j,ii,jj,row,col;
585   PetscInt        rstart=baij->rstartbs;
586   PetscInt        rend=mat->rmap.rend,stepval,bs=mat->rmap.bs,bs2=baij->bs2,nbs2=n*bs2;
587   PetscInt        h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
588   PetscReal       tmp;
589   MatScalar       **HD = baij->hd,*baij_a;
590   const MatScalar *v_t,*value;
591 #if defined(PETSC_USE_DEBUG)
592   PetscInt        total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
593 #endif
594 
595   PetscFunctionBegin;
596 
597   if (roworiented) {
598     stepval = (n-1)*bs;
599   } else {
600     stepval = (m-1)*bs;
601   }
602   for (i=0; i<m; i++) {
603 #if defined(PETSC_USE_DEBUG)
604     if (im[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
605     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
606 #endif
607     row   = im[i];
608     v_t   = v + i*nbs2;
609     if (row >= rstart && row < rend) {
610       for (j=0; j<n; j++) {
611         col = in[j];
612 
613         /* Look up into the Hash Table */
614         key = row*Nbs+col+1;
615         h1  = HASH(size,key,tmp);
616 
617         idx = h1;
618 #if defined(PETSC_USE_DEBUG)
619         total_ct++;
620         insert_ct++;
621        if (HT[idx] != key) {
622           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
623           if (idx == size) {
624             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
625             if (idx == h1) {
626               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
627             }
628           }
629         }
630 #else
631         if (HT[idx] != key) {
632           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
633           if (idx == size) {
634             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
635             if (idx == h1) {
636               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
637             }
638           }
639         }
640 #endif
641         baij_a = HD[idx];
642         if (roworiented) {
643           /*value = v + i*(stepval+bs)*bs + j*bs;*/
644           /* value = v + (i*(stepval+bs)+j)*bs; */
645           value = v_t;
646           v_t  += bs;
647           if (addv == ADD_VALUES) {
648             for (ii=0; ii<bs; ii++,value+=stepval) {
649               for (jj=ii; jj<bs2; jj+=bs) {
650                 baij_a[jj]  += *value++;
651               }
652             }
653           } else {
654             for (ii=0; ii<bs; ii++,value+=stepval) {
655               for (jj=ii; jj<bs2; jj+=bs) {
656                 baij_a[jj]  = *value++;
657               }
658             }
659           }
660         } else {
661           value = v + j*(stepval+bs)*bs + i*bs;
662           if (addv == ADD_VALUES) {
663             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
664               for (jj=0; jj<bs; jj++) {
665                 baij_a[jj]  += *value++;
666               }
667             }
668           } else {
669             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
670               for (jj=0; jj<bs; jj++) {
671                 baij_a[jj]  = *value++;
672               }
673             }
674           }
675         }
676       }
677     } else {
678       if (!baij->donotstash) {
679         if (roworiented) {
680           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
681         } else {
682           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
683         }
684       }
685     }
686   }
687 #if defined(PETSC_USE_DEBUG)
688   baij->ht_total_ct = total_ct;
689   baij->ht_insert_ct = insert_ct;
690 #endif
691   PetscFunctionReturn(0);
692 }
693 
694 #undef __FUNCT__
695 #define __FUNCT__ "MatGetValues_MPIBAIJ"
/*
   MatGetValues_MPIBAIJ - Retrieves individual (point) values.  Only rows
   owned by this process may be requested.  Locally-owned columns are read
   from the diagonal block A; other columns are translated through
   baij->colmap and read from the off-diagonal block B, returning 0.0 for
   positions whose block column is not stored locally.
*/
PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs=mat->rmap.bs,i,j,bsrstart = mat->rmap.rstart,bsrend = mat->rmap.rend;
  PetscInt       bscstart = mat->cmap.rstart,bscend = mat->cmap.rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
    if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap.N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend){
          /* locally-owned column: read from the diagonal block */
          col = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
          /* data = bs*(local block column) for the requested global block,
             or -1 if the block is absent (colmap values are shifted by 1) */
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data --;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          /* verify the block really corresponds to this global column
             before reading; otherwise the entry is a structural zero */
          if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
 PetscFunctionReturn(0);
}
738 
739 #undef __FUNCT__
740 #define __FUNCT__ "MatNorm_MPIBAIJ"
741 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
742 {
743   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
744   Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
745   PetscErrorCode ierr;
746   PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap.bs,nz,row,col;
747   PetscReal      sum = 0.0;
748   MatScalar      *v;
749 
750   PetscFunctionBegin;
751   if (baij->size == 1) {
752     ierr =  MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
753   } else {
754     if (type == NORM_FROBENIUS) {
755       v = amat->a;
756       nz = amat->nz*bs2;
757       for (i=0; i<nz; i++) {
758 #if defined(PETSC_USE_COMPLEX)
759         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
760 #else
761         sum += (*v)*(*v); v++;
762 #endif
763       }
764       v = bmat->a;
765       nz = bmat->nz*bs2;
766       for (i=0; i<nz; i++) {
767 #if defined(PETSC_USE_COMPLEX)
768         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
769 #else
770         sum += (*v)*(*v); v++;
771 #endif
772       }
773       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
774       *nrm = sqrt(*nrm);
775     } else if (type == NORM_1) { /* max column sum */
776       PetscReal *tmp,*tmp2;
777       PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
778       ierr = PetscMalloc((2*mat->cmap.N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
779       tmp2 = tmp + mat->cmap.N;
780       ierr = PetscMemzero(tmp,mat->cmap.N*sizeof(PetscReal));CHKERRQ(ierr);
781       v = amat->a; jj = amat->j;
782       for (i=0; i<amat->nz; i++) {
783         for (j=0; j<bs; j++){
784           col = bs*(cstart + *jj) + j; /* column index */
785           for (row=0; row<bs; row++){
786             tmp[col] += PetscAbsScalar(*v);  v++;
787           }
788         }
789         jj++;
790       }
791       v = bmat->a; jj = bmat->j;
792       for (i=0; i<bmat->nz; i++) {
793         for (j=0; j<bs; j++){
794           col = bs*garray[*jj] + j;
795           for (row=0; row<bs; row++){
796             tmp[col] += PetscAbsScalar(*v); v++;
797           }
798         }
799         jj++;
800       }
801       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap.N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
802       *nrm = 0.0;
803       for (j=0; j<mat->cmap.N; j++) {
804         if (tmp2[j] > *nrm) *nrm = tmp2[j];
805       }
806       ierr = PetscFree(tmp);CHKERRQ(ierr);
807     } else if (type == NORM_INFINITY) { /* max row sum */
808       PetscReal *sums;
809       ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr)
810       sum = 0.0;
811       for (j=0; j<amat->mbs; j++) {
812         for (row=0; row<bs; row++) sums[row] = 0.0;
813         v = amat->a + bs2*amat->i[j];
814         nz = amat->i[j+1]-amat->i[j];
815         for (i=0; i<nz; i++) {
816           for (col=0; col<bs; col++){
817             for (row=0; row<bs; row++){
818               sums[row] += PetscAbsScalar(*v); v++;
819             }
820           }
821         }
822         v = bmat->a + bs2*bmat->i[j];
823         nz = bmat->i[j+1]-bmat->i[j];
824         for (i=0; i<nz; i++) {
825           for (col=0; col<bs; col++){
826             for (row=0; row<bs; row++){
827               sums[row] += PetscAbsScalar(*v); v++;
828             }
829           }
830         }
831         for (row=0; row<bs; row++){
832           if (sums[row] > sum) sum = sums[row];
833         }
834       }
835       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
836       ierr = PetscFree(sums);CHKERRQ(ierr);
837     } else {
838       SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
839     }
840   }
841   PetscFunctionReturn(0);
842 }
843 
844 /*
845   Creates the hash table, and sets the table
846   This table is created only once.
  If new entries need to be added to the matrix
848   then the hash table has to be destroyed and
849   recreated.
850 */
851 #undef __FUNCT__
852 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
853 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
854 {
855   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
856   Mat            A = baij->A,B=baij->B;
857   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
858   PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
859   PetscErrorCode ierr;
860   PetscInt       size,bs2=baij->bs2,rstart=baij->rstartbs;
861   PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
862   PetscInt       *HT,key;
863   MatScalar      **HD;
864   PetscReal      tmp;
865 #if defined(PETSC_USE_INFO)
866   PetscInt       ct=0,max=0;
867 #endif
868 
869   PetscFunctionBegin;
870   baij->ht_size=(PetscInt)(factor*nz);
871   size = baij->ht_size;
872 
873   if (baij->ht) {
874     PetscFunctionReturn(0);
875   }
876 
877   /* Allocate Memory for Hash Table */
878   ierr     = PetscMalloc((size)*(sizeof(PetscInt)+sizeof(MatScalar*))+1,&baij->hd);CHKERRQ(ierr);
879   baij->ht = (PetscInt*)(baij->hd + size);
880   HD       = baij->hd;
881   HT       = baij->ht;
882 
883 
884   ierr = PetscMemzero(HD,size*(sizeof(PetscInt)+sizeof(PetscScalar*)));CHKERRQ(ierr);
885 
886 
887   /* Loop Over A */
888   for (i=0; i<a->mbs; i++) {
889     for (j=ai[i]; j<ai[i+1]; j++) {
890       row = i+rstart;
891       col = aj[j]+cstart;
892 
893       key = row*Nbs + col + 1;
894       h1  = HASH(size,key,tmp);
895       for (k=0; k<size; k++){
896         if (!HT[(h1+k)%size]) {
897           HT[(h1+k)%size] = key;
898           HD[(h1+k)%size] = a->a + j*bs2;
899           break;
900 #if defined(PETSC_USE_INFO)
901         } else {
902           ct++;
903 #endif
904         }
905       }
906 #if defined(PETSC_USE_INFO)
907       if (k> max) max = k;
908 #endif
909     }
910   }
911   /* Loop Over B */
912   for (i=0; i<b->mbs; i++) {
913     for (j=bi[i]; j<bi[i+1]; j++) {
914       row = i+rstart;
915       col = garray[bj[j]];
916       key = row*Nbs + col + 1;
917       h1  = HASH(size,key,tmp);
918       for (k=0; k<size; k++){
919         if (!HT[(h1+k)%size]) {
920           HT[(h1+k)%size] = key;
921           HD[(h1+k)%size] = b->a + j*bs2;
922           break;
923 #if defined(PETSC_USE_INFO)
924         } else {
925           ct++;
926 #endif
927         }
928       }
929 #if defined(PETSC_USE_INFO)
930       if (k> max) max = k;
931 #endif
932     }
933   }
934 
935   /* Print Summary */
936 #if defined(PETSC_USE_INFO)
937   for (i=0,j=0; i<size; i++) {
938     if (HT[i]) {j++;}
939   }
940   ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
941 #endif
942   PetscFunctionReturn(0);
943 }
944 
945 #undef __FUNCT__
946 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
947 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
948 {
949   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
950   PetscErrorCode ierr;
951   PetscInt       nstash,reallocs;
952   InsertMode     addv;
953 
954   PetscFunctionBegin;
955   if (baij->donotstash) {
956     PetscFunctionReturn(0);
957   }
958 
959   /* make sure all processors are either in INSERTMODE or ADDMODE */
960   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
961   if (addv == (ADD_VALUES|INSERT_VALUES)) {
962     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
963   }
964   mat->insertmode = addv; /* in case this processor had no cache */
965 
966   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap.range);CHKERRQ(ierr);
967   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
968   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
969   ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
970   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
971   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
972   PetscFunctionReturn(0);
973 }
974 
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
/*
   MatAssemblyEnd_MPIBAIJ - Completes assembly: receives and applies the
   stashed off-process values, assembles the local A and B blocks, handles
   collective disassembly, and (on final assembly) builds the scatter context
   and, if requested, the MatSetValues hash table.
*/
PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col,other_disassembled;
  PetscTruth     r1,r2,r3;
  MatScalar      *val;
  InsertMode     addv = mat->insertmode;
  PetscMPIInt    n;

  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!baij->donotstash) {
    /* drain the point stash: each message is a batch of (row,col,val) triples */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;
    (((Mat_SeqBAIJ*)baij->B->data))->roworiented    = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* each stashed block carries bs2 values, hence the val+i*bs2 offset */
        ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    ((Mat_SeqBAIJ*)baij->B->data)->roworiented    = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  /* first final assembly: build the colmap/garray and the Mvctx scatter */
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
    ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  /* if the user asked for the hash table, build it now and switch the
     MatSetValues kernels to the hash-table variants */
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  /* workspace used by MatGetRow(); invalidated by assembly */
  ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
  baij->rowvalues = 0;
  PetscFunctionReturn(0);
}
1075 
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
/*
   MatView_MPIBAIJ_ASCIIorDraworSocket - Views a parallel BAIJ matrix.

   For ASCII info formats only summary statistics are printed.  Otherwise
   the whole matrix is gathered onto process 0 (by building a temporary
   parallel matrix whose local part on rank 0 is the entire matrix) and
   that sequential block is viewed through the singleton viewer.
*/
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = baij->size,rank = baij->rank;
  PetscInt          bs = mat->rmap.bs;
  PetscTruth        iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-process nonzero/memory statistics; no matrix entries printed */
      MatInfo info;
      ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
              rank,mat->rmap.N,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs,
              mat->rmap.bs,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw       draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    /* uniprocessor: the diagonal block is the whole matrix */
    ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    PetscInt    M = mat->rmap.N,N = mat->cmap.N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar   *a;

    /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      /* rank 0 owns every row of the gathered matrix */
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(A,mat->rmap.bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    /* rvals holds the bs consecutive global row indices of one block row */
    ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        /* blocks are stored column-major: insert one block column at a time */
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ*)baij->B->data;
    ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs; /* translate local block column to global */
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1192 
1193 #undef __FUNCT__
1194 #define __FUNCT__ "MatView_MPIBAIJ"
1195 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
1196 {
1197   PetscErrorCode ierr;
1198   PetscTruth     iascii,isdraw,issocket,isbinary;
1199 
1200   PetscFunctionBegin;
1201   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1202   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1203   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1204   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1205   if (iascii || isdraw || issocket || isbinary) {
1206     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1207   } else {
1208     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
1209   }
1210   PetscFunctionReturn(0);
1211 }
1212 
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIBAIJ"
/*
   MatDestroy_MPIBAIJ - Frees all storage owned by a parallel BAIJ matrix:
   the stashes, the sequential A and B blocks, the column map, the scatter
   context, various work arrays, and the Mat_MPIBAIJ structure itself.
   Finally the composed functions registered at creation time are removed.
*/
PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap.N,mat->cmap.N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = MatDestroy(baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(baij->B);CHKERRQ(ierr);
  /* colmap is a PetscTable or a plain array depending on the build option */
#if defined (PETSC_USE_CTABLE)
  if (baij->colmap) {ierr = PetscTableDestroy(baij->colmap);CHKERRQ(ierr);}
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
  if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
  ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  /* hd and ht share one allocation; freeing hd releases both */
  ierr = PetscFree(baij->hd);CHKERRQ(ierr);
#if defined(PETSC_USE_MAT_SINGLE)
  ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(baij);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  /* unregister the type-specific functions composed on this object */
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1255 
1256 #undef __FUNCT__
1257 #define __FUNCT__ "MatMult_MPIBAIJ"
1258 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1259 {
1260   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1261   PetscErrorCode ierr;
1262   PetscInt       nt;
1263 
1264   PetscFunctionBegin;
1265   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1266   if (nt != A->cmap.n) {
1267     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1268   }
1269   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1270   if (nt != A->rmap.n) {
1271     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
1272   }
1273   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1274   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1275   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1276   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1277   PetscFunctionReturn(0);
1278 }
1279 
1280 #undef __FUNCT__
1281 #define __FUNCT__ "MatMultAdd_MPIBAIJ"
1282 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1283 {
1284   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1285   PetscErrorCode ierr;
1286 
1287   PetscFunctionBegin;
1288   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1289   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1290   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1291   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1292   PetscFunctionReturn(0);
1293 }
1294 
#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIBAIJ"
/*
   MatMultTranspose_MPIBAIJ - Computes yy = mat^T * xx.

   The off-diagonal contribution B^T*xx is computed into lvec and sent back
   to the owning processes with a reverse scatter that ADDs into yy.  The
   order of the local multiply relative to VecScatterBegin depends on
   whether the scatter is "merged" (begin performs the data movement).
*/
PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscTruth     merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually */
    /* inserted in yy until the next line */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1325 
1326 #undef __FUNCT__
1327 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
1328 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1329 {
1330   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1331   PetscErrorCode ierr;
1332 
1333   PetscFunctionBegin;
1334   /* do nondiagonal part */
1335   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1336   /* send it on its way */
1337   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1338   /* do local part */
1339   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1340   /* receive remote parts: note this assumes the values are not actually */
1341   /* inserted in yy until the next line, which is true for my implementation*/
1342   /* but is not perhaps always true. */
1343   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1344   PetscFunctionReturn(0);
1345 }
1346 
1347 /*
1348   This only works correctly for square matrices where the subblock A->A is the
1349    diagonal block
1350 */
1351 #undef __FUNCT__
1352 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
1353 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1354 {
1355   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1356   PetscErrorCode ierr;
1357 
1358   PetscFunctionBegin;
1359   if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1360   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1361   PetscFunctionReturn(0);
1362 }
1363 
1364 #undef __FUNCT__
1365 #define __FUNCT__ "MatScale_MPIBAIJ"
1366 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
1367 {
1368   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1369   PetscErrorCode ierr;
1370 
1371   PetscFunctionBegin;
1372   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1373   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1374   PetscFunctionReturn(0);
1375 }
1376 
1377 #undef __FUNCT__
1378 #define __FUNCT__ "MatGetRow_MPIBAIJ"
1379 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1380 {
1381   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
1382   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1383   PetscErrorCode ierr;
1384   PetscInt       bs = matin->rmap.bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1385   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap.rstart,brend = matin->rmap.rend;
1386   PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;
1387 
1388   PetscFunctionBegin;
1389   if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1390   mat->getrowactive = PETSC_TRUE;
1391 
1392   if (!mat->rowvalues && (idx || v)) {
1393     /*
1394         allocate enough space to hold information from the longest row.
1395     */
1396     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1397     PetscInt     max = 1,mbs = mat->mbs,tmp;
1398     for (i=0; i<mbs; i++) {
1399       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1400       if (max < tmp) { max = tmp; }
1401     }
1402     ierr = PetscMalloc(max*bs2*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1403     mat->rowindices = (PetscInt*)(mat->rowvalues + max*bs2);
1404   }
1405 
1406   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows")
1407   lrow = row - brstart;
1408 
1409   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1410   if (!v)   {pvA = 0; pvB = 0;}
1411   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1412   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1413   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1414   nztot = nzA + nzB;
1415 
1416   cmap  = mat->garray;
1417   if (v  || idx) {
1418     if (nztot) {
1419       /* Sort by increasing column numbers, assuming A and B already sorted */
1420       PetscInt imark = -1;
1421       if (v) {
1422         *v = v_p = mat->rowvalues;
1423         for (i=0; i<nzB; i++) {
1424           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1425           else break;
1426         }
1427         imark = i;
1428         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1429         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1430       }
1431       if (idx) {
1432         *idx = idx_p = mat->rowindices;
1433         if (imark > -1) {
1434           for (i=0; i<imark; i++) {
1435             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1436           }
1437         } else {
1438           for (i=0; i<nzB; i++) {
1439             if (cmap[cworkB[i]/bs] < cstart)
1440               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1441             else break;
1442           }
1443           imark = i;
1444         }
1445         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1446         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1447       }
1448     } else {
1449       if (idx) *idx = 0;
1450       if (v)   *v   = 0;
1451     }
1452   }
1453   *nz = nztot;
1454   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1455   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1456   PetscFunctionReturn(0);
1457 }
1458 
1459 #undef __FUNCT__
1460 #define __FUNCT__ "MatRestoreRow_MPIBAIJ"
1461 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1462 {
1463   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1464 
1465   PetscFunctionBegin;
1466   if (!baij->getrowactive) {
1467     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1468   }
1469   baij->getrowactive = PETSC_FALSE;
1470   PetscFunctionReturn(0);
1471 }
1472 
1473 #undef __FUNCT__
1474 #define __FUNCT__ "MatZeroEntries_MPIBAIJ"
1475 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1476 {
1477   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1478   PetscErrorCode ierr;
1479 
1480   PetscFunctionBegin;
1481   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1482   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1483   PetscFunctionReturn(0);
1484 }
1485 
1486 #undef __FUNCT__
1487 #define __FUNCT__ "MatGetInfo_MPIBAIJ"
1488 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1489 {
1490   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
1491   Mat            A = a->A,B = a->B;
1492   PetscErrorCode ierr;
1493   PetscReal      isend[5],irecv[5];
1494 
1495   PetscFunctionBegin;
1496   info->block_size     = (PetscReal)matin->rmap.bs;
1497   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1498   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1499   isend[3] = info->memory;  isend[4] = info->mallocs;
1500   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1501   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1502   isend[3] += info->memory;  isend[4] += info->mallocs;
1503   if (flag == MAT_LOCAL) {
1504     info->nz_used      = isend[0];
1505     info->nz_allocated = isend[1];
1506     info->nz_unneeded  = isend[2];
1507     info->memory       = isend[3];
1508     info->mallocs      = isend[4];
1509   } else if (flag == MAT_GLOBAL_MAX) {
1510     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1511     info->nz_used      = irecv[0];
1512     info->nz_allocated = irecv[1];
1513     info->nz_unneeded  = irecv[2];
1514     info->memory       = irecv[3];
1515     info->mallocs      = irecv[4];
1516   } else if (flag == MAT_GLOBAL_SUM) {
1517     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1518     info->nz_used      = irecv[0];
1519     info->nz_allocated = irecv[1];
1520     info->nz_unneeded  = irecv[2];
1521     info->memory       = irecv[3];
1522     info->mallocs      = irecv[4];
1523   } else {
1524     SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1525   }
1526   info->rows_global       = (PetscReal)A->rmap.N;
1527   info->columns_global    = (PetscReal)A->cmap.N;
1528   info->rows_local        = (PetscReal)A->rmap.N;
1529   info->columns_local     = (PetscReal)A->cmap.N;
1530   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1531   info->fill_ratio_needed = 0;
1532   info->factor_mallocs    = 0;
1533   PetscFunctionReturn(0);
1534 }
1535 
1536 #undef __FUNCT__
1537 #define __FUNCT__ "MatSetOption_MPIBAIJ"
1538 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscTruth flg)
1539 {
1540   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1541   PetscErrorCode ierr;
1542 
1543   PetscFunctionBegin;
1544   switch (op) {
1545   case MAT_NEW_NONZERO_LOCATIONS:
1546   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1547   case MAT_KEEP_ZEROED_ROWS:
1548   case MAT_NEW_NONZERO_LOCATION_ERR:
1549     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1550     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1551     break;
1552   case MAT_ROW_ORIENTED:
1553     a->roworiented = flg;
1554     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1555     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1556     break;
1557   case MAT_NEW_DIAGONALS:
1558     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1559     break;
1560   case MAT_IGNORE_OFF_PROC_ENTRIES:
1561     a->donotstash = flg;
1562     break;
1563   case MAT_USE_HASH_TABLE:
1564     a->ht_flag = flg;
1565     break;
1566   case MAT_SYMMETRIC:
1567   case MAT_STRUCTURALLY_SYMMETRIC:
1568   case MAT_HERMITIAN:
1569   case MAT_SYMMETRY_ETERNAL:
1570     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1571     break;
1572   default:
1573     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1574   }
1575   PetscFunctionReturn(0);
1576 }
1577 
1578 #undef __FUNCT__
1579 #define __FUNCT__ "MatTranspose_MPIBAIJ("
1580 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,Mat *matout)
1581 {
1582   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
1583   Mat_SeqBAIJ    *Aloc;
1584   Mat            B;
1585   PetscErrorCode ierr;
1586   PetscInt       M=A->rmap.N,N=A->cmap.N,*ai,*aj,i,*rvals,j,k,col;
1587   PetscInt       bs=A->rmap.bs,mbs=baij->mbs;
1588   MatScalar      *a;
1589 
1590   PetscFunctionBegin;
1591   if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1592   ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
1593   ierr = MatSetSizes(B,A->cmap.n,A->rmap.n,N,M);CHKERRQ(ierr);
1594   ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1595   ierr = MatMPIBAIJSetPreallocation(B,A->rmap.bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1596 
1597   /* copy over the A part */
1598   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1599   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1600   ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
1601 
1602   for (i=0; i<mbs; i++) {
1603     rvals[0] = bs*(baij->rstartbs + i);
1604     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1605     for (j=ai[i]; j<ai[i+1]; j++) {
1606       col = (baij->cstartbs+aj[j])*bs;
1607       for (k=0; k<bs; k++) {
1608         ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1609         col++; a += bs;
1610       }
1611     }
1612   }
1613   /* copy over the B part */
1614   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1615   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1616   for (i=0; i<mbs; i++) {
1617     rvals[0] = bs*(baij->rstartbs + i);
1618     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1619     for (j=ai[i]; j<ai[i+1]; j++) {
1620       col = baij->garray[aj[j]]*bs;
1621       for (k=0; k<bs; k++) {
1622         ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1623         col++; a += bs;
1624       }
1625     }
1626   }
1627   ierr = PetscFree(rvals);CHKERRQ(ierr);
1628   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1629   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1630 
1631   if (matout) {
1632     *matout = B;
1633   } else {
1634     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1635   }
1636   PetscFunctionReturn(0);
1637 }
1638 
#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
/*
   MatDiagonalScale_MPIBAIJ - Computes mat = diag(ll)*mat*diag(rr).

   ll - left scaling vector, or PETSC_NULL; local size must equal the local row size
   rr - right scaling vector, or PETSC_NULL; local size must equal the local column size

   Right scaling of the off-diagonal block needs off-process entries of rr,
   so the scatter of rr into baij->lvec is started first and completed only
   after the diagonal block is scaled, overlapping communication with
   computation.
*/
PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            a = baij->A,b = baij->B;  /* diagonal and off-diagonal sequential blocks */
  PetscErrorCode ierr;
  PetscInt       s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    /* left scaling uses only local entries of ll, so it needs no communication */
    ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
  }
  /* scale  the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}
1672 
#undef __FUNCT__
#define __FUNCT__ "MatZeroRows_MPIBAIJ"
/*
   MatZeroRows_MPIBAIJ - Zeros the globally numbered (point, not block) rows
   listed in rows[], optionally placing diag on the diagonal of each zeroed
   row.

   Any process may list rows owned by any other process, so the indices are
   first routed to their owners with a hand-rolled MPI rendezvous: per-rank
   counts, PetscMaxSum to learn the receive count and max message length,
   then nonblocking Irecv/Isend of the indices.  Only then are the locally
   owned rows zeroed in the sequential blocks.
*/
PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
{
  Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscMPIInt    imdex,size = l->size,n,rank = l->rank;
  PetscInt       i,*owners = A->rmap.range;
  PetscInt       *nprocs,j,idx,nsends,row;
  PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
  PetscInt       *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source,lastidx = -1;
  PetscInt       *lens,*lrows,*values,rstart_bs=A->rmap.rstart;
  MPI_Comm       comm = ((PetscObject)A)->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
#if defined(PETSC_DEBUG)
  PetscTruth     found = PETSC_FALSE;
#endif

  PetscFunctionBegin;
  /*  first count number of contributors to each processor */
  ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
  ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
  ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
  /* nprocs[2*j] counts rows destined for rank j; nprocs[2*j+1] is a 0/1 flag
     that a message to j is needed.  The owner search resumes from the
     previous rank (j is not reset each iteration) because rows[] is often
     sorted; j restarts at 0 only when the index sequence decreases. */
  j = 0;
  for (i=0; i<N; i++) {
    if (lastidx > (idx = rows[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++;
        nprocs[2*j+1] = 1;
        owner[i] = j;
#if defined(PETSC_DEBUG)
        found = PETSC_TRUE;
#endif
        break;
      }
    }
#if defined(PETSC_DEBUG)
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
    found = PETSC_FALSE;
#endif
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}

  /* inform other processors of number of messages and max length*/
  ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);

  /* post receives:   */
  ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
     the ith processor
  */
  ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
  ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
  ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
  starts[0]  = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  /* pack the row indices grouped by destination; this pass consumes starts[] */
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }

  /* rebuild starts[] so the sends can address each destination's segment */
  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
  count = 0;
  for (i=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  base = owners[rank];

  /*  wait on receives */
  ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
    source[imdex]  = recv_status.MPI_SOURCE;
    lens[imdex]    = n;
    slen          += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  /* convert received global row numbers into local row numbers (lrows) */
  ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  /*
        Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

       Contributed by: Matthew Knepley
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
  ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0);CHKERRQ(ierr);
  if ((diag != 0.0) && (l->A->rmap.N == l->A->cmap.N)) {
    ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr);
    if (((Mat_SeqBAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    /* rectangular diagonal block: set diagonal entries one at a time through
       the normal assembly path (may allocate, hence the nonew check above) */
    for (i=0; i<slen; i++) {
      row  = lrows[i] + rstart_bs;
      ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr);
  }

  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
    ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
1825 
1826 #undef __FUNCT__
1827 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
1828 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1829 {
1830   Mat_MPIBAIJ    *a   = (Mat_MPIBAIJ*)A->data;
1831   PetscErrorCode ierr;
1832 
1833   PetscFunctionBegin;
1834   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1835   PetscFunctionReturn(0);
1836 }
1837 
1838 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
1839 
1840 #undef __FUNCT__
1841 #define __FUNCT__ "MatEqual_MPIBAIJ"
1842 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag)
1843 {
1844   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1845   Mat            a,b,c,d;
1846   PetscTruth     flg;
1847   PetscErrorCode ierr;
1848 
1849   PetscFunctionBegin;
1850   a = matA->A; b = matA->B;
1851   c = matB->A; d = matB->B;
1852 
1853   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1854   if (flg) {
1855     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1856   }
1857   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1858   PetscFunctionReturn(0);
1859 }
1860 
1861 #undef __FUNCT__
1862 #define __FUNCT__ "MatCopy_MPIBAIJ"
1863 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
1864 {
1865   PetscErrorCode ierr;
1866   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ *)A->data;
1867   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ *)B->data;
1868 
1869   PetscFunctionBegin;
1870   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1871   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1872     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1873   } else {
1874     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1875     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1876   }
1877   PetscFunctionReturn(0);
1878 }
1879 
1880 #undef __FUNCT__
1881 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ"
1882 PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A)
1883 {
1884   PetscErrorCode ierr;
1885 
1886   PetscFunctionBegin;
1887   ierr =  MatMPIBAIJSetPreallocation(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1888   PetscFunctionReturn(0);
1889 }
1890 
1891 #include "petscblaslapack.h"
1892 #undef __FUNCT__
1893 #define __FUNCT__ "MatAXPY_MPIBAIJ"
1894 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1895 {
1896   PetscErrorCode ierr;
1897   Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data;
1898   PetscBLASInt   bnz,one=1;
1899   Mat_SeqBAIJ    *x,*y;
1900 
1901   PetscFunctionBegin;
1902   if (str == SAME_NONZERO_PATTERN) {
1903     PetscScalar alpha = a;
1904     x = (Mat_SeqBAIJ *)xx->A->data;
1905     y = (Mat_SeqBAIJ *)yy->A->data;
1906     bnz = (PetscBLASInt)x->nz;
1907     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1908     x = (Mat_SeqBAIJ *)xx->B->data;
1909     y = (Mat_SeqBAIJ *)yy->B->data;
1910     bnz = (PetscBLASInt)x->nz;
1911     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1912   } else {
1913     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1914   }
1915   PetscFunctionReturn(0);
1916 }
1917 
1918 #undef __FUNCT__
1919 #define __FUNCT__ "MatRealPart_MPIBAIJ"
1920 PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
1921 {
1922   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ*)A->data;
1923   PetscErrorCode ierr;
1924 
1925   PetscFunctionBegin;
1926   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1927   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1928   PetscFunctionReturn(0);
1929 }
1930 
1931 #undef __FUNCT__
1932 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ"
1933 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
1934 {
1935   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ*)A->data;
1936   PetscErrorCode ierr;
1937 
1938   PetscFunctionBegin;
1939   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1940   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1941   PetscFunctionReturn(0);
1942 }
1943 
1944 /* -------------------------------------------------------------------*/
1945 static struct _MatOps MatOps_Values = {
1946        MatSetValues_MPIBAIJ,
1947        MatGetRow_MPIBAIJ,
1948        MatRestoreRow_MPIBAIJ,
1949        MatMult_MPIBAIJ,
1950 /* 4*/ MatMultAdd_MPIBAIJ,
1951        MatMultTranspose_MPIBAIJ,
1952        MatMultTransposeAdd_MPIBAIJ,
1953        0,
1954        0,
1955        0,
1956 /*10*/ 0,
1957        0,
1958        0,
1959        0,
1960        MatTranspose_MPIBAIJ,
1961 /*15*/ MatGetInfo_MPIBAIJ,
1962        MatEqual_MPIBAIJ,
1963        MatGetDiagonal_MPIBAIJ,
1964        MatDiagonalScale_MPIBAIJ,
1965        MatNorm_MPIBAIJ,
1966 /*20*/ MatAssemblyBegin_MPIBAIJ,
1967        MatAssemblyEnd_MPIBAIJ,
1968        0,
1969        MatSetOption_MPIBAIJ,
1970        MatZeroEntries_MPIBAIJ,
1971 /*25*/ MatZeroRows_MPIBAIJ,
1972        0,
1973        0,
1974        0,
1975        0,
1976 /*30*/ MatSetUpPreallocation_MPIBAIJ,
1977        0,
1978        0,
1979        0,
1980        0,
1981 /*35*/ MatDuplicate_MPIBAIJ,
1982        0,
1983        0,
1984        0,
1985        0,
1986 /*40*/ MatAXPY_MPIBAIJ,
1987        MatGetSubMatrices_MPIBAIJ,
1988        MatIncreaseOverlap_MPIBAIJ,
1989        MatGetValues_MPIBAIJ,
1990        MatCopy_MPIBAIJ,
1991 /*45*/ 0,
1992        MatScale_MPIBAIJ,
1993        0,
1994        0,
1995        0,
1996 /*50*/ 0,
1997        0,
1998        0,
1999        0,
2000        0,
2001 /*55*/ 0,
2002        0,
2003        MatSetUnfactored_MPIBAIJ,
2004        0,
2005        MatSetValuesBlocked_MPIBAIJ,
2006 /*60*/ 0,
2007        MatDestroy_MPIBAIJ,
2008        MatView_MPIBAIJ,
2009        0,
2010        0,
2011 /*65*/ 0,
2012        0,
2013        0,
2014        0,
2015        0,
2016 /*70*/ MatGetRowMaxAbs_MPIBAIJ,
2017        0,
2018        0,
2019        0,
2020        0,
2021 /*75*/ 0,
2022        0,
2023        0,
2024        0,
2025        0,
2026 /*80*/ 0,
2027        0,
2028        0,
2029        0,
2030        MatLoad_MPIBAIJ,
2031 /*85*/ 0,
2032        0,
2033        0,
2034        0,
2035        0,
2036 /*90*/ 0,
2037        0,
2038        0,
2039        0,
2040        0,
2041 /*95*/ 0,
2042        0,
2043        0,
2044        0,
2045        0,
2046 /*100*/0,
2047        0,
2048        0,
2049        0,
2050        0,
2051 /*105*/0,
2052        MatRealPart_MPIBAIJ,
2053        MatImaginaryPart_MPIBAIJ};
2054 
2055 
2056 EXTERN_C_BEGIN
2057 #undef __FUNCT__
2058 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
2059 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
2060 {
2061   PetscFunctionBegin;
2062   *a      = ((Mat_MPIBAIJ *)A->data)->A;
2063   *iscopy = PETSC_FALSE;
2064   PetscFunctionReturn(0);
2065 }
2066 EXTERN_C_END
2067 
2068 EXTERN_C_BEGIN
2069 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*);
2070 EXTERN_C_END
2071 
2072 #undef __FUNCT__
2073 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ"
2074 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
2075 {
2076   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)B->data;
2077   PetscInt       m = B->rmap.n/bs,cstart = baij->cstartbs, cend = baij->cendbs,j,nnz,i,d;
2078   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart = baij->rstartbs,ii;
2079   const PetscInt *JJ;
2080   PetscScalar    *values;
2081   PetscErrorCode ierr;
2082 
2083   PetscFunctionBegin;
2084 #if defined(PETSC_OPT_g)
2085   if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_RANGE,"Ii[0] must be 0 it is %D",Ii[0]);
2086 #endif
2087   ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2088   o_nnz = d_nnz + m;
2089 
2090   for (i=0; i<m; i++) {
2091     nnz     = Ii[i+1]- Ii[i];
2092     JJ      = J + Ii[i];
2093     nnz_max = PetscMax(nnz_max,nnz);
2094 #if defined(PETSC_OPT_g)
2095     if (nnz < 0) SETERRQ1(PETSC_ERR_ARG_RANGE,"Local row %D has a negative %D number of columns",i,nnz);
2096 #endif
2097     for (j=0; j<nnz; j++) {
2098       if (*JJ >= cstart) break;
2099       JJ++;
2100     }
2101     d = 0;
2102     for (; j<nnz; j++) {
2103       if (*JJ++ >= cend) break;
2104       d++;
2105     }
2106     d_nnz[i] = d;
2107     o_nnz[i] = nnz - d;
2108   }
2109   ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2110   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2111 
2112   if (v) values = (PetscScalar*)v;
2113   else {
2114     ierr = PetscMalloc(bs*bs*(nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2115     ierr = PetscMemzero(values,bs*bs*nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2116   }
2117 
2118   for (i=0; i<m; i++) {
2119     ii   = i + rstart;
2120     nnz  = Ii[i+1]- Ii[i];
2121     ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
2122   }
2123   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2124   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2125 
2126   if (!v) {
2127     ierr = PetscFree(values);CHKERRQ(ierr);
2128   }
2129   PetscFunctionReturn(0);
2130 }
2131 
#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJSetPreallocationCSR"
/*@C
   MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in block AIJ
   (block compressed row) format.

   Collective on MPI_Comm

   Input Parameters:
+  B - the matrix
.  bs - the block size
.  i - the indices into j for the start of each local block row (starts with zero)
.  j - the block column indices for each local block row (starts with zero) these must be sorted for each row
-  v - optional values in the matrix, bs*bs values per block

   Level: developer

   Notes:
   If the matrix type does not provide this operation the call is silently ignored.

.keywords: matrix, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValuesBlocked(), MatMPIBAIJSetPreallocation(), MatCreateMPIBAIJ()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
{
  PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]);

  PetscFunctionBegin;
  /* dispatch to the implementation composed on this matrix type, if any */
  ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
2163 
2164 EXTERN_C_BEGIN
2165 #undef __FUNCT__
2166 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
2167 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
2168 {
2169   Mat_MPIBAIJ    *b;
2170   PetscErrorCode ierr;
2171   PetscInt       i;
2172 
2173   PetscFunctionBegin;
2174   B->preallocated = PETSC_TRUE;
2175   ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPIBAIJ matrix","Mat");CHKERRQ(ierr);
2176     ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
2177   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2178 
2179   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
2180   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2181   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2182   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2183   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2184 
2185   B->rmap.bs  = bs;
2186   B->cmap.bs  = bs;
2187   ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr);
2188   ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr);
2189 
2190   if (d_nnz) {
2191     for (i=0; i<B->rmap.n/bs; i++) {
2192       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]);
2193     }
2194   }
2195   if (o_nnz) {
2196     for (i=0; i<B->rmap.n/bs; i++) {
2197       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]);
2198     }
2199   }
2200 
2201   b = (Mat_MPIBAIJ*)B->data;
2202   b->bs2 = bs*bs;
2203   b->mbs = B->rmap.n/bs;
2204   b->nbs = B->cmap.n/bs;
2205   b->Mbs = B->rmap.N/bs;
2206   b->Nbs = B->cmap.N/bs;
2207 
2208   for (i=0; i<=b->size; i++) {
2209     b->rangebs[i] = B->rmap.range[i]/bs;
2210   }
2211   b->rstartbs = B->rmap.rstart/bs;
2212   b->rendbs   = B->rmap.rend/bs;
2213   b->cstartbs = B->cmap.rstart/bs;
2214   b->cendbs   = B->cmap.rend/bs;
2215 
2216   ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2217   ierr = MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);CHKERRQ(ierr);
2218   ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
2219   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2220   ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
2221   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2222   ierr = MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);CHKERRQ(ierr);
2223   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
2224   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
2225   ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
2226 
2227   ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr);
2228 
2229   PetscFunctionReturn(0);
2230 }
2231 EXTERN_C_END
2232 
2233 EXTERN_C_BEGIN
2234 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
2235 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
2236 EXTERN_C_END
2237 
2238 /*MC
2239    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
2240 
2241    Options Database Keys:
2242 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
2243 . -mat_block_size <bs> - set the blocksize used to store the matrix
- -mat_use_hash_table <fact> - use a hash table (with density factor <fact>) to save memory during matrix assembly
2245 
2246   Level: beginner
2247 
2248 .seealso: MatCreateMPIBAIJ
2249 M*/
2250 
2251 EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPIBAIJ"
/*
   MatCreate_MPIBAIJ - Constructor for the MATMPIBAIJ type: installs the
   function table, initializes the Mat_MPIBAIJ data structure, processes
   the -mat_use_hash_table option, and composes the type-specific methods
   (store/retrieve values, diagonal block access, preallocation, scaling,
   hash table factor).
*/
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIBAIJ(Mat B)
{
  Mat_MPIBAIJ    *b;
  PetscErrorCode ierr;
  PetscTruth     flg;

  PetscFunctionBegin;
  ierr = PetscNewLog(B,Mat_MPIBAIJ,&b);CHKERRQ(ierr);
  B->data = (void*)b;


  ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->mapping    = 0;
  B->factor     = 0;
  B->assembled  = PETSC_FALSE;

  B->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
  b->donotstash  = PETSC_FALSE;
  b->colmap      = PETSC_NULL;
  b->garray      = PETSC_NULL;
  b->roworiented = PETSC_TRUE;

#if defined(PETSC_USE_MAT_SINGLE)
  /* stuff for MatSetValues_XXX in single precision */
  b->setvalueslen     = 0;
  b->setvaluescopy    = PETSC_NULL;
#endif

  /* stuff used in block assembly */
  b->barray       = 0;

  /* stuff used for matrix vector multiply */
  b->lvec         = 0;
  b->Mvctx        = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = 0;
  b->hd           = 0;
  b->ht_size      = 0;
  b->ht_flag      = PETSC_FALSE;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  /* NOTE(review): -mat_use_hash_table is queried twice below, first as a
     truth flag and then as a real (the hash table factor); looks
     intentional but confirm against the users manual */
  ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr);
    ierr = PetscOptionsTruth("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
    if (flg) {
      PetscReal fact = 1.39;
      ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
      ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr);
      if (fact <= 1.0) fact = 1.39;
      ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
      ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
    }
  ierr = PetscOptionsEnd();CHKERRQ(ierr);

  /* compose the type-specific methods looked up by the generic interfaces */
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
                                     "MatStoreValues_MPIBAIJ",
                                     MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
                                     "MatRetrieveValues_MPIBAIJ",
                                     MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
                                     "MatGetDiagonalBlock_MPIBAIJ",
                                     MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C",
                                     "MatMPIBAIJSetPreallocation_MPIBAIJ",
                                     MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",
				     "MatMPIBAIJSetPreallocationCSR_MPIBAIJ",
				     MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
                                     "MatDiagonalScaleLocal_MPIBAIJ",
                                     MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C",
                                     "MatSetHashTableFactor_MPIBAIJ",
                                     MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2347 EXTERN_C_END
2348 
2349 /*MC
2350    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
2351 
2352    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
2353    and MATMPIBAIJ otherwise.
2354 
2355    Options Database Keys:
2356 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
2357 
2358   Level: beginner
2359 
2360 .seealso: MatCreateMPIBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
2361 M*/
2362 
2363 EXTERN_C_BEGIN
2364 #undef __FUNCT__
2365 #define __FUNCT__ "MatCreate_BAIJ"
2366 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_BAIJ(Mat A)
2367 {
2368   PetscErrorCode ierr;
2369   PetscMPIInt    size;
2370 
2371   PetscFunctionBegin;
2372   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
2373   if (size == 1) {
2374     ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr);
2375   } else {
2376     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
2377   }
2378   PetscFunctionReturn(0);
2379 }
2380 EXTERN_C_END
2381 
2382 #undef __FUNCT__
2383 #define __FUNCT__ "MatMPIBAIJSetPreallocation"
2384 /*@C
2385    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
2386    (block compressed row).  For good matrix assembly performance
2387    the user should preallocate the matrix storage by setting the parameters
2388    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2389    performance can be increased by more than a factor of 50.
2390 
2391    Collective on Mat
2392 
2393    Input Parameters:
2394 +  A - the matrix
.  bs   - size of block
2396 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2397            submatrix  (same for all local rows)
2398 .  d_nnz - array containing the number of block nonzeros in the various block rows
2399            of the in diagonal portion of the local (possibly different for each block
2400            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
2401 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2402            submatrix (same for all local rows).
2403 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2404            off-diagonal portion of the local submatrix (possibly different for
2405            each block row) or PETSC_NULL.
2406 
2407    If the *_nnz parameter is given then the *_nz parameter is ignored
2408 
2409    Options Database Keys:
2410 +   -mat_block_size - size of the blocks to use
2411 -   -mat_use_hash_table <fact>
2412 
2413    Notes:
2414    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
2415    than it must be used on all processors that share the object for that argument.
2416 
2417    Storage Information:
2418    For a square global matrix we define each processor's diagonal portion
2419    to be its local rows and the corresponding columns (a square submatrix);
2420    each processor's off-diagonal portion encompasses the remainder of the
2421    local matrix (a rectangular submatrix).
2422 
2423    The user can specify preallocated storage for the diagonal part of
2424    the local submatrix with either d_nz or d_nnz (not both).  Set
2425    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2426    memory allocation.  Likewise, specify preallocated storage for the
2427    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2428 
2429    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2430    the figure below we depict these three local rows and all columns (0-11).
2431 
2432 .vb
2433            0 1 2 3 4 5 6 7 8 9 10 11
2434           -------------------
2435    row 3  |  o o o d d d o o o o o o
2436    row 4  |  o o o d d d o o o o o o
2437    row 5  |  o o o d d d o o o o o o
2438           -------------------
2439 .ve
2440 
2441    Thus, any entries in the d locations are stored in the d (diagonal)
2442    submatrix, and any entries in the o locations are stored in the
2443    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
2444    stored simply in the MATSEQBAIJ format for compressed row storage.
2445 
2446    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2447    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2448    In general, for PDE problems in which most nonzeros are near the diagonal,
2449    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2450    or you will get TERRIBLE performance; see the users' manual chapter on
2451    matrices.
2452 
2453    You can call MatGetInfo() to get information on how effective the preallocation was;
2454    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
2455    You can also run with the option -info and look for messages with the string
2456    malloc in them to see if additional memory allocation was needed.
2457 
2458    Level: intermediate
2459 
2460 .keywords: matrix, block, aij, compressed row, sparse, parallel
2461 
2462 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR()
2463 @*/
2464 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2465 {
2466   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
2467 
2468   PetscFunctionBegin;
2469   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2470   if (f) {
2471     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2472   }
2473   PetscFunctionReturn(0);
2474 }
2475 
#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIBAIJ"
/*@C
   MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
   (block compressed row).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs   - size of block
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
           This value should be the same as the local size used in creating the
           y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
           This value should be the same as the local size used in creating the
           x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz  - number of nonzero blocks per block row in diagonal portion of local
           submatrix  (same for all local rows)
.  d_nnz - array containing the number of nonzero blocks in the various block rows
           in the diagonal portion of the local submatrix (possibly different for each block
           row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
.  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
           submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzero blocks in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or PETSC_NULL.

   Output Parameter:
.  A - the matrix

   Options Database Keys:
+   -mat_block_size - size of the blocks to use
-   -mat_use_hash_table <fact>

   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly. This is definitely
   true if you plan to use the external direct solvers such as SuperLU, MUMPS or Spooles.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]

   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored

   A nonzero block is any block that has 1 or more nonzeros in it

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
   than it must be used on all processors that share the object for that argument.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          -------------------
   row 3  |  o o o d d d o o o o o o
   row 4  |  o o o d d d o o o o o o
   row 5  |  o o o d d d o o o o o o
          -------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
   stored simply in the MATSEQBAIJ format for compressed row storage.

   Now d_nz should indicate the number of block nonzeros per row in the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.

   Level: intermediate

.keywords: matrix, block, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatCreate(comm,A);CHKERRQ(ierr);
  ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  /* On a single process use the sequential format; it takes no
     off-diagonal preallocation (o_nz/o_nnz are ignored in that case). */
  if (size > 1) {
    ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
    ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
2592 
#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPIBAIJ"
/*
   MatDuplicate_MPIBAIJ - Duplicates an MPIBAIJ matrix.  cpvalues is passed
   through to MatDuplicate() on the diagonal (A) and off-diagonal (B)
   sequential blocks, so it decides whether numerical values are copied.

   Layout, block-size bookkeeping, stashes, colmap, garray and the
   communication scatter are all replicated; the hash table itself is not
   copied (only its tuning parameters ht_flag/ht_fact are).
*/
static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
  PetscErrorCode ierr;
  PetscInt       len=0;

  PetscFunctionBegin;
  *newmat       = 0;
  /* create an empty matrix of the same size/type, then copy the original's
     function table verbatim over the new object's */
  ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);CHKERRQ(ierr);
  ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
  ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);

  mat->factor       = matin->factor;
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;

  /* block-size bookkeeping (counts in blocks, not scalar entries) */
  a      = (Mat_MPIBAIJ*)mat->data;
  mat->rmap.bs  = matin->rmap.bs;
  a->bs2   = oldmat->bs2;
  a->mbs   = oldmat->mbs;
  a->nbs   = oldmat->nbs;
  a->Mbs   = oldmat->Mbs;
  a->Nbs   = oldmat->Nbs;

  ierr = PetscMapCopy(((PetscObject)matin)->comm,&matin->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscMapCopy(((PetscObject)matin)->comm,&matin->cmap,&mat->cmap);CHKERRQ(ierr);

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  /* per-call scratch areas (MatGetRow/value-staging) start out empty */
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;
  a->barray       = 0;
  a->rstartbs     = oldmat->rstartbs;
  a->rendbs       = oldmat->rendbs;
  a->cstartbs     = oldmat->cstartbs;
  a->cendbs       = oldmat->cendbs;

  /* hash table stuff: table/data are reset, only tuning knobs are copied */
  a->ht           = 0;
  a->hd           = 0;
  a->ht_size      = 0;
  a->ht_flag      = oldmat->ht_flag;
  a->ht_fact      = oldmat->ht_fact;
  a->ht_total_ct  = 0;
  a->ht_insert_ct = 0;

  ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
  /* fresh stashes: scalar stash (block size 1) and block stash */
  ierr = MatStashCreate_Private(((PetscObject)matin)->comm,1,&mat->stash);CHKERRQ(ierr);
  ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap.bs,&mat->bstash);CHKERRQ(ierr);
  if (oldmat->colmap) {
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
  ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;

  /* garray: local->global block-column indices of the off-diagonal part;
     len is taken from B's block-column count */
  if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
    ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
  } else a->garray = 0;

  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);

  /* duplicate the two sequential blocks (this is where cpvalues matters) */
  ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
  ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
  ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
  *newmat = mat;

  PetscFunctionReturn(0);
}
2680 
2681 #include "petscsys.h"
2682 
2683 #undef __FUNCT__
2684 #define __FUNCT__ "MatLoad_MPIBAIJ"
2685 PetscErrorCode MatLoad_MPIBAIJ(PetscViewer viewer, MatType type,Mat *newmat)
2686 {
2687   Mat            A;
2688   PetscErrorCode ierr;
2689   int            fd;
2690   PetscInt       i,nz,j,rstart,rend;
2691   PetscScalar    *vals,*buf;
2692   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2693   MPI_Status     status;
2694   PetscMPIInt    rank,size,maxnz;
2695   PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
2696   PetscInt       *locrowlens = PETSC_NULL,*procsnz = PETSC_NULL,*browners = PETSC_NULL;
2697   PetscInt       jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax;
2698   PetscMPIInt    tag = ((PetscObject)viewer)->tag;
2699   PetscInt       *dlens = PETSC_NULL,*odlens = PETSC_NULL,*mask = PETSC_NULL,*masked1 = PETSC_NULL,*masked2 = PETSC_NULL,rowcount,odcount;
2700   PetscInt       dcount,kmax,k,nzcount,tmp,mend;
2701 
2702   PetscFunctionBegin;
2703   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr);
2704     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
2705   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2706 
2707   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2708   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2709   if (!rank) {
2710     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2711     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2712     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2713   }
2714 
2715   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2716   M = header[1]; N = header[2];
2717 
2718   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2719 
2720   /*
2721      This code adds extra rows to make sure the number of rows is
2722      divisible by the blocksize
2723   */
2724   Mbs        = M/bs;
2725   extra_rows = bs - M + bs*Mbs;
2726   if (extra_rows == bs) extra_rows = 0;
2727   else                  Mbs++;
2728   if (extra_rows && !rank) {
2729     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2730   }
2731 
2732   /* determine ownership of all rows */
2733   mbs        = Mbs/size + ((Mbs % size) > rank);
2734   m          = mbs*bs;
2735   ierr       = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr);
2736   ierr       = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2737 
2738   /* process 0 needs enough room for process with most rows */
2739   if (!rank) {
2740     mmax = rowners[1];
2741     for (i=2; i<size; i++) {
2742       mmax = PetscMax(mmax,rowners[i]);
2743     }
2744     mmax*=bs;
2745   } else mmax = m;
2746 
2747   rowners[0] = 0;
2748   for (i=2; i<=size; i++)  rowners[i] += rowners[i-1];
2749   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2750   rstart = rowners[rank];
2751   rend   = rowners[rank+1];
2752 
2753   /* distribute row lengths to all processors */
2754   ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr);
2755   if (!rank) {
2756     mend = m;
2757     if (size == 1) mend = mend - extra_rows;
2758     ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr);
2759     for (j=mend; j<m; j++) locrowlens[j] = 1;
2760     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2761     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2762     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2763     for (j=0; j<m; j++) {
2764       procsnz[0] += locrowlens[j];
2765     }
2766     for (i=1; i<size; i++) {
2767       mend = browners[i+1] - browners[i];
2768       if (i == size-1) mend = mend - extra_rows;
2769       ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr);
2770       for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
2771       /* calculate the number of nonzeros on each processor */
2772       for (j=0; j<browners[i+1]-browners[i]; j++) {
2773         procsnz[i] += rowlengths[j];
2774       }
2775       ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2776     }
2777     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2778   } else {
2779     ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2780   }
2781 
2782   if (!rank) {
2783     /* determine max buffer needed and allocate it */
2784     maxnz = procsnz[0];
2785     for (i=1; i<size; i++) {
2786       maxnz = PetscMax(maxnz,procsnz[i]);
2787     }
2788     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2789 
2790     /* read in my part of the matrix column indices  */
2791     nz     = procsnz[0];
2792     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2793     mycols = ibuf;
2794     if (size == 1)  nz -= extra_rows;
2795     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2796     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2797 
2798     /* read in every ones (except the last) and ship off */
2799     for (i=1; i<size-1; i++) {
2800       nz   = procsnz[i];
2801       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2802       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2803     }
2804     /* read in the stuff for the last proc */
2805     if (size != 1) {
2806       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2807       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2808       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2809       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2810     }
2811     ierr = PetscFree(cols);CHKERRQ(ierr);
2812   } else {
2813     /* determine buffer space needed for message */
2814     nz = 0;
2815     for (i=0; i<m; i++) {
2816       nz += locrowlens[i];
2817     }
2818     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2819     mycols = ibuf;
2820     /* receive message of column indices*/
2821     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2822     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2823     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2824   }
2825 
2826   /* loop over local rows, determining number of off diagonal entries */
2827   ierr     = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
2828   ierr     = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
2829   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2830   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2831   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2832   rowcount = 0; nzcount = 0;
2833   for (i=0; i<mbs; i++) {
2834     dcount  = 0;
2835     odcount = 0;
2836     for (j=0; j<bs; j++) {
2837       kmax = locrowlens[rowcount];
2838       for (k=0; k<kmax; k++) {
2839         tmp = mycols[nzcount++]/bs;
2840         if (!mask[tmp]) {
2841           mask[tmp] = 1;
2842           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
2843           else masked1[dcount++] = tmp;
2844         }
2845       }
2846       rowcount++;
2847     }
2848 
2849     dlens[i]  = dcount;
2850     odlens[i] = odcount;
2851 
2852     /* zero out the mask elements we set */
2853     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2854     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2855   }
2856 
2857   /* create our matrix */
2858   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
2859   ierr = MatSetSizes(A,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
2860   ierr = MatSetType(A,type);CHKERRQ(ierr)
2861   ierr = MatMPIBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2862 
2863   if (!rank) {
2864     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2865     /* read in my part of the matrix numerical values  */
2866     nz = procsnz[0];
2867     vals = buf;
2868     mycols = ibuf;
2869     if (size == 1)  nz -= extra_rows;
2870     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2871     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2872 
2873     /* insert into matrix */
2874     jj      = rstart*bs;
2875     for (i=0; i<m; i++) {
2876       ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2877       mycols += locrowlens[i];
2878       vals   += locrowlens[i];
2879       jj++;
2880     }
2881     /* read in other processors (except the last one) and ship out */
2882     for (i=1; i<size-1; i++) {
2883       nz   = procsnz[i];
2884       vals = buf;
2885       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2886       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
2887     }
2888     /* the last proc */
2889     if (size != 1){
2890       nz   = procsnz[i] - extra_rows;
2891       vals = buf;
2892       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2893       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2894       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
2895     }
2896     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2897   } else {
2898     /* receive numeric values */
2899     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2900 
2901     /* receive message of values*/
2902     vals   = buf;
2903     mycols = ibuf;
2904     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
2905     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2906     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2907 
2908     /* insert into matrix */
2909     jj      = rstart*bs;
2910     for (i=0; i<m; i++) {
2911       ierr    = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2912       mycols += locrowlens[i];
2913       vals   += locrowlens[i];
2914       jj++;
2915     }
2916   }
2917   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2918   ierr = PetscFree(buf);CHKERRQ(ierr);
2919   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2920   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
2921   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
2922   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
2923   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2924   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2925 
2926   *newmat = A;
2927   PetscFunctionReturn(0);
2928 }
2929 
2930 #undef __FUNCT__
2931 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
2932 /*@
2933    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2934 
2935    Input Parameters:
2936 .  mat  - the matrix
2937 .  fact - factor
2938 
2939    Collective on Mat
2940 
2941    Level: advanced
2942 
2943   Notes:
2944    This can also be set by the command line option: -mat_use_hash_table <fact>
2945 
2946 .keywords: matrix, hashtable, factor, HT
2947 
2948 .seealso: MatSetOption()
2949 @*/
2950 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
2951 {
2952   PetscErrorCode ierr,(*f)(Mat,PetscReal);
2953 
2954   PetscFunctionBegin;
2955   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);CHKERRQ(ierr);
2956   if (f) {
2957     ierr = (*f)(mat,fact);CHKERRQ(ierr);
2958   }
2959   PetscFunctionReturn(0);
2960 }
2961 
2962 EXTERN_C_BEGIN
2963 #undef __FUNCT__
2964 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
2965 PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
2966 {
2967   Mat_MPIBAIJ *baij;
2968 
2969   PetscFunctionBegin;
2970   baij = (Mat_MPIBAIJ*)mat->data;
2971   baij->ht_fact = fact;
2972   PetscFunctionReturn(0);
2973 }
2974 EXTERN_C_END
2975 
2976 #undef __FUNCT__
2977 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
2978 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
2979 {
2980   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2981   PetscFunctionBegin;
2982   *Ad     = a->A;
2983   *Ao     = a->B;
2984   *colmap = a->garray;
2985   PetscFunctionReturn(0);
2986 }
2987 
2988 /*
2989     Special version for direct calls from Fortran (to eliminate two function call overheads
2990 */
2991 #if defined(PETSC_HAVE_FORTRAN_CAPS)
2992 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
2993 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
2994 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
2995 #endif
2996 
2997 #undef __FUNCT__
2998 #define __FUNCT__ "matmpibiajsetvaluesblocked"
2999 /*@C
3000   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked()
3001 
3002   Collective on Mat
3003 
3004   Input Parameters:
3005 + mat - the matrix
3006 . min - number of input rows
3007 . im - input rows
3008 . nin - number of input columns
3009 . in - input columns
3010 . v - numerical values input
3011 - addvin - INSERT_VALUES or ADD_VALUES
3012 
3013   Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse.
3014 
3015   Level: advanced
3016 
3017 .seealso:   MatSetValuesBlocked()
3018 @*/
3019 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
3020 {
3021   /* convert input arguments to C version */
3022   Mat             mat = *matin;
3023   PetscInt        m = *min, n = *nin;
3024   InsertMode      addv = *addvin;
3025 
3026   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
3027   const MatScalar *value;
3028   MatScalar       *barray=baij->barray;
3029   PetscTruth      roworiented = baij->roworiented;
3030   PetscErrorCode  ierr;
3031   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
3032   PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
3033   PetscInt        cend=baij->cendbs,bs=mat->rmap.bs,bs2=baij->bs2;
3034 
3035   PetscFunctionBegin;
3036   /* tasks normally handled by MatSetValuesBlocked() */
3037   if (mat->insertmode == NOT_SET_VALUES) {
3038     mat->insertmode = addv;
3039   }
3040 #if defined(PETSC_USE_DEBUG)
3041   else if (mat->insertmode != addv) {
3042     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
3043   }
3044   if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
3045 #endif
3046   if (mat->assembled) {
3047     mat->was_assembled = PETSC_TRUE;
3048     mat->assembled     = PETSC_FALSE;
3049   }
3050   ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
3051 
3052 
3053   if(!barray) {
3054     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
3055     baij->barray = barray;
3056   }
3057 
3058   if (roworiented) {
3059     stepval = (n-1)*bs;
3060   } else {
3061     stepval = (m-1)*bs;
3062   }
3063   for (i=0; i<m; i++) {
3064     if (im[i] < 0) continue;
3065 #if defined(PETSC_USE_DEBUG)
3066     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
3067 #endif
3068     if (im[i] >= rstart && im[i] < rend) {
3069       row = im[i] - rstart;
3070       for (j=0; j<n; j++) {
3071         /* If NumCol = 1 then a copy is not required */
3072         if ((roworiented) && (n == 1)) {
3073           barray = (MatScalar*)v + i*bs2;
3074         } else if((!roworiented) && (m == 1)) {
3075           barray = (MatScalar*)v + j*bs2;
3076         } else { /* Here a copy is required */
3077           if (roworiented) {
3078             value = v + i*(stepval+bs)*bs + j*bs;
3079           } else {
3080             value = v + j*(stepval+bs)*bs + i*bs;
3081           }
3082           for (ii=0; ii<bs; ii++,value+=stepval) {
3083             for (jj=0; jj<bs; jj++) {
3084               *barray++  = *value++;
3085             }
3086           }
3087           barray -=bs2;
3088         }
3089 
3090         if (in[j] >= cstart && in[j] < cend){
3091           col  = in[j] - cstart;
3092           ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
3093         }
3094         else if (in[j] < 0) continue;
3095 #if defined(PETSC_USE_DEBUG)
3096         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
3097 #endif
3098         else {
3099           if (mat->was_assembled) {
3100             if (!baij->colmap) {
3101               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
3102             }
3103 
3104 #if defined(PETSC_USE_DEBUG)
3105 #if defined (PETSC_USE_CTABLE)
3106             { PetscInt data;
3107               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
3108               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
3109             }
3110 #else
3111             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
3112 #endif
3113 #endif
3114 #if defined (PETSC_USE_CTABLE)
3115 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
3116             col  = (col - 1)/bs;
3117 #else
3118             col = (baij->colmap[in[j]] - 1)/bs;
3119 #endif
3120             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
3121               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
3122               col =  in[j];
3123             }
3124           }
3125           else col = in[j];
3126           ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
3127         }
3128       }
3129     } else {
3130       if (!baij->donotstash) {
3131         if (roworiented) {
3132           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
3133         } else {
3134           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
3135         }
3136       }
3137     }
3138   }
3139 
3140   /* task normally handled by MatSetValuesBlocked() */
3141   ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
3142   PetscFunctionReturn(0);
3143 }
3144