#define PETSCMAT_DLL

#include "src/mat/impls/baij/mpi/mpibaij.h"   /*I  "petscmat.h"  I*/

EXTERN PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
EXTERN PetscErrorCode DisAssemble_MPIBAIJ(Mat);
EXTERN PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat,PetscInt,IS[],PetscInt);
EXTERN PetscErrorCode MatGetSubMatrices_MPIBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat *[]);
EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []);
EXTERN PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
EXTERN PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar);
/*  UGLY, ugly, ugly
   When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does
   not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and
   inserts them directly into the single precision data structures. The function
   MatSetValuesBlocked_MPIBAIJ() converts the double precision entries to single
   precision and then calls ..._MatScalar() to insert them into the single precision
   data structures.
*/
#if defined(PETSC_USE_MAT_SINGLE)
EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
EXTERN PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
EXTERN PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
#else
#define MatSetValuesBlocked_SeqBAIJ_MatScalar      MatSetValuesBlocked_SeqBAIJ
#define MatSetValues_MPIBAIJ_MatScalar             MatSetValues_MPIBAIJ
#define MatSetValuesBlocked_MPIBAIJ_MatScalar      MatSetValuesBlocked_MPIBAIJ
#define MatSetValues_MPIBAIJ_HT_MatScalar          MatSetValues_MPIBAIJ_HT
#define MatSetValuesBlocked_MPIBAIJ_HT_MatScalar   MatSetValuesBlocked_MPIBAIJ_HT
#endif
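
/*
   Illustrative sketch (comment only; rows/cols are hypothetical index arrays,
   not defined in this file): with PETSC_USE_MAT_SINGLE the public entry points
   still receive PetscScalar (double precision) values,

     PetscScalar vals[4] = {1.0,2.0,3.0,4.0};
     MatSetValues(mat,2,rows,2,cols,vals,ADD_VALUES);

   and the wrappers below downcast them into the reusable MatScalar buffer
   b->setvaluescopy before forwarding to the ..._MatScalar() variants, so
   callers never see the internal storage precision.
*/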

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ"
PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;

  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap.n; i++) {if (PetscAbsScalar(va[i])) idx[i] += A->cmap.rstart;}
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap.n,&vtmp);CHKERRQ(ierr);
  if (idx) {ierr = PetscMalloc(A->rmap.n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);}
  ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap.n; i++){
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {va[i] = vb[i]; if (idx) idx[i] = A->cmap.bs*a->garray[idxb[i]/A->cmap.bs] + (idxb[i] % A->cmap.bs);}
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  if (idxb) {ierr = PetscFree(idxb);CHKERRQ(ierr);}
  ierr = VecDestroy(vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
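
/*
   Note on the garray translation above: the off-diagonal part B stores local
   block column numbers, so a winning column idxb[i] from B is mapped back to
   a global column as bs*garray[idxb[i]/bs] + idxb[i]%bs. A small sketch,
   assuming bs = 2 and garray = {3,7}: local column 2 lies in local block 1
   with offset 0, hence global column 2*7 + 0 = 14.
*/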

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPIBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*
     Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  This is done in a non-scalable way since the
   length of colmap equals the number of block columns in the global matrix.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIBAIJ_Private"
PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       nbs = B->nbs,i,bs=mat->rmap.bs;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr);
  for (i=0; i<nbs; i++){
    ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}

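/*
   A minimal sketch of the colmap semantics built above, assuming bs = 2 and
   garray = {3,7,12}: the non-CTABLE branch yields

     colmap[3]  = 0*bs + 1 = 1
     colmap[7]  = 1*bs + 1 = 3
     colmap[12] = 2*bs + 1 = 5

   i.e. colmap[g] holds 1 + bs*(local block index of global block column g),
   with colmap[g] == 0 marking block columns that have no local entries.
*/
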
#define CHUNKSIZE  10

#define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto a_noinsert; \
        } \
      } \
      if (a->nonew == 1) goto a_noinsert; \
      if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
      MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      a_noinsert:; \
    ailen[brow] = nrow; \
}
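
/*
   The macro above (and its twin for B below) locates block column bcol in
   block row brow with a short bisection followed by a linear scan. A worked
   example, assuming rp = {0,2,5,9} (nrow = 4) and bcol = 5: the while loop
   narrows the window to low = 2, the for loop finds rp[2] == 5, and the
   scalar is written at ap[bs2*2 + bs*cidx + ridx] inside that bs x bs block;
   a miss either jumps to the noinsert label or shifts later entries up to
   make room for the new block.
*/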

#define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto b_noinsert; \
        } \
      } \
      if (b->nonew == 1) goto b_noinsert; \
      if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
      MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
      CHKMEMQ;\
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      b_noinsert:; \
    bilen[brow] = nrow; \
}

#if defined(PETSC_USE_MAT_SINGLE)
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ"
PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,N = m*n;
  MatScalar      *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen = N;
  }
  vsingle = b->setvaluescopy;

  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValues_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,N = m*n*b->bs2;
  MatScalar      *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen = N;
  }
  vsingle = b->setvaluescopy;
  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,N = m*n;
  MatScalar      *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen = N;
  }
  vsingle = b->setvaluescopy;
  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValues_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,N = m*n*b->bs2;
  MatScalar      *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen = N;
  }
  vsingle = b->setvaluescopy;
  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar"
PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  MatScalar      value;
  PetscTruth     roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap.rstart;
  PetscInt       rend_orig=mat->rmap.rend,cstart_orig=mat->cmap.rstart;
  PetscInt       cend_orig=mat->cmap.rend,bs=mat->rmap.bs;

  /* Some variables required in the macro */
  Mat            A = baij->A;
  Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)(A)->data;
  PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar      *aa=a->a;

  Mat            B = baij->B;
  Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(B)->data;
  PetscInt       *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar      *ba=b->a;

  PetscInt       *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt       low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar      *ap,*bap;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      row = im[i] - rstart_orig;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig){
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B = baij->B;
              b = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba=b->a;
            } else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
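
/*
   Routing summary for the routine above, as a hedged usage sketch: with two
   processes, bs = 1 and four rows split evenly,

     PetscInt    row = 0, cols[2] = {1,3};
     PetscScalar vals[2] = {10.0,20.0};
     MatSetValues(mat,1,&row,2,cols,vals,INSERT_VALUES);

   issued on rank 0 routes (0,1) into the diagonal block A and (0,3) through
   colmap into the off-diagonal block B, while the same call issued on rank 1
   stashes both entries for delivery during MatAssemblyBegin()/End().
*/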

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_MatScalar"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
  const MatScalar *value;
  MatScalar       *barray=baij->barray;
  PetscTruth      roworiented = baij->roworiented;
  PetscErrorCode  ierr;
  PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
  PetscInt        cend=baij->cendbs,bs=mat->rmap.bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If there is only one block column (n == 1 row-oriented, m == 1 column-oriented) no copy is required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++ = *value++;
            }
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend){
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
        else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined (PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
            }
          }
          else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
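
/*
   A worked example of the copy logic above, assuming bs = 2, roworiented and
   n = 2 (so stepval = (n-1)*bs = 2): the caller packs both blocks of a block
   row, row by row,

     v = { a00,a01,b00,b01,
           a10,a11,b10,b11 }

   and block (i=0,j=1) starts at v + i*(stepval+bs)*bs + j*bs = v+2; after
   each copied block row, value += stepval hops over the entries of the
   neighboring block.
*/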

#define HASH_KEY 0.6180339887
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
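
/*
   HASH() is a multiplicative hash on the fractional part of the golden ratio:
   HASH(size,key,tmp) = floor(size * frac(key*0.6180339887)). A quick worked
   example with size = 11 and key = 7: 7*0.6180339887 = 4.326..., fractional
   part 0.326..., 11*0.326... = 3.58..., so the slot is 3. Collisions are
   resolved by the wrap-around linear probes in the routines below.
*/
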
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_HT_MatScalar"
PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscTruth     roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap.rstart;
  PetscInt       rend_orig=mat->rmap.rend,Nbs=baij->Nbs;
  PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap.bs,*HT=baij->ht,idx;
  PetscReal      tmp;
  MatScalar      **HD = baij->hd,value;
#if defined(PETSC_USE_DEBUG)
  PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
#endif
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j=0; j<n; j++) {
        col = in[j];
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        /* Look up in the hash table */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#endif
        /* A hash table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT_MatScalar"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
  PetscTruth      roworiented = baij->roworiented;
  PetscErrorCode  ierr;
  PetscInt        i,j,ii,jj,row,col;
  PetscInt        rstart=baij->rstartbs;
  PetscInt        rend=mat->rmap.rend,stepval,bs=mat->rmap.bs,bs2=baij->bs2;
  PetscInt        h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  PetscReal       tmp;
  MatScalar       **HD = baij->hd,*baij_a;
  const MatScalar *v_t,*value;
#if defined(PETSC_USE_DEBUG)
  PetscInt        total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
#endif
    row   = im[i];
    v_t   = v + i*bs2;
    if (row >= rstart && row < rend) {
      for (j=0; j<n; j++) {
        col = in[j];

        /* Look up in the hash table */
        key = row*Nbs+col+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        total_ct++;
        insert_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#endif
        baij_a = HD[idx];
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t  += bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] = *value++;
              }
            }
          }
        } else {
          value = v + j*(stepval+bs)*bs + i*bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] = *value++;
              }
            }
          }
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIBAIJ"
PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs=mat->rmap.bs,i,j,bsrstart = mat->rmap.rstart,bsrend = mat->rmap.rend;
  PetscInt       bscstart = mat->cmap.rstart,bscend = mat->cmap.rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
    if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap.N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]);
        if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend){
          col = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIBAIJ"
PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap.bs,nz,row,col;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v  = amat->a;
      nz = amat->nz*bs2;
      for (i=0; i<nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v  = bmat->a;
      nz = bmat->nz*bs2;
      for (i=0; i<nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *nrm = sqrt(*nrm);
    } else if (type == NORM_1) { /* max column sum */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
      ierr = PetscMalloc((2*mat->cmap.N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      tmp2 = tmp + mat->cmap.N;
      ierr = PetscMemzero(tmp,mat->cmap.N*sizeof(PetscReal));CHKERRQ(ierr);
      v = amat->a; jj = amat->j;
      for (i=0; i<amat->nz; i++) {
        for (j=0; j<bs; j++){
          col = bs*(cstart + *jj) + j; /* column index */
          for (row=0; row<bs; row++){
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      v = bmat->a; jj = bmat->j;
      for (i=0; i<bmat->nz; i++) {
        for (j=0; j<bs; j++){
          col = bs*garray[*jj] + j;
          for (row=0; row<bs; row++){
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->cmap.N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *nrm = 0.0;
      for (j=0; j<mat->cmap.N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row sum */
      PetscReal *sums;
      ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr);
      sum = 0.0;
      for (j=0; j<amat->mbs; j++) {
        for (row=0; row<bs; row++) sums[row] = 0.0;
        v  = amat->a + bs2*amat->i[j];
        nz = amat->i[j+1]-amat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++){
            for (row=0; row<bs; row++){
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        v  = bmat->a + bs2*bmat->i[j];
        nz = bmat->i[j+1]-bmat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++){
            for (row=0; row<bs; row++){
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        for (row=0; row<bs; row++){
          if (sums[row] > sum) sum = sums[row];
        }
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
      ierr = PetscFree(sums);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
    }
  }
  PetscFunctionReturn(0);
}
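
/*
   For reference, the three norms computed above are
     NORM_FROBENIUS: sqrt(sum_ij |a_ij|^2), local sums reduced with MPI_SUM;
     NORM_1:         max_j sum_i |a_ij| (max column sum), per-column sums
                     reduced with MPI_SUM before taking the max;
     NORM_INFINITY:  max_i sum_j |a_ij| (max row sum), reduced with MPI_MAX
                     since each row lives entirely on one process.
*/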

/*
  Creates and populates the hash table.
  This table is created only once.
  If new entries need to be added to the matrix
  then the hash table has to be destroyed and
  recreated.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            A = baij->A,B=baij->B;
  Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
  PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
  PetscErrorCode ierr;
  PetscInt       size,bs2=baij->bs2,rstart=baij->rstartbs;
  PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
  PetscInt       *HT,key;
  MatScalar      **HD;
  PetscReal      tmp;
#if defined(PETSC_USE_INFO)
  PetscInt       ct=0,max=0;
#endif

  PetscFunctionBegin;
  baij->ht_size = (PetscInt)(factor*nz);
  size = baij->ht_size;

  if (baij->ht) {
    PetscFunctionReturn(0);
  }

  /* Allocate memory for the hash table */
  ierr     = PetscMalloc((size)*(sizeof(PetscInt)+sizeof(MatScalar*))+1,&baij->hd);CHKERRQ(ierr);
  baij->ht = (PetscInt*)(baij->hd + size);
  HD       = baij->hd;
  HT       = baij->ht;

  ierr = PetscMemzero(HD,size*(sizeof(PetscInt)+sizeof(MatScalar*)));CHKERRQ(ierr);

  /* Loop over A */
  for (i=0; i<a->mbs; i++) {
    for (j=ai[i]; j<ai[i+1]; j++) {
      row = i+rstart;
      col = aj[j]+cstart;

      key = row*Nbs + col + 1;
      h1  = HASH(size,key,tmp);
      for (k=0; k<size; k++){
        if (!HT[(h1+k)%size]) {
          HT[(h1+k)%size] = key;
          HD[(h1+k)%size] = a->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }
  /* Loop over B */
  for (i=0; i<b->mbs; i++) {
    for (j=bi[i]; j<bi[i+1]; j++) {
      row = i+rstart;
      col = garray[bj[j]];
      key = row*Nbs + col + 1;
      h1  = HASH(size,key,tmp);
      for (k=0; k<size; k++){
        if (!HT[(h1+k)%size]) {
          HT[(h1+k)%size] = key;
          HD[(h1+k)%size] = b->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }

  /* Print summary */
#if defined(PETSC_USE_INFO)
  for (i=0,j=0; i<size; i++) {
    if (HT[i]) {j++;}
  }
  ierr = PetscInfo2(0,"Average search = %5.2f, max search = %D\n",(!j) ? 0.0 : ((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}
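
/*
   Sizing note for the table built above: ht_size = factor*nz slots are
   allocated for nz = a->nz + b->nz block entries, so factor acts as the
   inverse load factor; a factor around 2 (the exact default is set elsewhere)
   leaves roughly half the slots empty, keeping the expected length of the
   linear probes above short.
*/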

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (baij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,mat->rmap.range);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rangebs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(0,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(0,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col,other_disassembled;
  PetscTruth     r1,r2,r3;
  MatScalar      *val;
  InsertMode     addv = mat->insertmode;
  PetscMPIInt    n;

  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!baij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flags to column-oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;
    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = PetscInfo1(0,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
  baij->rowvalues = 0;
  PetscFunctionReturn(0);
}
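
/*
   Typical life cycle around the two assembly routines above, as a hedged
   usage sketch:

     MatSetValues(mat,...);                     -- local entries inserted directly,
                                                   off-process entries cached in stash/bstash
     MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);  -- starts scattering stashed entries to owners
     MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);    -- drains the stashes, assembles A and B,
                                                   and builds the Mvctx scatter
     MatMult(mat,x,y);                          -- safe only after assembly completes
*/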

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = baij->size,rank = baij->rank;
  PetscInt          bs = mat->rmap.bs;
  PetscTruth        iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
              rank,mat->rmap.N,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs,
              mat->rmap.bs,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr);
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    PetscInt    M = mat->rmap.N,N = mat->cmap.N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar   *a;

    /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(A,mat->rmap.bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ*)baij->B->data;
    ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ"
PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  if (iascii || isdraw || issocket || isbinary) {
    ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIBAIJ"
PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap.N,mat->cmap.N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = MatDestroy(baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(baij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
  if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
  ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree(baij->hd);CHKERRQ(ierr);
#if defined(PETSC_USE_MAT_SINGLE)
  ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(baij);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIBAIJ"
PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap.n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  }
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->rmap.n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
  }
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
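
/*
   The multiply above overlaps communication with computation, using the
   splitting y = A_d*x_local + B_o*x_ghost: the scatter of the ghost values of
   x into a->lvec is started, the diagonal product is computed while messages
   are in flight, and the off-diagonal contribution is added once the scatter
   completes. The transpose routines below run the same scatter in reverse
   with ADD_VALUES.
*/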

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIBAIJ"
PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIBAIJ"
PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscTruth     merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually */
    /* inserted in yy until the next line */
    ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in zz until the next line, which is true for my implementation */
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
   diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIBAIJ"
PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

1377 #undef __FUNCT__
1378 #define __FUNCT__ "MatGetRow_MPIBAIJ"
1379 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1380 {
1381   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
1382   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1383   PetscErrorCode ierr;
1384   PetscInt       bs = matin->rmap.bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1385   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap.rstart,brend = matin->rmap.rend;
1386   PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;
1387 
1388   PetscFunctionBegin;
1389   if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1390   mat->getrowactive = PETSC_TRUE;
1391 
1392   if (!mat->rowvalues && (idx || v)) {
1393     /*
1394         allocate enough space to hold information from the longest row.
1395     */
1396     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1397     PetscInt     max = 1,mbs = mat->mbs,tmp;
1398     for (i=0; i<mbs; i++) {
1399       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1400       if (max < tmp) { max = tmp; }
1401     }
1402     ierr = PetscMalloc(max*bs2*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1403     mat->rowindices = (PetscInt*)(mat->rowvalues + max*bs2);
1404   }
1405 
1406   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows");
1407   lrow = row - brstart;
1408 
1409   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1410   if (!v)   {pvA = 0; pvB = 0;}
1411   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1412   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1413   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1414   nztot = nzA + nzB;
1415 
1416   cmap  = mat->garray;
1417   if (v  || idx) {
1418     if (nztot) {
1419       /* Sort by increasing column numbers, assuming A and B already sorted */
1420       PetscInt imark = -1;
1421       if (v) {
1422         *v = v_p = mat->rowvalues;
1423         for (i=0; i<nzB; i++) {
1424           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1425           else break;
1426         }
1427         imark = i;
1428         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1429         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1430       }
1431       if (idx) {
1432         *idx = idx_p = mat->rowindices;
1433         if (imark > -1) {
1434           for (i=0; i<imark; i++) {
1435             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1436           }
1437         } else {
1438           for (i=0; i<nzB; i++) {
1439             if (cmap[cworkB[i]/bs] < cstart)
1440               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1441             else break;
1442           }
1443           imark = i;
1444         }
1445         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1446         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1447       }
1448     } else {
1449       if (idx) *idx = 0;
1450       if (v)   *v   = 0;
1451     }
1452   }
1453   *nz = nztot;
1454   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1455   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1456   PetscFunctionReturn(0);
1457 }
1458 
1459 #undef __FUNCT__
1460 #define __FUNCT__ "MatRestoreRow_MPIBAIJ"
1461 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1462 {
1463   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1464 
1465   PetscFunctionBegin;
1466   if (!baij->getrowactive) {
1467     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1468   }
1469   baij->getrowactive = PETSC_FALSE;
1470   PetscFunctionReturn(0);
1471 }
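
/*
   Illustrative usage sketch (not part of the library): the MatGetRow()/
   MatRestoreRow() pair above is normally driven by user code looping over
   the locally owned rows; the variable names below are hypothetical.

     PetscInt    row,rstart,rend,nz,*cols;
     PetscScalar *vals;

     ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
       ... examine the nz entries of cols[] and vals[] ...
       ierr = MatRestoreRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
     }
*/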
1472 
1473 #undef __FUNCT__
1474 #define __FUNCT__ "MatZeroEntries_MPIBAIJ"
1475 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1476 {
1477   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1478   PetscErrorCode ierr;
1479 
1480   PetscFunctionBegin;
1481   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1482   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1483   PetscFunctionReturn(0);
1484 }
1485 
1486 #undef __FUNCT__
1487 #define __FUNCT__ "MatGetInfo_MPIBAIJ"
1488 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1489 {
1490   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
1491   Mat            A = a->A,B = a->B;
1492   PetscErrorCode ierr;
1493   PetscReal      isend[5],irecv[5];
1494 
1495   PetscFunctionBegin;
1496   info->block_size     = (PetscReal)matin->rmap.bs;
1497   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1498   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1499   isend[3] = info->memory;  isend[4] = info->mallocs;
1500   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1501   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1502   isend[3] += info->memory;  isend[4] += info->mallocs;
1503   if (flag == MAT_LOCAL) {
1504     info->nz_used      = isend[0];
1505     info->nz_allocated = isend[1];
1506     info->nz_unneeded  = isend[2];
1507     info->memory       = isend[3];
1508     info->mallocs      = isend[4];
1509   } else if (flag == MAT_GLOBAL_MAX) {
1510     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1511     info->nz_used      = irecv[0];
1512     info->nz_allocated = irecv[1];
1513     info->nz_unneeded  = irecv[2];
1514     info->memory       = irecv[3];
1515     info->mallocs      = irecv[4];
1516   } else if (flag == MAT_GLOBAL_SUM) {
1517     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1518     info->nz_used      = irecv[0];
1519     info->nz_allocated = irecv[1];
1520     info->nz_unneeded  = irecv[2];
1521     info->memory       = irecv[3];
1522     info->mallocs      = irecv[4];
1523   } else {
1524     SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1525   }
1526   info->rows_global       = (PetscReal)matin->rmap.N;
1527   info->columns_global    = (PetscReal)matin->cmap.N;
1528   info->rows_local        = (PetscReal)matin->rmap.n;
1529   info->columns_local     = (PetscReal)matin->cmap.n;
1530   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1531   info->fill_ratio_needed = 0;
1532   info->factor_mallocs    = 0;
1533   PetscFunctionReturn(0);
1534 }
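
/*
   Illustrative usage sketch (hypothetical variable names): querying the
   storage statistics computed above from user code, reduced over all processes.

     MatInfo info;
     ierr = MatGetInfo(mat,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(PETSC_COMM_WORLD,"nz used %g, nz allocated %g, mallocs %g\n",
                        (double)info.nz_used,(double)info.nz_allocated,(double)info.mallocs);CHKERRQ(ierr);
*/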
1535 
1536 #undef __FUNCT__
1537 #define __FUNCT__ "MatSetOption_MPIBAIJ"
1538 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op)
1539 {
1540   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1541   PetscErrorCode ierr;
1542 
1543   PetscFunctionBegin;
1544   switch (op) {
1545   case MAT_NO_NEW_NONZERO_LOCATIONS:
1546   case MAT_YES_NEW_NONZERO_LOCATIONS:
1547   case MAT_COLUMNS_UNSORTED:
1548   case MAT_COLUMNS_SORTED:
1549   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1550   case MAT_KEEP_ZEROED_ROWS:
1551   case MAT_NEW_NONZERO_LOCATION_ERR:
1552     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1553     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1554     break;
1555   case MAT_ROW_ORIENTED:
1556     a->roworiented = PETSC_TRUE;
1557     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1558     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1559     break;
1560   case MAT_ROWS_SORTED:
1561   case MAT_ROWS_UNSORTED:
1562   case MAT_YES_NEW_DIAGONALS:
1563     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1564     break;
1565   case MAT_COLUMN_ORIENTED:
1566     a->roworiented = PETSC_FALSE;
1567     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1568     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1569     break;
1570   case MAT_IGNORE_OFF_PROC_ENTRIES:
1571     a->donotstash = PETSC_TRUE;
1572     break;
1573   case MAT_NO_NEW_DIAGONALS:
1574     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1575   case MAT_USE_HASH_TABLE:
1576     a->ht_flag = PETSC_TRUE;
1577     break;
1578   case MAT_SYMMETRIC:
1579   case MAT_STRUCTURALLY_SYMMETRIC:
1580   case MAT_HERMITIAN:
1581   case MAT_SYMMETRY_ETERNAL:
1582     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1583     break;
1584   case MAT_NOT_SYMMETRIC:
1585   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1586   case MAT_NOT_HERMITIAN:
1587   case MAT_NOT_SYMMETRY_ETERNAL:
1588     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1589     break;
1590   default:
1591     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1592   }
1593   PetscFunctionReturn(0);
1594 }
1595 
1596 #undef __FUNCT__
1597 #define __FUNCT__ "MatTranspose_MPIBAIJ"
1598 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,Mat *matout)
1599 {
1600   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
1601   Mat_SeqBAIJ    *Aloc;
1602   Mat            B;
1603   PetscErrorCode ierr;
1604   PetscInt       M=A->rmap.N,N=A->cmap.N,*ai,*aj,i,*rvals,j,k,col;
1605   PetscInt       bs=A->rmap.bs,mbs=baij->mbs;
1606   MatScalar      *a;
1607 
1608   PetscFunctionBegin;
1609   if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1610   ierr = MatCreate(A->comm,&B);CHKERRQ(ierr);
1611   ierr = MatSetSizes(B,A->cmap.n,A->rmap.n,N,M);CHKERRQ(ierr);
1612   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
1613   ierr = MatMPIBAIJSetPreallocation(B,A->rmap.bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1614 
1615   /* copy over the A part */
1616   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1617   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1618   ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
1619 
1620   for (i=0; i<mbs; i++) {
1621     rvals[0] = bs*(baij->rstartbs + i);
1622     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1623     for (j=ai[i]; j<ai[i+1]; j++) {
1624       col = (baij->cstartbs+aj[j])*bs;
1625       for (k=0; k<bs; k++) {
1626         ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1627         col++; a += bs;
1628       }
1629     }
1630   }
1631   /* copy over the B part */
1632   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1633   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1634   for (i=0; i<mbs; i++) {
1635     rvals[0] = bs*(baij->rstartbs + i);
1636     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1637     for (j=ai[i]; j<ai[i+1]; j++) {
1638       col = baij->garray[aj[j]]*bs;
1639       for (k=0; k<bs; k++) {
1640         ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1641         col++; a += bs;
1642       }
1643     }
1644   }
1645   ierr = PetscFree(rvals);CHKERRQ(ierr);
1646   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1647   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1648 
1649   if (matout) {
1650     *matout = B;
1651   } else {
1652     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1653   }
1654   PetscFunctionReturn(0);
1655 }
1656 
1657 #undef __FUNCT__
1658 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
1659 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1660 {
1661   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1662   Mat            a = baij->A,b = baij->B;
1663   PetscErrorCode ierr;
1664   PetscInt       s1,s2,s3;
1665 
1666   PetscFunctionBegin;
1667   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1668   if (rr) {
1669     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1670     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1671     /* Overlap communication with computation. */
1672     ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1673   }
1674   if (ll) {
1675     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1676     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1677     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1678   }
1679   /* scale the diagonal block */
1680   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1681 
1682   if (rr) {
1683     /* Do a scatter end and then right scale the off-diagonal block */
1684     ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1685     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1686   }
1687 
1688   PetscFunctionReturn(0);
1689 }
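
/*
   Illustrative usage sketch: MatDiagonalScale() computes diag(ll) A diag(rr).
   A common use is symmetric scaling by the inverse of the diagonal; the names
   below are hypothetical and assume a square matrix.

     Vec d;
     ierr = MatGetVecs(A,PETSC_NULL,&d);CHKERRQ(ierr);
     ierr = MatGetDiagonal(A,d);CHKERRQ(ierr);
     ierr = VecReciprocal(d);CHKERRQ(ierr);
     ierr = MatDiagonalScale(A,d,d);CHKERRQ(ierr);
*/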
1690 
1691 #undef __FUNCT__
1692 #define __FUNCT__ "MatZeroRows_MPIBAIJ"
1693 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
1694 {
1695   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1696   PetscErrorCode ierr;
1697   PetscMPIInt    imdex,size = l->size,n,rank = l->rank;
1698   PetscInt       i,*owners = A->rmap.range;
1699   PetscInt       *nprocs,j,idx,nsends,row;
1700   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
1701   PetscInt       *rvalues,tag = A->tag,count,base,slen,*source,lastidx = -1;
1702   PetscInt       *lens,*lrows,*values,rstart_bs=A->rmap.rstart;
1703   MPI_Comm       comm = A->comm;
1704   MPI_Request    *send_waits,*recv_waits;
1705   MPI_Status     recv_status,*send_status;
1706 #if defined(PETSC_DEBUG)
1707   PetscTruth     found = PETSC_FALSE;
1708 #endif
1709 
1710   PetscFunctionBegin;
1711   /*  first count number of contributors to each processor */
1712   ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
1713   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
1714   ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* N+1 so the malloc is nonzero even when N is 0 */
1715   j = 0;
1716   for (i=0; i<N; i++) {
1717     if (lastidx > (idx = rows[i])) j = 0;
1718     lastidx = idx;
1719     for (; j<size; j++) {
1720       if (idx >= owners[j] && idx < owners[j+1]) {
1721         nprocs[2*j]++;
1722         nprocs[2*j+1] = 1;
1723         owner[i] = j;
1724 #if defined(PETSC_DEBUG)
1725         found = PETSC_TRUE;
1726 #endif
1727         break;
1728       }
1729     }
1730 #if defined(PETSC_DEBUG)
1731     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
1732     found = PETSC_FALSE;
1733 #endif
1734   }
1735   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
1736 
1737   /* inform other processors of number of messages and max length*/
1738   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
1739 
1740   /* post receives:   */
1741   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
1742   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
1743   for (i=0; i<nrecvs; i++) {
1744     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
1745   }
1746 
1747   /* do sends:
1748      1) starts[i] gives the starting index in svalues for stuff going to
1749      the ith processor
1750   */
1751   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
1752   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
1753   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
1754   starts[0]  = 0;
1755   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1756   for (i=0; i<N; i++) {
1757     svalues[starts[owner[i]]++] = rows[i];
1758   }
1759 
1760   starts[0] = 0;
1761   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1762   count = 0;
1763   for (i=0; i<size; i++) {
1764     if (nprocs[2*i+1]) {
1765       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
1766     }
1767   }
1768   ierr = PetscFree(starts);CHKERRQ(ierr);
1769 
1770   base = owners[rank];
1771 
1772   /*  wait on receives */
1773   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
1774   source = lens + nrecvs;
1775   count  = nrecvs; slen = 0;
1776   while (count) {
1777     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
1778     /* unpack receives into our local space */
1779     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
1780     source[imdex]  = recv_status.MPI_SOURCE;
1781     lens[imdex]    = n;
1782     slen          += n;
1783     count--;
1784   }
1785   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1786 
1787   /* move the data into the send scatter */
1788   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
1789   count = 0;
1790   for (i=0; i<nrecvs; i++) {
1791     values = rvalues + i*nmax;
1792     for (j=0; j<lens[i]; j++) {
1793       lrows[count++] = values[j] - base;
1794     }
1795   }
1796   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1797   ierr = PetscFree(lens);CHKERRQ(ierr);
1798   ierr = PetscFree(owner);CHKERRQ(ierr);
1799   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1800 
1801   /* actually zap the local rows */
1802   /*
1803         Zero the required rows. If the "diagonal block" of the matrix
1804      is square and the user wishes to set the diagonal we use separate
1805      code so that MatSetValues() is not called for each diagonal allocating
1806      new memory, thus calling lots of mallocs and slowing things down.
1807 
1808        Contributed by: Matthew Knepley
1809   */
1810   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
1811   ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0);CHKERRQ(ierr);
1812   if ((diag != 0.0) && (l->A->rmap.N == l->A->cmap.N)) {
1813     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag);CHKERRQ(ierr);
1814   } else if (diag != 0.0) {
1815     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr);
1816     if (((Mat_SeqBAIJ*)l->A->data)->nonew) {
1817       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1818 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1819     }
1820     for (i=0; i<slen; i++) {
1821       row  = lrows[i] + rstart_bs;
1822       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
1823     }
1824     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1825     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1826   } else {
1827     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr);
1828   }
1829 
1830   ierr = PetscFree(lrows);CHKERRQ(ierr);
1831 
1832   /* wait on sends */
1833   if (nsends) {
1834     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1835     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1836     ierr = PetscFree(send_status);CHKERRQ(ierr);
1837   }
1838   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1839   ierr = PetscFree(svalues);CHKERRQ(ierr);
1840 
1841   PetscFunctionReturn(0);
1842 }
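
/*
   Illustrative usage sketch: MatZeroRows() as used, for example, to impose
   Dirichlet boundary conditions after assembly (nrows and rows[] are
   hypothetical; each process may list any global rows, since the
   communication above routes each row to its owner).

     PetscScalar one = 1.0;
     ierr = MatZeroRows(A,nrows,rows,one);CHKERRQ(ierr);
*/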
1843 
1844 #undef __FUNCT__
1845 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
1846 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1847 {
1848   Mat_MPIBAIJ    *a   = (Mat_MPIBAIJ*)A->data;
1849   PetscErrorCode ierr;
1850 
1851   PetscFunctionBegin;
1852   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1853   PetscFunctionReturn(0);
1854 }
1855 
1856 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
1857 
1858 #undef __FUNCT__
1859 #define __FUNCT__ "MatEqual_MPIBAIJ"
1860 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag)
1861 {
1862   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1863   Mat            a,b,c,d;
1864   PetscTruth     flg;
1865   PetscErrorCode ierr;
1866 
1867   PetscFunctionBegin;
1868   a = matA->A; b = matA->B;
1869   c = matB->A; d = matB->B;
1870 
1871   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1872   if (flg) {
1873     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1874   }
1875   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1876   PetscFunctionReturn(0);
1877 }
1878 
1879 #undef __FUNCT__
1880 #define __FUNCT__ "MatCopy_MPIBAIJ"
1881 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
1882 {
1883   PetscErrorCode ierr;
1884   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ *)A->data;
1885   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ *)B->data;
1886 
1887   PetscFunctionBegin;
1888   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1889   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1890     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1891   } else {
1892     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1893     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1894   }
1895   PetscFunctionReturn(0);
1896 }
1897 
1898 #undef __FUNCT__
1899 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ"
1900 PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A)
1901 {
1902   PetscErrorCode ierr;
1903 
1904   PetscFunctionBegin;
1905   ierr =  MatMPIBAIJSetPreallocation(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1906   PetscFunctionReturn(0);
1907 }
1908 
1909 #include "petscblaslapack.h"
1910 #undef __FUNCT__
1911 #define __FUNCT__ "MatAXPY_MPIBAIJ"
1912 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1913 {
1914   PetscErrorCode ierr;
1915   Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data;
1916   PetscBLASInt   bnz,one=1;
1917   Mat_SeqBAIJ    *x,*y;
1918 
1919   PetscFunctionBegin;
1920   if (str == SAME_NONZERO_PATTERN) {
1921     PetscScalar alpha = a;
1922     x = (Mat_SeqBAIJ *)xx->A->data;
1923     y = (Mat_SeqBAIJ *)yy->A->data;
1924     bnz = (PetscBLASInt)x->nz;
1925     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1926     x = (Mat_SeqBAIJ *)xx->B->data;
1927     y = (Mat_SeqBAIJ *)yy->B->data;
1928     bnz = (PetscBLASInt)x->nz;
1929     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1930   } else {
1931     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1932   }
1933   PetscFunctionReturn(0);
1934 }
1935 
1936 #undef __FUNCT__
1937 #define __FUNCT__ "MatRealPart_MPIBAIJ"
1938 PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
1939 {
1940   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ*)A->data;
1941   PetscErrorCode ierr;
1942 
1943   PetscFunctionBegin;
1944   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1945   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1946   PetscFunctionReturn(0);
1947 }
1948 
1949 #undef __FUNCT__
1950 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ"
1951 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
1952 {
1953   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ*)A->data;
1954   PetscErrorCode ierr;
1955 
1956   PetscFunctionBegin;
1957   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1958   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1959   PetscFunctionReturn(0);
1960 }
1961 
1962 /* -------------------------------------------------------------------*/
1963 static struct _MatOps MatOps_Values = {
1964        MatSetValues_MPIBAIJ,
1965        MatGetRow_MPIBAIJ,
1966        MatRestoreRow_MPIBAIJ,
1967        MatMult_MPIBAIJ,
1968 /* 4*/ MatMultAdd_MPIBAIJ,
1969        MatMultTranspose_MPIBAIJ,
1970        MatMultTransposeAdd_MPIBAIJ,
1971        0,
1972        0,
1973        0,
1974 /*10*/ 0,
1975        0,
1976        0,
1977        0,
1978        MatTranspose_MPIBAIJ,
1979 /*15*/ MatGetInfo_MPIBAIJ,
1980        MatEqual_MPIBAIJ,
1981        MatGetDiagonal_MPIBAIJ,
1982        MatDiagonalScale_MPIBAIJ,
1983        MatNorm_MPIBAIJ,
1984 /*20*/ MatAssemblyBegin_MPIBAIJ,
1985        MatAssemblyEnd_MPIBAIJ,
1986        0,
1987        MatSetOption_MPIBAIJ,
1988        MatZeroEntries_MPIBAIJ,
1989 /*25*/ MatZeroRows_MPIBAIJ,
1990        0,
1991        0,
1992        0,
1993        0,
1994 /*30*/ MatSetUpPreallocation_MPIBAIJ,
1995        0,
1996        0,
1997        0,
1998        0,
1999 /*35*/ MatDuplicate_MPIBAIJ,
2000        0,
2001        0,
2002        0,
2003        0,
2004 /*40*/ MatAXPY_MPIBAIJ,
2005        MatGetSubMatrices_MPIBAIJ,
2006        MatIncreaseOverlap_MPIBAIJ,
2007        MatGetValues_MPIBAIJ,
2008        MatCopy_MPIBAIJ,
2009 /*45*/ 0,
2010        MatScale_MPIBAIJ,
2011        0,
2012        0,
2013        0,
2014 /*50*/ 0,
2015        0,
2016        0,
2017        0,
2018        0,
2019 /*55*/ 0,
2020        0,
2021        MatSetUnfactored_MPIBAIJ,
2022        0,
2023        MatSetValuesBlocked_MPIBAIJ,
2024 /*60*/ 0,
2025        MatDestroy_MPIBAIJ,
2026        MatView_MPIBAIJ,
2027        0,
2028        0,
2029 /*65*/ 0,
2030        0,
2031        0,
2032        0,
2033        0,
2034 /*70*/ MatGetRowMaxAbs_MPIBAIJ,
2035        0,
2036        0,
2037        0,
2038        0,
2039 /*75*/ 0,
2040        0,
2041        0,
2042        0,
2043        0,
2044 /*80*/ 0,
2045        0,
2046        0,
2047        0,
2048        MatLoad_MPIBAIJ,
2049 /*85*/ 0,
2050        0,
2051        0,
2052        0,
2053        0,
2054 /*90*/ 0,
2055        0,
2056        0,
2057        0,
2058        0,
2059 /*95*/ 0,
2060        0,
2061        0,
2062        0,
2063        0,
2064 /*100*/0,
2065        0,
2066        0,
2067        0,
2068        0,
2069 /*105*/0,
2070        MatRealPart_MPIBAIJ,
2071        MatImaginaryPart_MPIBAIJ};
2072 
2073 
2074 EXTERN_C_BEGIN
2075 #undef __FUNCT__
2076 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
2077 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
2078 {
2079   PetscFunctionBegin;
2080   *a      = ((Mat_MPIBAIJ *)A->data)->A;
2081   *iscopy = PETSC_FALSE;
2082   PetscFunctionReturn(0);
2083 }
2084 EXTERN_C_END
2085 
2086 EXTERN_C_BEGIN
2087 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*);
2088 EXTERN_C_END
2089 
2090 #undef __FUNCT__
2091 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ"
2092 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
2093 {
2094   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)B->data;
2095   PetscInt       m = B->rmap.n/bs,cstart = baij->cstartbs, cend = baij->cendbs,j,nnz,i,d;
2096   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart = baij->rstartbs,ii;
2097   const PetscInt *JJ;
2098   PetscScalar    *values;
2099   PetscErrorCode ierr;
2100 
2101   PetscFunctionBegin;
2102 #if defined(PETSC_OPT_g)
2103   if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_RANGE,"Ii[0] must be 0, it is %D",Ii[0]);
2104 #endif
2105   ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2106   o_nnz = d_nnz + m;
2107 
2108   for (i=0; i<m; i++) {
2109     nnz     = Ii[i+1]- Ii[i];
2110     JJ      = J + Ii[i];
2111     nnz_max = PetscMax(nnz_max,nnz);
2112 #if defined(PETSC_OPT_g)
2113     if (nnz < 0) SETERRQ1(PETSC_ERR_ARG_RANGE,"Local row %D has a negative number of columns %D",i,nnz);
2114 #endif
2115     for (j=0; j<nnz; j++) {
2116       if (*JJ >= cstart) break;
2117       JJ++;
2118     }
2119     d = 0;
2120     for (; j<nnz; j++) {
2121       if (*JJ++ >= cend) break;
2122       d++;
2123     }
2124     d_nnz[i] = d;
2125     o_nnz[i] = nnz - d;
2126   }
2127   ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2128   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2129 
2130   if (v) values = (PetscScalar*)v;
2131   else {
2132     ierr = PetscMalloc(bs*bs*(nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2133     ierr = PetscMemzero(values,bs*bs*nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2134   }
2135 
2136   ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2137   for (i=0; i<m; i++) {
2138     ii   = i + rstart;
2139     nnz  = Ii[i+1]- Ii[i];
2140     ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? bs*bs*Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr); /* each block in v holds bs*bs scalars */
2141   }
2142   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2143   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2144   ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr);
2145 
2146   if (!v) {
2147     ierr = PetscFree(values);CHKERRQ(ierr);
2148   }
2149   PetscFunctionReturn(0);
2150 }
2151 
2152 #undef __FUNCT__
2153 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR"
2154 /*@C
2155    MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in block AIJ format
2156    (block compressed row), using block CSR input arrays.
2157 
2158    Collective on MPI_Comm
2159 
2160    Input Parameters:
2161 +  B - the matrix
     .  bs - the block size
2162 .  i - the indices into j for the start of each local block row (starts with zero)
2163 .  j - the block column indices for each local block row (starts with zero); these must be sorted within each row
2164 -  v - optional values in the matrix
2165 
2166    Level: developer
2167 
2168 .keywords: matrix, baij, block, compressed row, sparse, parallel
2169 
2170 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIBAIJ(), MATMPIBAIJ
2171 @*/
2172 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2173 {
2174   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]);
2175 
2176   PetscFunctionBegin;
2177   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
2178   if (f) {
2179     ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr);
2180   }
2181   PetscFunctionReturn(0);
2182 }
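
/*
   Illustrative sketch of the block CSR input this routine expects (not part
   of the library; the arrays below are hypothetical). With bs = 2 and two
   local block rows whose block-column patterns are {0,2} and {1}:

     PetscInt    i[] = {0,2,3};      block row pointers, i[0] must be 0
     PetscInt    j[] = {0,2,1};      block column indices, sorted within each row
     PetscScalar v[3*2*2];           one dense bs*bs block per entry of j[]

     ierr = MatMPIBAIJSetPreallocationCSR(B,2,i,j,v);CHKERRQ(ierr);
*/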
2183 
2184 EXTERN_C_BEGIN
2185 #undef __FUNCT__
2186 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
2187 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
2188 {
2189   Mat_MPIBAIJ    *b;
2190   PetscErrorCode ierr;
2191   PetscInt       i;
2192 
2193   PetscFunctionBegin;
2194   B->preallocated = PETSC_TRUE;
2195   ierr = PetscOptionsBegin(B->comm,B->prefix,"Options for MPIBAIJ matrix","Mat");CHKERRQ(ierr);
2196     ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
2197   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2198 
2199   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
2200   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2201   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2202   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2203   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2204 
2205   B->rmap.bs  = bs;
2206   B->cmap.bs  = bs;
2207   ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr);
2208   ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr);
2209 
2210   if (d_nnz) {
2211     for (i=0; i<B->rmap.n/bs; i++) {
2212       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]);
2213     }
2214   }
2215   if (o_nnz) {
2216     for (i=0; i<B->rmap.n/bs; i++) {
2217       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]);
2218     }
2219   }
2220 
2221   b = (Mat_MPIBAIJ*)B->data;
2222   b->bs2 = bs*bs;
2223   b->mbs = B->rmap.n/bs;
2224   b->nbs = B->cmap.n/bs;
2225   b->Mbs = B->rmap.N/bs;
2226   b->Nbs = B->cmap.N/bs;
2227 
2228   for (i=0; i<=b->size; i++) {
2229     b->rangebs[i] = B->rmap.range[i]/bs;
2230   }
2231   b->rstartbs = B->rmap.rstart/bs;
2232   b->rendbs   = B->rmap.rend/bs;
2233   b->cstartbs = B->cmap.rstart/bs;
2234   b->cendbs   = B->cmap.rend/bs;
2235 
2236   ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2237   ierr = MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);CHKERRQ(ierr);
2238   ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
2239   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2240   ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
2241   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2242   ierr = MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);CHKERRQ(ierr);
2243   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
2244   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
2245   ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
2246 
2247   ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);
2248 
2249   PetscFunctionReturn(0);
2250 }
2251 EXTERN_C_END
2252 
2253 EXTERN_C_BEGIN
2254 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
2255 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
2256 EXTERN_C_END
2257 
2258 /*MC
2259    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
2260 
2261    Options Database Keys:
2262 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
2263 . -mat_block_size <bs> - set the blocksize used to store the matrix
2264 - -mat_use_hash_table <fact> - use a hash table during assembly to save memory; fact is the table size factor (default 1.39)
2265 
2266   Level: beginner
2267 
2268 .seealso: MatCreateMPIBAIJ()
2269 M*/
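
/*
   For example, a (hypothetical) application executable could select this
   matrix type and a block size at runtime with

     mpiexec -n 4 ./myapp -mat_type mpibaij -mat_block_size 3
*/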
2270 
2271 EXTERN_C_BEGIN
2272 #undef __FUNCT__
2273 #define __FUNCT__ "MatCreate_MPIBAIJ"
2274 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIBAIJ(Mat B)
2275 {
2276   Mat_MPIBAIJ    *b;
2277   PetscErrorCode ierr;
2278   PetscTruth     flg;
2279 
2280   PetscFunctionBegin;
2281   ierr = PetscNew(Mat_MPIBAIJ,&b);CHKERRQ(ierr);
2282   B->data = (void*)b;
2283 
2284 
2285   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2286   B->mapping    = 0;
2287   B->factor     = 0;
2288   B->assembled  = PETSC_FALSE;
2289 
2290   B->insertmode = NOT_SET_VALUES;
2291   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
2292   ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr);
2293 
2294   /* build local table of row and column ownerships */
2295   ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);
2296 
2297   /* build cache for off array entries formed */
2298   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
2299   b->donotstash  = PETSC_FALSE;
2300   b->colmap      = PETSC_NULL;
2301   b->garray      = PETSC_NULL;
2302   b->roworiented = PETSC_TRUE;
2303 
2304 #if defined(PETSC_USE_MAT_SINGLE)
2305   /* stuff for MatSetValues_XXX in single precision */
2306   b->setvalueslen     = 0;
2307   b->setvaluescopy    = PETSC_NULL;
2308 #endif
2309 
2310   /* stuff used in block assembly */
2311   b->barray       = 0;
2312 
2313   /* stuff used for matrix vector multiply */
2314   b->lvec         = 0;
2315   b->Mvctx        = 0;
2316 
2317   /* stuff for MatGetRow() */
2318   b->rowindices   = 0;
2319   b->rowvalues    = 0;
2320   b->getrowactive = PETSC_FALSE;
2321 
2322   /* hash table stuff */
2323   b->ht           = 0;
2324   b->hd           = 0;
2325   b->ht_size      = 0;
2326   b->ht_flag      = PETSC_FALSE;
2327   b->ht_fact      = 0;
2328   b->ht_total_ct  = 0;
2329   b->ht_insert_ct = 0;
2330 
2331   ierr = PetscOptionsBegin(B->comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr);
2332     ierr = PetscOptionsTruth("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
2333     if (flg) {
2334       PetscReal fact = 1.39;
2335       ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
2336       ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr);
2337       if (fact <= 1.0) fact = 1.39;
2338       ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
2339       ierr = PetscInfo1(0,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
2340     }
2341   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2342 
2343   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2344                                      "MatStoreValues_MPIBAIJ",
2345                                      MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
2346   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2347                                      "MatRetrieveValues_MPIBAIJ",
2348                                      MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
2349   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
2350                                      "MatGetDiagonalBlock_MPIBAIJ",
2351                                      MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
2352   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C",
2353                                      "MatMPIBAIJSetPreallocation_MPIBAIJ",
2354                                      MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
2355   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",
2356 				     "MatMPIBAIJSetPreallocationCSR_MPIBAIJ",
2357 				     MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
2358   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
2359                                      "MatDiagonalScaleLocal_MPIBAIJ",
2360                                      MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
2361   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C",
2362                                      "MatSetHashTableFactor_MPIBAIJ",
2363                                      MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
2364   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);
2365   PetscFunctionReturn(0);
2366 }
2367 EXTERN_C_END
2368 
2369 /*MC
2370    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
2371 
2372    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
2373    and MATMPIBAIJ otherwise.
2374 
2375    Options Database Keys:
2376 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
2377 
2378   Level: beginner
2379 
2380 .seealso: MatCreateMPIBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
2381 M*/
2382 
2383 EXTERN_C_BEGIN
2384 #undef __FUNCT__
2385 #define __FUNCT__ "MatCreate_BAIJ"
2386 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_BAIJ(Mat A)
2387 {
2388   PetscErrorCode ierr;
2389   PetscMPIInt    size;
2390 
2391   PetscFunctionBegin;
2392   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
2393   if (size == 1) {
2394     ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr);
2395   } else {
2396     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
2397   }
2398   PetscFunctionReturn(0);
2399 }
2400 EXTERN_C_END
2401 
2402 #undef __FUNCT__
2403 #define __FUNCT__ "MatMPIBAIJSetPreallocation"
2404 /*@C
2405    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
2406    (block compressed row).  For good matrix assembly performance
2407    the user should preallocate the matrix storage by setting the parameters
2408    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2409    performance can be increased by more than a factor of 50.
2410 
2411    Collective on Mat
2412 
2413    Input Parameters:
2414 +  A - the matrix
2415 .  bs   - size of block
2416 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2417            submatrix  (same for all local rows)
2418 .  d_nnz - array containing the number of block nonzeros in the various block rows
2419            in the diagonal portion of the local submatrix (possibly different for each block
2420            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
2421 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2422            submatrix (same for all local rows).
2423 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2424            off-diagonal portion of the local submatrix (possibly different for
2425            each block row) or PETSC_NULL.
2426 
2427    If the *_nnz parameter is given then the *_nz parameter is ignored
2428 
2429    Options Database Keys:
2430 +   -mat_block_size - size of the blocks to use
2431 -   -mat_use_hash_table <fact> - use a hash table during assembly to save memory; fact is the table size factor (default 1.39)
2432 
2433    Notes:
2434    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
2435    then it must be used on all processors that share the object for that argument.
2436 
2437    Storage Information:
2438    For a square global matrix we define each processor's diagonal portion
2439    to be its local rows and the corresponding columns (a square submatrix);
2440    each processor's off-diagonal portion encompasses the remainder of the
2441    local matrix (a rectangular submatrix).
2442 
2443    The user can specify preallocated storage for the diagonal part of
2444    the local submatrix with either d_nz or d_nnz (not both).  Set
2445    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2446    memory allocation.  Likewise, specify preallocated storage for the
2447    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2448 
2449    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2450    the figure below we depict these three local rows and all columns (0-11).
2451 
2452 .vb
2453            0 1 2 3 4 5 6 7 8 9 10 11
2454           -------------------
2455    row 3  |  o o o d d d o o o o o o
2456    row 4  |  o o o d d d o o o o o o
2457    row 5  |  o o o d d d o o o o o o
2458           -------------------
2459 .ve
2460 
2461    Thus, any entries in the d locations are stored in the d (diagonal)
2462    submatrix, and any entries in the o locations are stored in the
2463    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
2464    stored simply in the MATSEQBAIJ format for compressed row storage.
2465 
2466    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2467    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2468    In general, for PDE problems in which most nonzeros are near the diagonal,
2469    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2470    or you will get TERRIBLE performance; see the users' manual chapter on
2471    matrices.
2472 
2473    Level: intermediate
2474 
2475 .keywords: matrix, block, aij, compressed row, sparse, parallel
2476 
2477 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR()
2478 @*/
2479 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2480 {
2481   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
2482 
2483   PetscFunctionBegin;
2484   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2485   if (f) {
2486     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2487   }
2488   PetscFunctionReturn(0);
2489 }
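
/*
   Illustrative usage sketch (hypothetical operator): for a block tridiagonal
   problem each block row couples to at most 3 block columns, at most 1 of
   which can fall outside the diagonal portion (at the ends of the local
   range), so an exact preallocation is

     ierr = MatMPIBAIJSetPreallocation(A,bs,3,PETSC_NULL,1,PETSC_NULL);CHKERRQ(ierr);
*/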
2490 
2491 #undef __FUNCT__
2492 #define __FUNCT__ "MatCreateMPIBAIJ"
2493 /*@C
2494    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
2495    (block compressed row).  For good matrix assembly performance
2496    the user should preallocate the matrix storage by setting the parameters
2497    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2498    performance can be increased by more than a factor of 50.
2499 
2500    Collective on MPI_Comm
2501 
2502    Input Parameters:
2503 +  comm - MPI communicator
2504 .  bs   - size of block
2505 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2506            This value should be the same as the local size used in creating the
2507            y vector for the matrix-vector product y = Ax.
2508 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2509            This value should be the same as the local size used in creating the
2510            x vector for the matrix-vector product y = Ax.
2511 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2512 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2513 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
2514            submatrix  (same for all local rows)
2515 .  d_nnz - array containing the number of nonzero blocks in the various block rows
2516            in the diagonal portion of the local submatrix (possibly different for each block
2517            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
2518 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
2519            submatrix (same for all local rows).
2520 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
2521            off-diagonal portion of the local submatrix (possibly different for
2522            each block row) or PETSC_NULL.
2523 
2524    Output Parameter:
2525 .  A - the matrix
2526 
2527    Options Database Keys:
2528 +   -mat_block_size - size of the blocks to use
2529 -   -mat_use_hash_table <fact> - use a hash table during assembly to save memory; fact is the table size factor (default 1.39)
2530 
2531    Notes:
2532    If the *_nnz parameter is given then the *_nz parameter is ignored
2533 
2534    A nonzero block is any block that has 1 or more nonzeros in it
2535 
2536    The user MUST specify either the local or global matrix dimensions
2537    (possibly both).
2538 
2539    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
2540    then it must be used on all processors that share the object for that argument.
2541 
2542    Storage Information:
2543    For a square global matrix we define each processor's diagonal portion
2544    to be its local rows and the corresponding columns (a square submatrix);
2545    each processor's off-diagonal portion encompasses the remainder of the
2546    local matrix (a rectangular submatrix).
2547 
2548    The user can specify preallocated storage for the diagonal part of
2549    the local submatrix with either d_nz or d_nnz (not both).  Set
2550    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2551    memory allocation.  Likewise, specify preallocated storage for the
2552    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2553 
2554    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2555    the figure below we depict these three local rows and all columns (0-11).
2556 
2557 .vb
2558            0 1 2 3 4 5 6 7 8 9 10 11
2559           -------------------
2560    row 3  |  o o o d d d o o o o o o
2561    row 4  |  o o o d d d o o o o o o
2562    row 5  |  o o o d d d o o o o o o
2563           -------------------
2564 .ve
2565 
2566    Thus, any entries in the d locations are stored in the d (diagonal)
2567    submatrix, and any entries in the o locations are stored in the
2568    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
2569    stored simply in the MATSEQBAIJ format for compressed row storage.
2570 
2571    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2572    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2573    In general, for PDE problems in which most nonzeros are near the diagonal,
2574    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2575    or you will get TERRIBLE performance; see the users' manual chapter on
2576    matrices.
2577 
2578    Level: intermediate
2579 
2580 .keywords: matrix, block, aij, compressed row, sparse, parallel
2581 
2582 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
2583 @*/
2584 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2585 {
2586   PetscErrorCode ierr;
2587   PetscMPIInt    size;
2588 
2589   PetscFunctionBegin;
2590   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2591   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2592   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2593   if (size > 1) {
2594     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
2595     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2596   } else {
2597     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
2598     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2599   }
2600   PetscFunctionReturn(0);
2601 }
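
/*
   Illustrative usage sketch (all names and sizes hypothetical): creating an
   MPIBAIJ matrix with 2x2 blocks and inserting one block on the diagonal,
   say from the process that owns block row 0.

     Mat         A;
     PetscInt    i0 = 0;
     PetscScalar block[4] = {4.0,-1.0,-1.0,4.0};   one 2x2 block, row oriented

     ierr = MatCreateMPIBAIJ(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DECIDE,100,100,
                             5,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
     ierr = MatSetValuesBlocked(A,1,&i0,1,&i0,block,INSERT_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/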
2602 
2603 #undef __FUNCT__
2604 #define __FUNCT__ "MatDuplicate_MPIBAIJ"
2605 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2606 {
2607   Mat            mat;
2608   Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
2609   PetscErrorCode ierr;
2610   PetscInt       len=0;
2611 
2612   PetscFunctionBegin;
2613   *newmat       = 0;
2614   ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr);
2615   ierr = MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);CHKERRQ(ierr);
2616   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
2617   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2618 
2619   mat->factor       = matin->factor;
2620   mat->preallocated = PETSC_TRUE;
2621   mat->assembled    = PETSC_TRUE;
2622   mat->insertmode   = NOT_SET_VALUES;
2623 
2624   a      = (Mat_MPIBAIJ*)mat->data;
2625   mat->rmap.bs  = matin->rmap.bs;
2626   a->bs2   = oldmat->bs2;
2627   a->mbs   = oldmat->mbs;
2628   a->nbs   = oldmat->nbs;
2629   a->Mbs   = oldmat->Mbs;
2630   a->Nbs   = oldmat->Nbs;
2631 
2632   ierr = PetscMapCopy(matin->comm,&matin->rmap,&mat->rmap);CHKERRQ(ierr);
2633   ierr = PetscMapCopy(matin->comm,&matin->cmap,&mat->cmap);CHKERRQ(ierr);
2634 
2635   a->size         = oldmat->size;
2636   a->rank         = oldmat->rank;
2637   a->donotstash   = oldmat->donotstash;
2638   a->roworiented  = oldmat->roworiented;
2639   a->rowindices   = 0;
2640   a->rowvalues    = 0;
2641   a->getrowactive = PETSC_FALSE;
2642   a->barray       = 0;
2643   a->rstartbs     = oldmat->rstartbs;
2644   a->rendbs       = oldmat->rendbs;
2645   a->cstartbs     = oldmat->cstartbs;
2646   a->cendbs       = oldmat->cendbs;
2647 
2648   /* hash table stuff */
2649   a->ht           = 0;
2650   a->hd           = 0;
2651   a->ht_size      = 0;
2652   a->ht_flag      = oldmat->ht_flag;
2653   a->ht_fact      = oldmat->ht_fact;
2654   a->ht_total_ct  = 0;
2655   a->ht_insert_ct = 0;
2656 
2657   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
2658   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
2659   ierr = MatStashCreate_Private(matin->comm,matin->rmap.bs,&mat->bstash);CHKERRQ(ierr);
2660   if (oldmat->colmap) {
2661 #if defined (PETSC_USE_CTABLE)
2662   ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2663 #else
2664   ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2665   ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2666   ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2667 #endif
2668   } else a->colmap = 0;
2669 
2670   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2671     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2672     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2673     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
2674   } else a->garray = 0;
2675 
2676   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2677   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2678   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2679   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2680 
2681   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2682   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2683   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2684   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2685   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
2686   *newmat = mat;
2687 
2688   PetscFunctionReturn(0);
2689 }
2690 
2691 #include "petscsys.h"
2692 
2693 #undef __FUNCT__
2694 #define __FUNCT__ "MatLoad_MPIBAIJ"
2695 PetscErrorCode MatLoad_MPIBAIJ(PetscViewer viewer, MatType type,Mat *newmat)
2696 {
2697   Mat            A;
2698   PetscErrorCode ierr;
2699   int            fd;
2700   PetscInt       i,nz,j,rstart,rend;
2701   PetscScalar    *vals,*buf;
2702   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2703   MPI_Status     status;
2704   PetscMPIInt    rank,size,maxnz;
2705   PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
2706   PetscInt       *locrowlens = PETSC_NULL,*procsnz = PETSC_NULL,*browners = PETSC_NULL;
2707   PetscInt       jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax;
2708   PetscMPIInt    tag = ((PetscObject)viewer)->tag;
2709   PetscInt       *dlens = PETSC_NULL,*odlens = PETSC_NULL,*mask = PETSC_NULL,*masked1 = PETSC_NULL,*masked2 = PETSC_NULL,rowcount,odcount;
2710   PetscInt       dcount,kmax,k,nzcount,tmp,mend;
2711 
2712   PetscFunctionBegin;
2713   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr);
2714     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
2715   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2716 
2717   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2718   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2719   if (!rank) {
2720     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2721     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2722     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2723   }
2724 
2725   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2726   M = header[1]; N = header[2];
2727 
2728   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2729 
2730   /*
2731      This code adds extra rows to make sure the number of rows is
2732      divisible by the blocksize
2733   */
2734   Mbs        = M/bs;
2735   extra_rows = bs - M + bs*Mbs;
2736   if (extra_rows == bs) extra_rows = 0;
2737   else                  Mbs++;
2738   if (extra_rows && !rank) {
2739     ierr = PetscInfo(0,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2740   }
2741 
2742   /* determine ownership of all rows */
2743   mbs        = Mbs/size + ((Mbs % size) > rank);
2744   m          = mbs*bs;
2745   ierr       = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr);
2746   ierr       = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2747 
2748   /* process 0 needs enough room for process with most rows */
2749   if (!rank) {
2750     mmax = rowners[1];
2751     for (i=2; i<size; i++) {
2752       mmax = PetscMax(mmax,rowners[i]);
2753     }
2754     mmax*=bs;
2755   } else mmax = m;
2756 
2757   rowners[0] = 0;
2758   for (i=2; i<=size; i++)  rowners[i] += rowners[i-1];
2759   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2760   rstart = rowners[rank];
2761   rend   = rowners[rank+1];
2762 
2763   /* distribute row lengths to all processors */
2764   ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr);
2765   if (!rank) {
2766     mend = m;
2767     if (size == 1) mend = mend - extra_rows;
2768     ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr);
2769     for (j=mend; j<m; j++) locrowlens[j] = 1;
2770     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2771     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2772     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2773     for (j=0; j<m; j++) {
2774       procsnz[0] += locrowlens[j];
2775     }
2776     for (i=1; i<size; i++) {
2777       mend = browners[i+1] - browners[i];
2778       if (i == size-1) mend = mend - extra_rows;
2779       ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr);
2780       for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
2781       /* calculate the number of nonzeros on each processor */
2782       for (j=0; j<browners[i+1]-browners[i]; j++) {
2783         procsnz[i] += rowlengths[j];
2784       }
2785       ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2786     }
2787     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2788   } else {
2789     ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2790   }

  if (!rank) {
    /* determine max buffer needed and allocate it */
    maxnz = procsnz[0];
    for (i=1; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices */
    nz     = procsnz[0];
    ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
    mycols = ibuf;
    if (size == 1)  nz -= extra_rows;
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
    if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }

    /* read in everyone else's part (except the last process's) and ship it off */
    for (i=1; i<size-1; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    /* read in the part for the last process */
    if (size != 1) {
      nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
      ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += locrowlens[i];
    }
    ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
    mycols = ibuf;
    /* receive message of column indices */
    ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Received a different number of column indices than expected");
  }

  /* loop over local rows, determining the number of off-diagonal entries */
  ierr     = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
  ierr     = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
  ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
  rowcount = 0; nzcount = 0;
  for (i=0; i<mbs; i++) {
    dcount  = 0;
    odcount = 0;
    for (j=0; j<bs; j++) {
      kmax = locrowlens[rowcount];
      for (k=0; k<kmax; k++) {
        tmp = mycols[nzcount++]/bs;
        if (!mask[tmp]) {
          mask[tmp] = 1;
          if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
          else masked1[dcount++] = tmp;
        }
      }
      rowcount++;
    }

    dlens[i]  = dcount;
    odlens[i] = odcount;

    /* zero out the mask elements we set */
    for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
    for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
  }
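
  /*
     A small worked example of the mask-based counting above (illustrative
     numbers): with bs = 2 and this process owning block rows [rstart,rend) =
     [0,2), suppose the two component rows of block row 0 reference block
     columns {0,1,3}. Blocks 0 and 1 fall inside [0,2), so dlens[0] = 2; block 3
     falls outside, so odlens[0] = 1. The mask gives O(1) duplicate detection
     per block column and is reset using only the entries actually touched.
  */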

  /* create our matrix */
  ierr = MatCreate(comm,&A);CHKERRQ(ierr);
  ierr = MatSetSizes(A,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
  ierr = MatSetType(A,type);CHKERRQ(ierr);
  ierr = MatMPIBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);

  /* the column indices in the file are stored in ascending order within each row */
  ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);

  if (!rank) {
    ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
    /* read in my part of the matrix numerical values */
    nz = procsnz[0];
    vals = buf;
    mycols = ibuf;
    if (size == 1)  nz -= extra_rows;
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
    if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }

    /* insert into matrix */
    jj      = rstart*bs;
    for (i=0; i<m; i++) {
      ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
    /* read in the other processes' parts (except the last one's) and ship them out */
    for (i=1; i<size-1; i++) {
      nz   = procsnz[i];
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    /* the last process */
    if (size != 1){
      nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;  /* unit diagonal on the padded rows */
      ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);

    /* receive message of values */
    vals   = buf;
    mycols = ibuf;
    ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Received a different number of values than expected");

    /* insert into matrix */
    jj      = rstart*bs;
    for (i=0; i<m; i++) {
      ierr    = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
  }
  ierr = PetscFree(locrowlens);CHKERRQ(ierr);
  ierr = PetscFree(buf);CHKERRQ(ierr);
  ierr = PetscFree(ibuf);CHKERRQ(ierr);
  ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
  ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
  ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  *newmat = A;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
/*@
   MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the hash table.

   Collective on Mat

   Input Parameters:
+  mat  - the matrix
-  fact - factor

   Level: advanced

   Notes:
   This can also be set by the command line option: -mat_use_hash_table <fact>
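
   Example of usage (the factor value here is illustrative):
.vb
   ierr = MatMPIBAIJSetHashTableFactor(mat,1.6);CHKERRQ(ierr);
.ve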

.keywords: matrix, hashtable, factor, HT

.seealso: MatSetOption()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
{
  PetscErrorCode ierr,(*f)(Mat,PetscReal);

  PetscFunctionBegin;
  ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(mat,fact);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
{
  Mat_MPIBAIJ *baij;

  PetscFunctionBegin;
  baij = (Mat_MPIBAIJ*)mat->data;
  baij->ht_fact = fact;
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
  PetscFunctionBegin;
  *Ad     = a->A;
  *Ao     = a->B;
  *colmap = a->garray;
  PetscFunctionReturn(0);
}
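
/*
   A minimal caller-side usage sketch (variable names are illustrative):

     Mat      Ad,Ao;
     PetscInt *garray;
     ierr = MatMPIBAIJGetSeqBAIJ(A,&Ad,&Ao,&garray);CHKERRQ(ierr);

   Ad is the local diagonal block, Ao the local off-diagonal block stored with a
   compressed column space, and garray maps Ao's local block columns back to
   global block column indices.
*/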