xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 290bbb0a1dcfb34dbf94efcfcc44171581b0efea)
#define PETSCMAT_DLL

#include "src/mat/impls/baij/mpi/mpibaij.h"   /*I  "petscmat.h"  I*/

EXTERN PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
EXTERN PetscErrorCode DisAssemble_MPIBAIJ(Mat);
EXTERN PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat,PetscInt,IS[],PetscInt);
EXTERN PetscErrorCode MatGetSubMatrices_MPIBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat *[]);
EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []);
EXTERN PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
EXTERN PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar);

/*  UGLY, ugly, ugly
   When MatScalar == PetscScalar the ..._MatScalar() versions of these functions
   do not exist; the #defines below map the _MatScalar names onto the regular
   versions. Otherwise (PETSC_USE_MAT_SINGLE) ..._MatScalar() takes matrix
   elements already in single precision and inserts them directly into the single
   precision data structures, while MatSetValuesBlocked_MPIBAIJ() and its
   relatives first convert the entries into single precision and then call
   ..._MatScalar() to store them.
*/
#if defined(PETSC_USE_MAT_SINGLE)
EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
EXTERN PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
EXTERN PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
#else
#define MatSetValuesBlocked_SeqBAIJ_MatScalar      MatSetValuesBlocked_SeqBAIJ
#define MatSetValues_MPIBAIJ_MatScalar             MatSetValues_MPIBAIJ
#define MatSetValuesBlocked_MPIBAIJ_MatScalar      MatSetValuesBlocked_MPIBAIJ
#define MatSetValues_MPIBAIJ_HT_MatScalar          MatSetValues_MPIBAIJ_HT
#define MatSetValuesBlocked_MPIBAIJ_HT_MatScalar   MatSetValuesBlocked_MPIBAIJ_HT
#endif

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMax_MPIBAIJ"
PetscErrorCode MatGetRowMax_MPIBAIJ(Mat A,Vec v)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;

  ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap.n,&vtmp);CHKERRQ(ierr);
  ierr = MatGetRowMax(a->B,vtmp);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap.n; i++){
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) va[i] = vb[i];
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = VecDestroy(vtmp);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
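
/*
   Illustrative sketch (not part of the library; the helper name and sizes are
   assumptions for the example): MatGetRowMax_MPIBAIJ() above takes the row-wise
   maxima of the diagonal block a->A and of the off-diagonal block a->B, then
   keeps, for each local row, whichever entry is larger in magnitude. A caller
   reaches it through the generic interface:
*/
#if 0
static PetscErrorCode ExampleRowMax(Mat A)  /* A: an assembled MATMPIBAIJ matrix */
{
  PetscErrorCode ierr;
  Vec            v;

  ierr = VecCreateMPI(A->comm,A->rmap.n,PETSC_DETERMINE,&v);CHKERRQ(ierr);
  ierr = MatGetRowMax(A,v);CHKERRQ(ierr);   /* dispatches to MatGetRowMax_MPIBAIJ() */
  ierr = VecView(v,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
  ierr = VecDestroy(v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif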

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPIBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*
     Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  When PETSC_USE_CTABLE is not defined this is
   done in a non-scalable way, since the length of colmap equals the
   global number of block columns.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIBAIJ_Private"
PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       nbs = B->nbs,i,bs=mat->rmap.bs;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr);
  for (i=0; i<nbs; i++){
    ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}
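
/*
   Worked example for the colmap above (the numbers are assumptions for
   illustration: bs = 2 and garray = {3,7}, i.e. the local off-diagonal part B
   holds global block columns 3 and 7):

     colmap[3] = 0*2+1 = 1,   colmap[7] = 1*2+1 = 3

   A later lookup of global point column 15 (block column 15/2 = 7) computes
   col = colmap[7]-1 + 15%2 = 2 + 1 = 3, the position of that column within B.
   The +1/-1 shift lets a stored value of 0 mean "block column not present".
*/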

#define CHUNKSIZE  10

#define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto a_noinsert; \
        } \
      } \
      if (a->nonew == 1) goto a_noinsert; \
      if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
      MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      a_noinsert:; \
    ailen[brow] = nrow; \
}

#define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto b_noinsert; \
        } \
      } \
      if (b->nonew == 1) goto b_noinsert; \
      if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
      MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
      CHKMEMQ;\
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      b_noinsert:; \
    bilen[brow] = nrow; \
}
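
/*
   Note on the two macros above (explanatory sketch): for a point entry (row,col)
   they locate the containing block (brow,bcol) = (row/bs,col/bs), narrow the
   search over the sorted block-column list rp[] by bisection until fewer than
   four candidates remain, then scan linearly. Within a bs-by-bs block the values
   are stored column-major, hence the offset bs*cidx + ridx. For example (bs = 3
   is an assumption for illustration) the point entry (7,5) lives in block (2,1)
   at intra-block position (ridx,cidx) = (1,2), i.e. offset 3*2+1 = 7 of the
   9-entry block. If the block is absent and new nonzeros are allowed, all later
   blocks in the row are shifted up one slot and a zeroed block is inserted.
*/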

#if defined(PETSC_USE_MAT_SINGLE)
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ"
PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,N = m*n;
  MatScalar      *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen  = N;
  }
  vsingle = b->setvaluescopy;

  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValues_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,N = m*n*b->bs2;
  MatScalar      *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen  = N;
  }
  vsingle = b->setvaluescopy;
  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,N = m*n;
  MatScalar      *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen  = N;
  }
  vsingle = b->setvaluescopy;
  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValues_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,N = m*n*b->bs2;
  MatScalar      *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen  = N;
  }
  vsingle = b->setvaluescopy;
  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar"
PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  MatScalar      value;
  PetscTruth     roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap.rstart;
  PetscInt       rend_orig=mat->rmap.rend,cstart_orig=mat->cmap.rstart;
  PetscInt       cend_orig=mat->cmap.rend,bs=mat->rmap.bs;

  /* Some variables required in the macros */
  Mat            A = baij->A;
  Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)(A)->data;
  PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar      *aa=a->a;

  Mat            B = baij->B;
  Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(B)->data;
  PetscInt       *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar      *ba=b->a;

  PetscInt       *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt       low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar      *ap,*bap;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      row = im[i] - rstart_orig;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig){
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B = baij->B;
              b = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba=b->a;
            } else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
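
/*
   Illustrative usage sketch (the helper name, indices and values are assumptions
   for the example). Entries whose rows are locally owned go straight into
   baij->A or baij->B through the macros above; rows owned elsewhere are stashed
   and communicated during assembly:
*/
#if 0
static PetscErrorCode ExampleSetValues(Mat A)  /* A: a MATMPIBAIJ matrix */
{
  PetscErrorCode ierr;
  PetscInt       row = 3,cols[2] = {2,5};
  PetscScalar    vals[2] = {1.0,-1.0};

  ierr = MatSetValues(A,1,&row,2,cols,vals,ADD_VALUES);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);  /* flushes the stash */
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif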

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_MatScalar"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
  const MatScalar *value;
  MatScalar       *barray=baij->barray;
  PetscTruth      roworiented = baij->roworiented;
  PetscErrorCode  ierr;
  PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
  PetscInt        cend=baij->cendbs,bs=mat->rmap.bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if(!barray) {
    ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If only one block column (row-oriented) or one block row
           (column-oriented) is being set, a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++  = *value++;
            }
          }
          barray -=bs2;
        }

        if (in[j] >= cstart && in[j] < cend){
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
        else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined (PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
            }
          }
          else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
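
/*
   Illustrative usage sketch (assumptions: bs = 2 and the indices/values are
   invented for the example). Each logical entry is a bs-by-bs block passed as
   bs2 scalars; with the default row-oriented storage {1,2,3,4} is the block
   {{1,2},{3,4}}. Block indices, not point indices, are used:
*/
#if 0
static PetscErrorCode ExampleSetValuesBlocked(Mat A)  /* A: a MATMPIBAIJ matrix, bs = 2 */
{
  PetscErrorCode ierr;
  PetscInt       brow = 1,bcol = 3;
  PetscScalar    block[4] = {1.0,2.0,3.0,4.0};

  ierr = MatSetValuesBlocked(A,1,&brow,1,&bcol,block,INSERT_VALUES);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif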

#define HASH_KEY 0.6180339887
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
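/*
   HASH() is the classic multiplicative scheme using the golden-ratio constant:
   take the fractional part of key*0.6180339887 and scale it by the table size.
   Worked example (numbers chosen only for illustration): key = 7, size = 10
   gives 7*0.6180339887 = 4.3262..., fractional part 0.3262..., so
   h1 = (PetscInt)(10*0.3262...) = 3. Collisions are resolved by the linear
   probing loops in the routines below.
*/
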
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_HT_MatScalar"
PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscTruth     roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap.rstart;
  PetscInt       rend_orig=mat->rmap.rend,Nbs=baij->Nbs;
  PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap.bs,*HT=baij->ht,idx;
  PetscReal      tmp;
  MatScalar      **HD = baij->hd,value;
#if defined(PETSC_USE_DEBUG)
  PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
#endif
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j=0; j<n; j++) {
        col = in[j];
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        /* Look up the entry in the hash table */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#endif
        /* A hash table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT_MatScalar"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
  PetscTruth      roworiented = baij->roworiented;
  PetscErrorCode  ierr;
  PetscInt        i,j,ii,jj,row,col;
  PetscInt        rstart=baij->rstartbs;
  PetscInt        rend=mat->rmap.rend,stepval,bs=mat->rmap.bs,bs2=baij->bs2;
  PetscInt        h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  PetscReal       tmp;
  MatScalar       **HD = baij->hd,*baij_a;
  const MatScalar *v_t,*value;
#if defined(PETSC_USE_DEBUG)
  PetscInt        total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
#endif
    row   = im[i];
    v_t   = v + i*bs2;
    if (row >= rstart && row < rend) {
      for (j=0; j<n; j++) {
        col = in[j];

        /* Look up the entry in the hash table */
        key = row*Nbs+col+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        total_ct++;
        insert_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#endif
        baij_a = HD[idx];
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t  += bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj]  += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj]  = *value++;
              }
            }
          }
        } else {
          value = v + j*(stepval+bs)*bs + i*bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj]  += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj]  = *value++;
              }
            }
          }
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIBAIJ"
PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs=mat->rmap.bs,i,j,bsrstart = mat->rmap.rstart,bsrend = mat->rmap.rend;
  PetscInt       bscstart = mat->cmap.rstart,bscend = mat->cmap.rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
    if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap.N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]);
        if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend){
          col = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}
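
/*
   Illustrative usage sketch (indices are assumptions for the example). Only
   locally owned rows may be queried, as the error branch above enforces:
*/
#if 0
static PetscErrorCode ExampleGetValues(Mat A)  /* A: an assembled MATMPIBAIJ matrix */
{
  PetscErrorCode ierr;
  PetscInt       row = 3,cols[2] = {2,5};
  PetscScalar    vals[2];

  ierr = MatGetValues(A,1,&row,2,cols,vals);CHKERRQ(ierr);  /* vals[k] = A(3,cols[k]) */
  PetscFunctionReturn(0);
}
#endif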

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIBAIJ"
PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap.bs,nz,row,col;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr =  MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      nz = amat->nz*bs2;
      for (i=0; i<nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      nz = bmat->nz*bs2;
      for (i=0; i<nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *nrm = sqrt(*nrm);
    } else if (type == NORM_1) { /* max column sum */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
      ierr = PetscMalloc((2*mat->cmap.N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      tmp2 = tmp + mat->cmap.N;
      ierr = PetscMemzero(tmp,mat->cmap.N*sizeof(PetscReal));CHKERRQ(ierr);
      v = amat->a; jj = amat->j;
      for (i=0; i<amat->nz; i++) {
        for (j=0; j<bs; j++){
          col = bs*(cstart + *jj) + j; /* column index */
          for (row=0; row<bs; row++){
            tmp[col] += PetscAbsScalar(*v);  v++;
          }
        }
        jj++;
      }
      v = bmat->a; jj = bmat->j;
      for (i=0; i<bmat->nz; i++) {
        for (j=0; j<bs; j++){
          col = bs*garray[*jj] + j;
          for (row=0; row<bs; row++){
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->cmap.N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *nrm = 0.0;
      for (j=0; j<mat->cmap.N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row sum */
      PetscReal *sums;
      ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr);
      sum = 0.0;
      for (j=0; j<amat->mbs; j++) {
        for (row=0; row<bs; row++) sums[row] = 0.0;
        v = amat->a + bs2*amat->i[j];
        nz = amat->i[j+1]-amat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++){
            for (row=0; row<bs; row++){
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        v = bmat->a + bs2*bmat->i[j];
        nz = bmat->i[j+1]-bmat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++){
            for (row=0; row<bs; row++){
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        for (row=0; row<bs; row++){
          if (sums[row] > sum) sum = sums[row];
        }
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
      ierr = PetscFree(sums);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
    }
  }
  PetscFunctionReturn(0);
}
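
/*
   For reference, the three norms computed above are

     ||A||_F   = sqrt( sum_ij |a_ij|^2 )  (local sums of squares, reduced with MPI_SUM)
     ||A||_1   = max_j sum_i |a_ij|       (max column sum; column sums reduced with MPI_SUM)
     ||A||_inf = max_i sum_j |a_ij|       (max row sum; per-process maxima reduced with MPI_MAX)
*/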

/*
  Creates and fills the hash table.
  The table is created only once; if new entries need to be added to the
  matrix then the hash table has to be destroyed and recreated.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            A = baij->A,B=baij->B;
  Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
  PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
  PetscErrorCode ierr;
  PetscInt       size,bs2=baij->bs2,rstart=baij->rstartbs;
  PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
  PetscInt       *HT,key;
  MatScalar      **HD;
  PetscReal      tmp;
#if defined(PETSC_USE_INFO)
  PetscInt       ct=0,max=0;
#endif

  PetscFunctionBegin;
  baij->ht_size=(PetscInt)(factor*nz);
  size = baij->ht_size;

  if (baij->ht) {
    PetscFunctionReturn(0);
  }

  /* Allocate Memory for Hash Table */
  ierr     = PetscMalloc((size)*(sizeof(PetscInt)+sizeof(MatScalar*))+1,&baij->hd);CHKERRQ(ierr);
  baij->ht = (PetscInt*)(baij->hd + size);
  HD       = baij->hd;
  HT       = baij->ht;

  ierr = PetscMemzero(HD,size*(sizeof(PetscInt)+sizeof(MatScalar*)));CHKERRQ(ierr);

  /* Loop Over A */
  for (i=0; i<a->mbs; i++) {
    for (j=ai[i]; j<ai[i+1]; j++) {
      row = i+rstart;
      col = aj[j]+cstart;

      key = row*Nbs + col + 1;
      h1  = HASH(size,key,tmp);
      for (k=0; k<size; k++){
        if (!HT[(h1+k)%size]) {
          HT[(h1+k)%size] = key;
          HD[(h1+k)%size] = a->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k> max) max = k;
#endif
    }
  }
  /* Loop Over B */
  for (i=0; i<b->mbs; i++) {
    for (j=bi[i]; j<bi[i+1]; j++) {
      row = i+rstart;
      col = garray[bj[j]];
      key = row*Nbs + col + 1;
      h1  = HASH(size,key,tmp);
      for (k=0; k<size; k++){
        if (!HT[(h1+k)%size]) {
          HT[(h1+k)%size] = key;
          HD[(h1+k)%size] = b->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k> max) max = k;
#endif
    }
  }

  /* Print Summary */
#if defined(PETSC_USE_INFO)
  for (i=0,j=0; i<size; i++) {
    if (HT[i]) {j++;}
  }
  ierr = PetscInfo2(0,"Average Search = %5.2f, max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}
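
/*
   Sizing note (explanatory, derived from the code above): the table holds one
   slot per nonzero block, over-allocated by the user-controlled factor
   (baij->ht_fact at assembly time), so size = factor*nz. Lookups probe linearly
   from h1, wrapping around via (h1+k)%size; a larger factor means fewer
   collisions at the cost of memory. Storing row*Nbs+col+1 keeps every key
   nonzero, so an empty slot is exactly HT[idx] == 0.
*/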

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (baij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted, others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,mat->rmap.range);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rangebs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(0,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(0,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col,other_disassembled;
  PetscTruth     r1,r2,r3;
  MatScalar      *val;
  InsertMode     addv = mat->insertmode;
  PetscMPIInt    n;

  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!baij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column-oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;
    (((Mat_SeqBAIJ*)baij->B->data))->roworiented    = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    ((Mat_SeqBAIJ*)baij->B->data)->roworiented    = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = PetscInfo1(0,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
  baij->rowvalues = 0;
  PetscFunctionReturn(0);
}
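
/*
   Assembly summary (explanatory): MatAssemblyBegin() starts the communication of
   the two stashes (the point stash mat->stash and the block stash mat->bstash);
   MatAssemblyEnd() drains the incoming messages row by row, re-inserts them
   through the SetValues kernels above, assembles the A and B parts, and, when
   the hash-table option is active, builds the table and switches
   mat->ops->setvalues to the _HT variants.
*/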

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = baij->size,rank = baij->rank;
  PetscInt          bs = mat->rmap.bs;
  PetscTruth        iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
              rank,mat->rmap.N,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs,
              mat->rmap.bs,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr);
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    PetscInt    M = mat->rmap.N,N = mat->cmap.N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar   *a;

    /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(A,mat->rmap.bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ*)baij->B->data;
    ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ"
PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  if (iascii || isdraw || issocket || isbinary) {
    ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIBAIJ"
PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap.N,mat->cmap.N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = MatDestroy(baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(baij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
  if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
  ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree(baij->hd);CHKERRQ(ierr);
#if defined(PETSC_USE_MAT_SINGLE)
  ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(baij);CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIBAIJ"
PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap.n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  }
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->rmap.n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
  }
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
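
/*
   The multiply above overlaps communication with computation: with x_loc the
   locally owned part of xx and x_ghost the needed off-process values,

     yy = a->A * x_loc + a->B * x_ghost

   The scatter that fills a->lvec with the ghost values is started, the
   diagonal-block product is computed while the messages are in flight, and the
   off-diagonal contribution is added once the scatter completes.
*/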
1273 
1274 #undef __FUNCT__
1275 #define __FUNCT__ "MatMultAdd_MPIBAIJ"
1276 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1277 {
1278   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1279   PetscErrorCode ierr;
1280 
1281   PetscFunctionBegin;
1282   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1283   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1284   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1285   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1286   PetscFunctionReturn(0);
1287 }
1288 
1289 #undef __FUNCT__
1290 #define __FUNCT__ "MatMultTranspose_MPIBAIJ"
1291 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1292 {
1293   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1294   PetscErrorCode ierr;
1295   PetscTruth     merged;
1296 
1297   PetscFunctionBegin;
1298   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1299   /* do nondiagonal part */
1300   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1301   if (!merged) {
1302     /* send it on its way */
1303     ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1304     /* do local part */
1305     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1306     /* receive remote parts: note this assumes the values are not actually */
1307     /* inserted in yy until the next line */
1308     ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1309   } else {
1310     /* do local part */
1311     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1312     /* send it on its way */
1313     ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1314     /* values actually were received in the Begin() but we need to call this nop */
1315     ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1316   }
1317   PetscFunctionReturn(0);
1318 }
1319 
1320 #undef __FUNCT__
1321 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
1322 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1323 {
1324   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1325   PetscErrorCode ierr;
1326 
1327   PetscFunctionBegin;
1328   /* do nondiagonal part */
1329   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1330   /* send it on its way */
1331   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1332   /* do local part */
1333   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1334   /* receive remote parts: note this assumes the values are not actually */
1335   /* inserted in yy until the next line, which is true for my implementation*/
1336   /* but is not perhaps always true. */
1337   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1338   PetscFunctionReturn(0);
1339 }
1340 
1341 /*
1342   This only works correctly for square matrices where the subblock A->A is the
1343    diagonal block
1344 */
1345 #undef __FUNCT__
1346 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
1347 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1348 {
1349   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1350   PetscErrorCode ierr;
1351 
1352   PetscFunctionBegin;
1353   if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1354   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1355   PetscFunctionReturn(0);
1356 }
1357 
1358 #undef __FUNCT__
1359 #define __FUNCT__ "MatScale_MPIBAIJ"
1360 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
1361 {
1362   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1363   PetscErrorCode ierr;
1364 
1365   PetscFunctionBegin;
1366   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1367   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1368   PetscFunctionReturn(0);
1369 }
1370 
1371 #undef __FUNCT__
1372 #define __FUNCT__ "MatGetRow_MPIBAIJ"
1373 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1374 {
1375   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
1376   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1377   PetscErrorCode ierr;
1378   PetscInt       bs = matin->rmap.bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1379   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap.rstart,brend = matin->rmap.rend;
1380   PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;
1381 
1382   PetscFunctionBegin;
1383   if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1384   mat->getrowactive = PETSC_TRUE;
1385 
1386   if (!mat->rowvalues && (idx || v)) {
1387     /*
1388         allocate enough space to hold information from the longest row.
1389     */
1390     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1391     PetscInt     max = 1,mbs = mat->mbs,tmp;
1392     for (i=0; i<mbs; i++) {
1393       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1394       if (max < tmp) { max = tmp; }
1395     }
1396     ierr = PetscMalloc(max*bs2*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1397     mat->rowindices = (PetscInt*)(mat->rowvalues + max*bs2);
1398   }
1399 
1400   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows");
1401   lrow = row - brstart;
1402 
1403   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1404   if (!v)   {pvA = 0; pvB = 0;}
1405   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1406   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1407   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1408   nztot = nzA + nzB;
1409 
1410   cmap  = mat->garray;
1411   if (v  || idx) {
1412     if (nztot) {
1413       /* Merge into increasing column order, assuming A and B are each already sorted */
1414       PetscInt imark = -1;
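      /* imark records how many of B's (off-diagonal) columns precede the
         diagonal block, so the merged row is laid out as [B-left | A | B-right] */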
1415       if (v) {
1416         *v = v_p = mat->rowvalues;
1417         for (i=0; i<nzB; i++) {
1418           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1419           else break;
1420         }
1421         imark = i;
1422         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1423         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1424       }
1425       if (idx) {
1426         *idx = idx_p = mat->rowindices;
1427         if (imark > -1) {
1428           for (i=0; i<imark; i++) {
1429             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1430           }
1431         } else {
1432           for (i=0; i<nzB; i++) {
1433             if (cmap[cworkB[i]/bs] < cstart)
1434               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1435             else break;
1436           }
1437           imark = i;
1438         }
1439         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1440         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1441       }
1442     } else {
1443       if (idx) *idx = 0;
1444       if (v)   *v   = 0;
1445     }
1446   }
1447   *nz = nztot;
1448   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1449   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1450   PetscFunctionReturn(0);
1451 }
1452 
1453 #undef __FUNCT__
1454 #define __FUNCT__ "MatRestoreRow_MPIBAIJ"
1455 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1456 {
1457   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1458 
1459   PetscFunctionBegin;
1460   if (!baij->getrowactive) {
1461     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1462   }
1463   baij->getrowactive = PETSC_FALSE;
1464   PetscFunctionReturn(0);
1465 }
1466 
1467 #undef __FUNCT__
1468 #define __FUNCT__ "MatZeroEntries_MPIBAIJ"
1469 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1470 {
1471   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1472   PetscErrorCode ierr;
1473 
1474   PetscFunctionBegin;
1475   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1476   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1477   PetscFunctionReturn(0);
1478 }
1479 
1480 #undef __FUNCT__
1481 #define __FUNCT__ "MatGetInfo_MPIBAIJ"
1482 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1483 {
1484   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
1485   Mat            A = a->A,B = a->B;
1486   PetscErrorCode ierr;
1487   PetscReal      isend[5],irecv[5];
1488 
1489   PetscFunctionBegin;
1490   info->block_size     = (PetscReal)matin->rmap.bs;
1491   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1492   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1493   isend[3] = info->memory;  isend[4] = info->mallocs;
1494   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1495   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1496   isend[3] += info->memory;  isend[4] += info->mallocs;
1497   if (flag == MAT_LOCAL) {
1498     info->nz_used      = isend[0];
1499     info->nz_allocated = isend[1];
1500     info->nz_unneeded  = isend[2];
1501     info->memory       = isend[3];
1502     info->mallocs      = isend[4];
1503   } else if (flag == MAT_GLOBAL_MAX) {
1504     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1505     info->nz_used      = irecv[0];
1506     info->nz_allocated = irecv[1];
1507     info->nz_unneeded  = irecv[2];
1508     info->memory       = irecv[3];
1509     info->mallocs      = irecv[4];
1510   } else if (flag == MAT_GLOBAL_SUM) {
1511     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1512     info->nz_used      = irecv[0];
1513     info->nz_allocated = irecv[1];
1514     info->nz_unneeded  = irecv[2];
1515     info->memory       = irecv[3];
1516     info->mallocs      = irecv[4];
1517   } else {
1518     SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1519   }
1520   info->rows_global       = (PetscReal)matin->rmap.N;
1521   info->columns_global    = (PetscReal)matin->cmap.N;
1522   info->rows_local        = (PetscReal)matin->rmap.n;
1523   info->columns_local     = (PetscReal)matin->cmap.n;
1524   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1525   info->fill_ratio_needed = 0;
1526   info->factor_mallocs    = 0;
1527   PetscFunctionReturn(0);
1528 }
1529 
1530 #undef __FUNCT__
1531 #define __FUNCT__ "MatSetOption_MPIBAIJ"
1532 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op)
1533 {
1534   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1535   PetscErrorCode ierr;
1536 
1537   PetscFunctionBegin;
1538   switch (op) {
1539   case MAT_NO_NEW_NONZERO_LOCATIONS:
1540   case MAT_YES_NEW_NONZERO_LOCATIONS:
1541   case MAT_COLUMNS_UNSORTED:
1542   case MAT_COLUMNS_SORTED:
1543   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1544   case MAT_KEEP_ZEROED_ROWS:
1545   case MAT_NEW_NONZERO_LOCATION_ERR:
1546     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1547     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1548     break;
1549   case MAT_ROW_ORIENTED:
1550     a->roworiented = PETSC_TRUE;
1551     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1552     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1553     break;
1554   case MAT_ROWS_SORTED:
1555   case MAT_ROWS_UNSORTED:
1556   case MAT_YES_NEW_DIAGONALS:
1557     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1558     break;
1559   case MAT_COLUMN_ORIENTED:
1560     a->roworiented = PETSC_FALSE;
1561     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1562     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1563     break;
1564   case MAT_IGNORE_OFF_PROC_ENTRIES:
1565     a->donotstash = PETSC_TRUE;
1566     break;
1567   case MAT_NO_NEW_DIAGONALS:
1568     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1569   case MAT_USE_HASH_TABLE:
1570     a->ht_flag = PETSC_TRUE;
1571     break;
1572   case MAT_SYMMETRIC:
1573   case MAT_STRUCTURALLY_SYMMETRIC:
1574   case MAT_HERMITIAN:
1575   case MAT_SYMMETRY_ETERNAL:
1576     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1577     break;
1578   case MAT_NOT_SYMMETRIC:
1579   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1580   case MAT_NOT_HERMITIAN:
1581   case MAT_NOT_SYMMETRY_ETERNAL:
1582     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1583     break;
1584   default:
1585     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1586   }
1587   PetscFunctionReturn(0);
1588 }
1589 
1590 #undef __FUNCT__
1591 #define __FUNCT__ "MatTranspose_MPIBAIJ"
1592 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,Mat *matout)
1593 {
1594   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
1595   Mat_SeqBAIJ    *Aloc;
1596   Mat            B;
1597   PetscErrorCode ierr;
1598   PetscInt       M=A->rmap.N,N=A->cmap.N,*ai,*aj,i,*rvals,j,k,col;
1599   PetscInt       bs=A->rmap.bs,mbs=baij->mbs;
1600   MatScalar      *a;
1601 
1602   PetscFunctionBegin;
1603   if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1604   ierr = MatCreate(A->comm,&B);CHKERRQ(ierr);
1605   ierr = MatSetSizes(B,A->cmap.n,A->rmap.n,N,M);CHKERRQ(ierr);
1606   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
1607   ierr = MatMPIBAIJSetPreallocation(B,A->rmap.bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1608 
1609   /* copy over the A part */
1610   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1611   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1612   ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
1613 
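  /* rvals[] holds the bs global row indices of block row i; inserting each
     bs x bs block with its global column index as the row of B and rvals[]
     as the columns places the block in B transposed */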
1614   for (i=0; i<mbs; i++) {
1615     rvals[0] = bs*(baij->rstartbs + i);
1616     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1617     for (j=ai[i]; j<ai[i+1]; j++) {
1618       col = (baij->cstartbs+aj[j])*bs;
1619       for (k=0; k<bs; k++) {
1620         ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1621         col++; a += bs;
1622       }
1623     }
1624   }
1625   /* copy over the B part */
1626   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1627   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1628   for (i=0; i<mbs; i++) {
1629     rvals[0] = bs*(baij->rstartbs + i);
1630     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1631     for (j=ai[i]; j<ai[i+1]; j++) {
1632       col = baij->garray[aj[j]]*bs;
1633       for (k=0; k<bs; k++) {
1634         ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1635         col++; a += bs;
1636       }
1637     }
1638   }
1639   ierr = PetscFree(rvals);CHKERRQ(ierr);
1640   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1641   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1642 
1643   if (matout) {
1644     *matout = B;
1645   } else {
1646     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1647   }
1648   PetscFunctionReturn(0);
1649 }
1650 
1651 #undef __FUNCT__
1652 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
1653 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1654 {
1655   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1656   Mat            a = baij->A,b = baij->B;
1657   PetscErrorCode ierr;
1658   PetscInt       s1,s2,s3;
1659 
1660   PetscFunctionBegin;
1661   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1662   if (rr) {
1663     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1664     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1665     /* Overlap communication with computation. */
1666     ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1667   }
1668   if (ll) {
1669     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1670     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1671     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1672   }
1673   /* scale  the diagonal block */
1674   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1675 
1676   if (rr) {
1677     /* Do a scatter end and then right scale the off-diagonal block */
1678     ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1679     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1680   }
1681 
1682   PetscFunctionReturn(0);
1683 }
1684 
1685 #undef __FUNCT__
1686 #define __FUNCT__ "MatZeroRows_MPIBAIJ"
1687 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
1688 {
1689   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1690   PetscErrorCode ierr;
1691   PetscMPIInt    imdex,size = l->size,n,rank = l->rank;
1692   PetscInt       i,*owners = A->rmap.range;
1693   PetscInt       *nprocs,j,idx,nsends,row;
1694   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
1695   PetscInt       *rvalues,tag = A->tag,count,base,slen,*source,lastidx = -1;
1696   PetscInt       *lens,*lrows,*values,rstart_bs=A->rmap.rstart;
1697   MPI_Comm       comm = A->comm;
1698   MPI_Request    *send_waits,*recv_waits;
1699   MPI_Status     recv_status,*send_status;
1700 #if defined(PETSC_DEBUG)
1701   PetscTruth     found = PETSC_FALSE;
1702 #endif
1703 
1704   PetscFunctionBegin;
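  /* rows[] may name rows owned by other processes, so the code below
     (1) counts how many indices belong to each owner, (2) exchanges message
     counts and maximum lengths with PetscMaxSum(), (3) ships the indices with
     nonblocking MPI sends/receives, and (4) zeros the locally owned rows */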
1705   /*  first count number of contributors to each processor */
1706   ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
1707   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
1708   ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
1709   j = 0;
1710   for (i=0; i<N; i++) {
1711     if (lastidx > (idx = rows[i])) j = 0;
1712     lastidx = idx;
1713     for (; j<size; j++) {
1714       if (idx >= owners[j] && idx < owners[j+1]) {
1715         nprocs[2*j]++;
1716         nprocs[2*j+1] = 1;
1717         owner[i] = j;
1718 #if defined(PETSC_DEBUG)
1719         found = PETSC_TRUE;
1720 #endif
1721         break;
1722       }
1723     }
1724 #if defined(PETSC_DEBUG)
1725     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
1726     found = PETSC_FALSE;
1727 #endif
1728   }
1729   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
1730 
1731   /* inform other processors of number of messages and max length*/
1732   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
1733 
1734   /* post receives:   */
1735   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
1736   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
1737   for (i=0; i<nrecvs; i++) {
1738     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
1739   }
1740 
1741   /* do sends:
1742      1) starts[i] gives the starting index in svalues for stuff going to
1743      the ith processor
1744   */
1745   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
1746   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
1747   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
1748   starts[0]  = 0;
1749   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1750   for (i=0; i<N; i++) {
1751     svalues[starts[owner[i]]++] = rows[i];
1752   }
1753 
1754   starts[0] = 0;
1755   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1756   count = 0;
1757   for (i=0; i<size; i++) {
1758     if (nprocs[2*i+1]) {
1759       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
1760     }
1761   }
1762   ierr = PetscFree(starts);CHKERRQ(ierr);
1763 
1764   base = owners[rank];
1765 
1766   /*  wait on receives */
1767   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
1768   source = lens + nrecvs;
1769   count  = nrecvs; slen = 0;
1770   while (count) {
1771     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
1772     /* unpack receives into our local space */
1773     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
1774     source[imdex]  = recv_status.MPI_SOURCE;
1775     lens[imdex]    = n;
1776     slen          += n;
1777     count--;
1778   }
1779   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1780 
1781   /* move the data into the send scatter */
1782   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
1783   count = 0;
1784   for (i=0; i<nrecvs; i++) {
1785     values = rvalues + i*nmax;
1786     for (j=0; j<lens[i]; j++) {
1787       lrows[count++] = values[j] - base;
1788     }
1789   }
1790   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1791   ierr = PetscFree(lens);CHKERRQ(ierr);
1792   ierr = PetscFree(owner);CHKERRQ(ierr);
1793   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1794 
1795   /* actually zap the local rows */
1796   /*
1797         Zero the required rows. If the "diagonal block" of the matrix
1798      is square and the user wishes to set the diagonal, we use separate
1799      code so that MatSetValues() is not called for each diagonal entry,
1800      which would allocate new memory with many mallocs and slow things down.
1801 
1802        Contributed by: Matthew Knepley
1803   */
1804   /* must zero l->B before l->A because the (diag != 0.0) case below may put values into l->B */
1805   ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0);CHKERRQ(ierr);
1806   if ((diag != 0.0) && (l->A->rmap.N == l->A->cmap.N)) {
1807     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag);CHKERRQ(ierr);
1808   } else if (diag != 0.0) {
1809     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr);
1810     if (((Mat_SeqBAIJ*)l->A->data)->nonew) {
1811       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1812 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1813     }
1814     for (i=0; i<slen; i++) {
1815       row  = lrows[i] + rstart_bs;
1816       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
1817     }
1818     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1819     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1820   } else {
1821     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr);
1822   }
1823 
1824   ierr = PetscFree(lrows);CHKERRQ(ierr);
1825 
1826   /* wait on sends */
1827   if (nsends) {
1828     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1829     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1830     ierr = PetscFree(send_status);CHKERRQ(ierr);
1831   }
1832   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1833   ierr = PetscFree(svalues);CHKERRQ(ierr);
1834 
1835   PetscFunctionReturn(0);
1836 }
1837 
1838 #undef __FUNCT__
1839 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
1840 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1841 {
1842   Mat_MPIBAIJ    *a   = (Mat_MPIBAIJ*)A->data;
1843   PetscErrorCode ierr;
1844 
1845   PetscFunctionBegin;
1846   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1847   PetscFunctionReturn(0);
1848 }
1849 
1850 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
1851 
1852 #undef __FUNCT__
1853 #define __FUNCT__ "MatEqual_MPIBAIJ"
1854 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag)
1855 {
1856   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1857   Mat            a,b,c,d;
1858   PetscTruth     flg;
1859   PetscErrorCode ierr;
1860 
1861   PetscFunctionBegin;
1862   a = matA->A; b = matA->B;
1863   c = matB->A; d = matB->B;
1864 
1865   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1866   if (flg) {
1867     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1868   }
1869   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1870   PetscFunctionReturn(0);
1871 }
1872 
1873 #undef __FUNCT__
1874 #define __FUNCT__ "MatCopy_MPIBAIJ"
1875 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
1876 {
1877   PetscErrorCode ierr;
1878   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ *)A->data;
1879   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ *)B->data;
1880 
1881   PetscFunctionBegin;
1882   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1883   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1884     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1885   } else {
1886     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1887     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1888   }
1889   PetscFunctionReturn(0);
1890 }
1891 
1892 #undef __FUNCT__
1893 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ"
1894 PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A)
1895 {
1896   PetscErrorCode ierr;
1897 
1898   PetscFunctionBegin;
1899   ierr =  MatMPIBAIJSetPreallocation(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1900   PetscFunctionReturn(0);
1901 }
1902 
1903 #include "petscblaslapack.h"
1904 #undef __FUNCT__
1905 #define __FUNCT__ "MatAXPY_MPIBAIJ"
1906 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1907 {
1908   PetscErrorCode ierr;
1909   Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data;
1910   PetscBLASInt   bnz,one=1;
1911   Mat_SeqBAIJ    *x,*y;
1912 
1913   PetscFunctionBegin;
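  /* with SAME_NONZERO_PATTERN, X and Y store their entries in identical order,
     so Y += a*X reduces to a BLAS axpy over the raw value arrays of the
     diagonal (A) and off-diagonal (B) blocks */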
1914   if (str == SAME_NONZERO_PATTERN) {
1915     PetscScalar alpha = a;
1916     x = (Mat_SeqBAIJ *)xx->A->data;
1917     y = (Mat_SeqBAIJ *)yy->A->data;
1918     bnz = (PetscBLASInt)x->nz;
1919     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1920     x = (Mat_SeqBAIJ *)xx->B->data;
1921     y = (Mat_SeqBAIJ *)yy->B->data;
1922     bnz = (PetscBLASInt)x->nz;
1923     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1924   } else {
1925     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1926   }
1927   PetscFunctionReturn(0);
1928 }
1929 
1930 #undef __FUNCT__
1931 #define __FUNCT__ "MatRealPart_MPIBAIJ"
1932 PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
1933 {
1934   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ*)A->data;
1935   PetscErrorCode ierr;
1936 
1937   PetscFunctionBegin;
1938   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1939   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1940   PetscFunctionReturn(0);
1941 }
1942 
1943 #undef __FUNCT__
1944 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ"
1945 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
1946 {
1947   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ*)A->data;
1948   PetscErrorCode ierr;
1949 
1950   PetscFunctionBegin;
1951   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1952   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1953   PetscFunctionReturn(0);
1954 }
1955 
1956 /* -------------------------------------------------------------------*/
1957 static struct _MatOps MatOps_Values = {
1958        MatSetValues_MPIBAIJ,
1959        MatGetRow_MPIBAIJ,
1960        MatRestoreRow_MPIBAIJ,
1961        MatMult_MPIBAIJ,
1962 /* 4*/ MatMultAdd_MPIBAIJ,
1963        MatMultTranspose_MPIBAIJ,
1964        MatMultTransposeAdd_MPIBAIJ,
1965        0,
1966        0,
1967        0,
1968 /*10*/ 0,
1969        0,
1970        0,
1971        0,
1972        MatTranspose_MPIBAIJ,
1973 /*15*/ MatGetInfo_MPIBAIJ,
1974        MatEqual_MPIBAIJ,
1975        MatGetDiagonal_MPIBAIJ,
1976        MatDiagonalScale_MPIBAIJ,
1977        MatNorm_MPIBAIJ,
1978 /*20*/ MatAssemblyBegin_MPIBAIJ,
1979        MatAssemblyEnd_MPIBAIJ,
1980        0,
1981        MatSetOption_MPIBAIJ,
1982        MatZeroEntries_MPIBAIJ,
1983 /*25*/ MatZeroRows_MPIBAIJ,
1984        0,
1985        0,
1986        0,
1987        0,
1988 /*30*/ MatSetUpPreallocation_MPIBAIJ,
1989        0,
1990        0,
1991        0,
1992        0,
1993 /*35*/ MatDuplicate_MPIBAIJ,
1994        0,
1995        0,
1996        0,
1997        0,
1998 /*40*/ MatAXPY_MPIBAIJ,
1999        MatGetSubMatrices_MPIBAIJ,
2000        MatIncreaseOverlap_MPIBAIJ,
2001        MatGetValues_MPIBAIJ,
2002        MatCopy_MPIBAIJ,
2003 /*45*/ 0,
2004        MatScale_MPIBAIJ,
2005        0,
2006        0,
2007        0,
2008 /*50*/ 0,
2009        0,
2010        0,
2011        0,
2012        0,
2013 /*55*/ 0,
2014        0,
2015        MatSetUnfactored_MPIBAIJ,
2016        0,
2017        MatSetValuesBlocked_MPIBAIJ,
2018 /*60*/ 0,
2019        MatDestroy_MPIBAIJ,
2020        MatView_MPIBAIJ,
2021        0,
2022        0,
2023 /*65*/ 0,
2024        0,
2025        0,
2026        0,
2027        0,
2028 /*70*/ MatGetRowMax_MPIBAIJ,
2029        0,
2030        0,
2031        0,
2032        0,
2033 /*75*/ 0,
2034        0,
2035        0,
2036        0,
2037        0,
2038 /*80*/ 0,
2039        0,
2040        0,
2041        0,
2042        MatLoad_MPIBAIJ,
2043 /*85*/ 0,
2044        0,
2045        0,
2046        0,
2047        0,
2048 /*90*/ 0,
2049        0,
2050        0,
2051        0,
2052        0,
2053 /*95*/ 0,
2054        0,
2055        0,
2056        0,
2057        0,
2058 /*100*/0,
2059        0,
2060        0,
2061        0,
2062        0,
2063 /*105*/0,
2064        MatRealPart_MPIBAIJ,
2065        MatImaginaryPart_MPIBAIJ};
2066 
2067 
2068 EXTERN_C_BEGIN
2069 #undef __FUNCT__
2070 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
2071 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
2072 {
2073   PetscFunctionBegin;
2074   *a      = ((Mat_MPIBAIJ *)A->data)->A;
2075   *iscopy = PETSC_FALSE;
2076   PetscFunctionReturn(0);
2077 }
2078 EXTERN_C_END
2079 
2080 EXTERN_C_BEGIN
2081 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*);
2082 EXTERN_C_END
2083 
2084 #undef __FUNCT__
2085 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ"
2086 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
2087 {
2088   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)B->data;
2089   PetscInt       m = B->rmap.n/bs,cstart = baij->cstartbs, cend = baij->cendbs,j,nnz,i,d;
2090   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart = baij->rstartbs,ii;
2091   const PetscInt *JJ;
2092   PetscScalar    *values;
2093   PetscErrorCode ierr;
2094 
2095   PetscFunctionBegin;
2096 #if defined(PETSC_OPT_g)
2097   if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_RANGE,"Ii[0] must be 0, it is %D",Ii[0]);
2098 #endif
2099   ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2100   o_nnz = d_nnz + m;
2101 
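  /* for each local block row, count how many block columns land in the diagonal
     block-column range [cstart,cend) (d_nnz) and how many land outside it (o_nnz) */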
2102   for (i=0; i<m; i++) {
2103     nnz     = Ii[i+1]- Ii[i];
2104     JJ      = J + Ii[i];
2105     nnz_max = PetscMax(nnz_max,nnz);
2106 #if defined(PETSC_OPT_g)
2107     if (nnz < 0) SETERRQ1(PETSC_ERR_ARG_RANGE,"Local block row %D has a negative number of columns %D",i,nnz);
2108 #endif
2109     for (j=0; j<nnz; j++) {
2110       if (*JJ >= cstart) break;
2111       JJ++;
2112     }
2113     d = 0;
2114     for (; j<nnz; j++) {
2115       if (*JJ++ >= cend) break;
2116       d++;
2117     }
2118     d_nnz[i] = d;
2119     o_nnz[i] = nnz - d;
2120   }
2121   ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2122   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2123 
2124   if (v) values = (PetscScalar*)v;
2125   else {
2126     ierr = PetscMalloc(bs*bs*(nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2127     ierr = PetscMemzero(values,bs*bs*nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2128   }
2129 
2130   ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2131   for (i=0; i<m; i++) {
2132     ii   = i + rstart;
2133     nnz  = Ii[i+1]- Ii[i];
2134     ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? bs*bs*Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
2135   }
2136   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2137   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2138   ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr);
2139 
2140   if (!v) {
2141     ierr = PetscFree(values);CHKERRQ(ierr);
2142   }
2143   PetscFunctionReturn(0);
2144 }
2145 
2146 #undef __FUNCT__
2147 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR"
2148 /*@C
2149    MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in block AIJ (block compressed row) format, given the nonzero structure of the local block rows in CSR form
2150 
2151    Collective on MPI_Comm
2152 
2153    Input Parameters:
2154 +  B - the matrix
2155 .  bs - the block size
2156 .  i - the indices into j for the start of each local block row (starts with zero)
2157 .  j - the global block column indices for each local block row (starts with zero); these must be sorted within each row
2158 -  v - optional values in the matrix (bs*bs values per block entry)
2159 
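   Example:
   An illustrative sketch (the indices, sizes, and values are made up) for a
   process owning two block rows with bs=2, assuming B was already created with
   MatCreate(), MatSetSizes(), and MatSetType():
.vb
   PetscInt    i[] = {0,2,3};  /* start of each local block row in j[]   */
   PetscInt    j[] = {0,1,2};  /* global block column indices, sorted    */
   PetscScalar v[12];          /* bs*bs values per block entry, 3 blocks */
   ierr = MatMPIBAIJSetPreallocationCSR(B,2,i,j,v);CHKERRQ(ierr);
.ve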
2160    Level: developer
2161 
2162 .keywords: matrix, aij, compressed row, sparse, parallel
2163 
2164 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIBAIJ(), MATMPIBAIJ
2165 @*/
2166 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2167 {
2168   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]);
2169 
2170   PetscFunctionBegin;
2171   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
2172   if (f) {
2173     ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr);
2174   }
2175   PetscFunctionReturn(0);
2176 }
2177 
2178 EXTERN_C_BEGIN
2179 #undef __FUNCT__
2180 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
2181 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
2182 {
2183   Mat_MPIBAIJ    *b;
2184   PetscErrorCode ierr;
2185   PetscInt       i;
2186 
2187   PetscFunctionBegin;
2188   B->preallocated = PETSC_TRUE;
2189   ierr = PetscOptionsBegin(B->comm,B->prefix,"Options for MPIBAIJ matrix","Mat");CHKERRQ(ierr);
2190     ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
2191   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2192 
2193   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
2194   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2195   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2196   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2197   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2198 
2199   B->rmap.bs  = bs;
2200   B->cmap.bs  = bs;
2201   ierr = PetscMapInitialize(B->comm,&B->rmap);CHKERRQ(ierr);
2202   ierr = PetscMapInitialize(B->comm,&B->cmap);CHKERRQ(ierr);
2203 
2204   if (d_nnz) {
2205     for (i=0; i<B->rmap.n/bs; i++) {
2206       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
2207     }
2208   }
2209   if (o_nnz) {
2210     for (i=0; i<B->rmap.n/bs; i++) {
2211       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
2212     }
2213   }
2214 
2215   b = (Mat_MPIBAIJ*)B->data;
2216   b->bs2 = bs*bs;
2217   b->mbs = B->rmap.n/bs;
2218   b->nbs = B->cmap.n/bs;
2219   b->Mbs = B->rmap.N/bs;
2220   b->Nbs = B->cmap.N/bs;
2221 
2222   for (i=0; i<=b->size; i++) {
2223     b->rangebs[i] = B->rmap.range[i]/bs;
2224   }
2225   b->rstartbs = B->rmap.rstart/bs;
2226   b->rendbs   = B->rmap.rend/bs;
2227   b->cstartbs = B->cmap.rstart/bs;
2228   b->cendbs   = B->cmap.rend/bs;
2229 
2230   ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2231   ierr = MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);CHKERRQ(ierr);
2232   ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
2233   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2234   ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
2235   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2236   ierr = MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);CHKERRQ(ierr);
2237   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
2238   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
2239   ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
2240 
2241   ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);
2242 
2243   PetscFunctionReturn(0);
2244 }
2245 EXTERN_C_END
2246 
2247 EXTERN_C_BEGIN
2248 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
2249 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
2250 EXTERN_C_END
2251 
2252 /*MC
2253    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
2254 
2255    Options Database Keys:
2256 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
2257 . -mat_block_size <bs> - set the blocksize used to store the matrix
2258 - -mat_use_hash_table <fact> - use hash tables during matrix assembly; <fact> sets the table size factor (see MatMPIBAIJSetHashTableFactor())
2259 
2260   Level: beginner
2261 
2262 .seealso: MatCreateMPIBAIJ()
2263 M*/
2264 
2265 EXTERN_C_BEGIN
2266 #undef __FUNCT__
2267 #define __FUNCT__ "MatCreate_MPIBAIJ"
2268 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIBAIJ(Mat B)
2269 {
2270   Mat_MPIBAIJ    *b;
2271   PetscErrorCode ierr;
2272   PetscTruth     flg;
2273 
2274   PetscFunctionBegin;
2275   ierr = PetscNew(Mat_MPIBAIJ,&b);CHKERRQ(ierr);
2276   B->data = (void*)b;
2277 
2278 
2279   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2280   B->mapping    = 0;
2281   B->factor     = 0;
2282   B->assembled  = PETSC_FALSE;
2283 
2284   B->insertmode = NOT_SET_VALUES;
2285   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
2286   ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr);
2287 
2288   /* build local table of row and column ownerships */
2289   ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);
2290 
2291   /* build cache for off array entries formed */
2292   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
2293   b->donotstash  = PETSC_FALSE;
2294   b->colmap      = PETSC_NULL;
2295   b->garray      = PETSC_NULL;
2296   b->roworiented = PETSC_TRUE;
2297 
2298 #if defined(PETSC_USE_MAT_SINGLE)
2299   /* stuff for MatSetValues_XXX in single precision */
2300   b->setvalueslen     = 0;
2301   b->setvaluescopy    = PETSC_NULL;
2302 #endif
2303 
2304   /* stuff used in block assembly */
2305   b->barray       = 0;
2306 
2307   /* stuff used for matrix vector multiply */
2308   b->lvec         = 0;
2309   b->Mvctx        = 0;
2310 
2311   /* stuff for MatGetRow() */
2312   b->rowindices   = 0;
2313   b->rowvalues    = 0;
2314   b->getrowactive = PETSC_FALSE;
2315 
2316   /* hash table stuff */
2317   b->ht           = 0;
2318   b->hd           = 0;
2319   b->ht_size      = 0;
2320   b->ht_flag      = PETSC_FALSE;
2321   b->ht_fact      = 0;
2322   b->ht_total_ct  = 0;
2323   b->ht_insert_ct = 0;
2324 
2325   ierr = PetscOptionsBegin(B->comm,PETSC_NULL,"Options for loading MPIBAIJ matrix","Mat");CHKERRQ(ierr);
2326     ierr = PetscOptionsTruth("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
2327     if (flg) {
2328       PetscReal fact = 1.39;
2329       ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
2330       ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr);
2331       if (fact <= 1.0) fact = 1.39;
2332       ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
2333       ierr = PetscInfo1(0,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
2334     }
2335   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2336 
2337   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2338                                      "MatStoreValues_MPIBAIJ",
2339                                      MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
2340   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2341                                      "MatRetrieveValues_MPIBAIJ",
2342                                      MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
2343   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
2344                                      "MatGetDiagonalBlock_MPIBAIJ",
2345                                      MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
2346   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C",
2347                                      "MatMPIBAIJSetPreallocation_MPIBAIJ",
2348                                      MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
2349   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",
2350                                      "MatMPIBAIJSetPreallocationCSR_MPIBAIJ",
2351                                      MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
2352   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
2353                                      "MatDiagonalScaleLocal_MPIBAIJ",
2354                                      MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
2355   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C",
2356                                      "MatSetHashTableFactor_MPIBAIJ",
2357                                      MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
2358   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);
2359   PetscFunctionReturn(0);
2360 }
2361 EXTERN_C_END
2362 
2363 /*MC
2364    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
2365 
2366    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
2367    and MATMPIBAIJ otherwise.
2368 
2369    Options Database Keys:
2370 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
2371 
2372   Level: beginner
2373 
2374 .seealso: MatCreateMPIBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
2375 M*/
2376 
2377 EXTERN_C_BEGIN
2378 #undef __FUNCT__
2379 #define __FUNCT__ "MatCreate_BAIJ"
2380 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_BAIJ(Mat A)
2381 {
2382   PetscErrorCode ierr;
2383   PetscMPIInt    size;
2384 
2385   PetscFunctionBegin;
2386   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
2387   if (size == 1) {
2388     ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr);
2389   } else {
2390     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
2391   }
2392   PetscFunctionReturn(0);
2393 }
2394 EXTERN_C_END
2395 
2396 #undef __FUNCT__
2397 #define __FUNCT__ "MatMPIBAIJSetPreallocation"
2398 /*@C
2399    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
2400    (block compressed row).  For good matrix assembly performance
2401    the user should preallocate the matrix storage by setting the parameters
2402    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2403    performance can be increased by more than a factor of 50.
2404 
2405    Collective on Mat
2406 
2407    Input Parameters:
2408 +  A - the matrix
2409 .  bs   - size of block
2410 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2411            submatrix  (same for all local rows)
2412 .  d_nnz - array containing the number of block nonzeros in the various block rows
2413            in the diagonal portion of the local submatrix (possibly different for each block
2414            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
2415 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2416            submatrix (same for all local rows).
2417 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2418            off-diagonal portion of the local submatrix (possibly different for
2419            each block row) or PETSC_NULL.
2420 
2421    If the *_nnz parameter is given then the *_nz parameter is ignored
2422 
2423    Options Database Keys:
2424 +   -mat_block_size - size of the blocks to use
2425 -   -mat_use_hash_table <fact> - use hash tables during matrix assembly; <fact> sets the table size factor (see MatMPIBAIJSetHashTableFactor())
2426 
2427    Notes:
2428    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
2429    then it must be used on all processors that share the object for that argument.
2430 
2431    Storage Information:
2432    For a square global matrix we define each processor's diagonal portion
2433    to be its local rows and the corresponding columns (a square submatrix);
2434    each processor's off-diagonal portion encompasses the remainder of the
2435    local matrix (a rectangular submatrix).
2436 
2437    The user can specify preallocated storage for the diagonal part of
2438    the local submatrix with either d_nz or d_nnz (not both).  Set
2439    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2440    memory allocation.  Likewise, specify preallocated storage for the
2441    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2442 
2443    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2444    the figure below we depict these three local rows and all columns (0-11).
2445 
2446 .vb
2447            0 1 2 3 4 5 6 7 8 9 10 11
2448           -------------------
2449    row 3  |  o o o d d d o o o o o o
2450    row 4  |  o o o d d d o o o o o o
2451    row 5  |  o o o d d d o o o o o o
2452           -------------------
2453 .ve
2454 
2455    Thus, any entries in the d locations are stored in the d (diagonal)
2456    submatrix, and any entries in the o locations are stored in the
2457    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
2458    stored simply in the MATSEQBAIJ format for compressed row storage.
2459 
2460    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2461    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2462    In general, for PDE problems in which most nonzeros are near the diagonal,
2463    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2464    or you will get TERRIBLE performance; see the users' manual chapter on
2465    matrices.
2466 
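   As a sketch, reading the figure above with bs=3 (so the three depicted rows
   form a single block row with one d block and three o blocks), a constant
   preallocation would be
.vb
   ierr = MatMPIBAIJSetPreallocation(A,3,1,PETSC_NULL,3,PETSC_NULL);CHKERRQ(ierr);
.ve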
2467    Level: intermediate
2468 
2469 .keywords: matrix, block, aij, compressed row, sparse, parallel
2470 
2471 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR()
2472 @*/
2473 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2474 {
2475   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
2476 
2477   PetscFunctionBegin;
2478   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2479   if (f) {
2480     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2481   }
2482   PetscFunctionReturn(0);
2483 }
2484 
2485 #undef __FUNCT__
2486 #define __FUNCT__ "MatCreateMPIBAIJ"
2487 /*@C
2488    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
2489    (block compressed row).  For good matrix assembly performance
2490    the user should preallocate the matrix storage by setting the parameters
2491    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2492    performance can be increased by more than a factor of 50.
2493 
2494    Collective on MPI_Comm
2495 
2496    Input Parameters:
2497 +  comm - MPI communicator
2498 .  bs   - size of block
2499 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
2500            This value should be the same as the local size used in creating the
2501            y vector for the matrix-vector product y = Ax.
2502 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
2503            This value should be the same as the local size used in creating the
2504            x vector for the matrix-vector product y = Ax.
2505 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
2506 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
2507 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
2508            submatrix  (same for all local rows)
2509 .  d_nnz - array containing the number of nonzero blocks in the various block rows
2510            in the diagonal portion of the local submatrix (possibly different for each block
2511            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
2512 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
2513            submatrix (same for all local rows).
2514 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
2515            off-diagonal portion of the local submatrix (possibly different for
2516            each block row) or PETSC_NULL.
2517 
2518    Output Parameter:
2519 .  A - the matrix
2520 
2521    Options Database Keys:
2522 +   -mat_block_size - size of the blocks to use
2523 -   -mat_use_hash_table <fact> - use hash tables during matrix assembly; <fact> sets the table size factor (see MatMPIBAIJSetHashTableFactor())
2524 
2525    Notes:
2526    If the *_nnz parameter is given then the *_nz parameter is ignored
2527 
2528    A nonzero block is any block that has 1 or more nonzeros in it
2529 
2530    The user MUST specify either the local or global matrix dimensions
2531    (possibly both).
2532 
2533    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
2534    then it must be used on all processors that share the object for that argument.
2535 
2536    Storage Information:
2537    For a square global matrix we define each processor's diagonal portion
2538    to be its local rows and the corresponding columns (a square submatrix);
2539    each processor's off-diagonal portion encompasses the remainder of the
2540    local matrix (a rectangular submatrix).
2541 
2542    The user can specify preallocated storage for the diagonal part of
2543    the local submatrix with either d_nz or d_nnz (not both).  Set
2544    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2545    memory allocation.  Likewise, specify preallocated storage for the
2546    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2547 
2548    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2549    the figure below we depict these three local rows and all columns (0-11).
2550 
2551 .vb
2552            0 1 2 3 4 5 6 7 8 9 10 11
2553           -------------------
2554    row 3  |  o o o d d d o o o o o o
2555    row 4  |  o o o d d d o o o o o o
2556    row 5  |  o o o d d d o o o o o o
2557           -------------------
2558 .ve
2559 
2560    Thus, any entries in the d locations are stored in the d (diagonal)
2561    submatrix, and any entries in the o locations are stored in the
2562    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
2563    stored simply in the MATSEQBAIJ format for compressed row storage.
2564 
2565    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2566    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2567    In general, for PDE problems in which most nonzeros are near the diagonal,
2568    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2569    or you will get TERRIBLE performance; see the users' manual chapter on
2570    matrices.
2571 
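   A minimal usage sketch (the block size, global sizes, and preallocation
   counts are illustrative only):
.vb
   Mat A;
   ierr = MatCreateMPIBAIJ(PETSC_COMM_WORLD,3,PETSC_DECIDE,PETSC_DECIDE,300,300,
                           1,PETSC_NULL,3,PETSC_NULL,&A);CHKERRQ(ierr);
.ve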
2572    Level: intermediate
2573 
2574 .keywords: matrix, block, aij, compressed row, sparse, parallel
2575 
2576 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
2577 @*/
2578 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2579 {
2580   PetscErrorCode ierr;
2581   PetscMPIInt    size;
2582 
2583   PetscFunctionBegin;
2584   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2585   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2586   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2587   if (size > 1) {
2588     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
2589     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2590   } else {
2591     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
2592     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2593   }
2594   PetscFunctionReturn(0);
2595 }
2596 
2597 #undef __FUNCT__
2598 #define __FUNCT__ "MatDuplicate_MPIBAIJ"
2599 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2600 {
2601   Mat            mat;
2602   Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
2603   PetscErrorCode ierr;
2604   PetscInt       len=0;
2605 
2606   PetscFunctionBegin;
2607   *newmat       = 0;
2608   ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr);
2609   ierr = MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);CHKERRQ(ierr);
2610   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
2611   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2612 
2613   mat->factor       = matin->factor;
2614   mat->preallocated = PETSC_TRUE;
2615   mat->assembled    = PETSC_TRUE;
2616   mat->insertmode   = NOT_SET_VALUES;
2617 
2618   a      = (Mat_MPIBAIJ*)mat->data;
2619   mat->rmap.bs  = matin->rmap.bs;
2620   a->bs2   = oldmat->bs2;
2621   a->mbs   = oldmat->mbs;
2622   a->nbs   = oldmat->nbs;
2623   a->Mbs   = oldmat->Mbs;
2624   a->Nbs   = oldmat->Nbs;
2625 
2626   ierr = PetscMapCopy(matin->comm,&matin->rmap,&mat->rmap);CHKERRQ(ierr);
2627   ierr = PetscMapCopy(matin->comm,&matin->cmap,&mat->cmap);CHKERRQ(ierr);
2628 
2629   a->size         = oldmat->size;
2630   a->rank         = oldmat->rank;
2631   a->donotstash   = oldmat->donotstash;
2632   a->roworiented  = oldmat->roworiented;
2633   a->rowindices   = 0;
2634   a->rowvalues    = 0;
2635   a->getrowactive = PETSC_FALSE;
2636   a->barray       = 0;
2637   a->rstartbs     = oldmat->rstartbs;
2638   a->rendbs       = oldmat->rendbs;
2639   a->cstartbs     = oldmat->cstartbs;
2640   a->cendbs       = oldmat->cendbs;
2641 
2642   /* hash table stuff */
2643   a->ht           = 0;
2644   a->hd           = 0;
2645   a->ht_size      = 0;
2646   a->ht_flag      = oldmat->ht_flag;
2647   a->ht_fact      = oldmat->ht_fact;
2648   a->ht_total_ct  = 0;
2649   a->ht_insert_ct = 0;
2650 
2651   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
2652   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
2653   ierr = MatStashCreate_Private(matin->comm,matin->rmap.bs,&mat->bstash);CHKERRQ(ierr);
2654   if (oldmat->colmap) {
2655 #if defined (PETSC_USE_CTABLE)
2656   ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2657 #else
2658   ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2659   ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2660   ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2661 #endif
2662   } else a->colmap = 0;
2663 
2664   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2665     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2666     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2667     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
2668   } else a->garray = 0;
2669 
2670   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2671   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2672   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2673   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2674 
2675   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2676   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2677   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2678   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2679   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
2680   *newmat = mat;
2681 
2682   PetscFunctionReturn(0);
2683 }
2684 
2685 #include "petscsys.h"
2686 
2687 #undef __FUNCT__
2688 #define __FUNCT__ "MatLoad_MPIBAIJ"
2689 PetscErrorCode MatLoad_MPIBAIJ(PetscViewer viewer, MatType type,Mat *newmat)
2690 {
2691   Mat            A;
2692   PetscErrorCode ierr;
2693   int            fd;
2694   PetscInt       i,nz,j,rstart,rend;
2695   PetscScalar    *vals,*buf;
2696   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2697   MPI_Status     status;
2698   PetscMPIInt    rank,size,maxnz;
2699   PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
2700   PetscInt       *locrowlens = PETSC_NULL,*procsnz = PETSC_NULL,*browners = PETSC_NULL;
2701   PetscInt       jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax;
2702   PetscMPIInt    tag = ((PetscObject)viewer)->tag;
2703   PetscInt       *dlens = PETSC_NULL,*odlens = PETSC_NULL,*mask = PETSC_NULL,*masked1 = PETSC_NULL,*masked2 = PETSC_NULL,rowcount,odcount;
2704   PetscInt       dcount,kmax,k,nzcount,tmp,mend;
2705 
2706   PetscFunctionBegin;
2707   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIBAIJ matrix","Mat");CHKERRQ(ierr);
2708     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
2709   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2710 
2711   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2712   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2713   if (!rank) {
2714     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2715     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2716     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
2717   }
2718 
2719   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2720   M = header[1]; N = header[2];
2721 
2722   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2723 
2724   /*
2725      This code adds extra rows to make sure the number of rows is
2726      divisible by the blocksize
2727   */
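  /* e.g. (illustrative) M=10, bs=4: Mbs=10/4=2 and extra_rows=4-10+8=2, which
     is != bs, so Mbs becomes 3 and the padded global size is 12 */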
2728   Mbs        = M/bs;
2729   extra_rows = bs - M + bs*Mbs;
2730   if (extra_rows == bs) extra_rows = 0;
2731   else                  Mbs++;
2732   if (extra_rows && !rank) {
2733     ierr = PetscInfo(0,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2734   }
2735 
2736   /* determine ownership of all rows */
2737   mbs        = Mbs/size + ((Mbs % size) > rank);
2738   m          = mbs*bs;
2739   ierr       = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr);
2740   ierr       = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2741 
2742   /* process 0 needs enough room for the process with the most rows */
2743   if (!rank) {
2744     mmax = rowners[1];
2745     for (i=2; i<size; i++) {
2746       mmax = PetscMax(mmax,rowners[i]);
2747     }
2748     mmax*=bs;
2749   } else mmax = m;
2750 
2751   rowners[0] = 0;
2752   for (i=2; i<=size; i++)  rowners[i] += rowners[i-1];
2753   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2754   rstart = rowners[rank];
2755   rend   = rowners[rank+1];
2756 
2757   /* distribute row lengths to all processors */
2758   ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr);
2759   if (!rank) {
2760     mend = m;
2761     if (size == 1) mend = mend - extra_rows;
2762     ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr);
2763     for (j=mend; j<m; j++) locrowlens[j] = 1;
2764     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2765     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2766     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2767     for (j=0; j<m; j++) {
2768       procsnz[0] += locrowlens[j];
2769     }
2770     for (i=1; i<size; i++) {
2771       mend = browners[i+1] - browners[i];
2772       if (i == size-1) mend = mend - extra_rows;
2773       ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr);
2774       for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
2775       /* calculate the number of nonzeros on each processor */
2776       for (j=0; j<browners[i+1]-browners[i]; j++) {
2777         procsnz[i] += rowlengths[j];
2778       }
2779       ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2780     }
2781     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2782   } else {
2783     ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2784   }
2785 
2786   if (!rank) {
2787     /* determine max buffer needed and allocate it */
2788     maxnz = procsnz[0];
2789     for (i=1; i<size; i++) {
2790       maxnz = PetscMax(maxnz,procsnz[i]);
2791     }
2792     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2793 
2794     /* read in my part of the matrix column indices  */
2795     nz     = procsnz[0];
2796     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2797     mycols = ibuf;
2798     if (size == 1)  nz -= extra_rows;
2799     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2800     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2801     /* read in each remaining process's part (except the last) and ship it off */
2802     /* read in every ones (except the last) and ship off */
2803     for (i=1; i<size-1; i++) {
2804       nz   = procsnz[i];
2805       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2806       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2807     }
2808     /* read in the stuff for the last proc */
2809     if (size != 1) {
2810       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2811       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2812       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2813       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2814     }
2815     ierr = PetscFree(cols);CHKERRQ(ierr);
2816   } else {
2817     /* determine buffer space needed for message */
2818     nz = 0;
2819     for (i=0; i<m; i++) {
2820       nz += locrowlens[i];
2821     }
2822     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2823     mycols = ibuf;
2824     /* receive message of column indices */
2825     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2826     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2827     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"File does not contain the expected number of column indices");
2828   }
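  /*
     MPI_Get_count reports how many items the receive actually delivered;
     comparing it with the expected nz catches a truncated or inconsistent
     input file here rather than during the assembly below.
  */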
2829 
2830   /* loop over local block rows, counting the diagonal and off-diagonal block entries */
2831   ierr     = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
2832   ierr     = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
2833   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2834   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2835   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2836   rowcount = 0; nzcount = 0;
2837   for (i=0; i<mbs; i++) {
2838     dcount  = 0;
2839     odcount = 0;
2840     for (j=0; j<bs; j++) {
2841       kmax = locrowlens[rowcount];
2842       for (k=0; k<kmax; k++) {
2843         tmp = mycols[nzcount++]/bs;
2844         if (!mask[tmp]) {
2845           mask[tmp] = 1;
2846           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
2847           else masked1[dcount++] = tmp;
2848         }
2849       }
2850       rowcount++;
2851     }
2852 
2853     dlens[i]  = dcount;
2854     odlens[i] = odcount;
2855 
2856     /* zero out the mask elements we set */
2857     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2858     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2859   }
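  /*
     dlens[i] and odlens[i] now count the distinct block columns of local
     block row i that fall inside and outside [rstart,rend) respectively.
     A small illustration (not from the file): with rstart = 3 and rend = 5,
     a block row whose entries map to block columns {3,4,4,7} yields
     dlens[i] = 2 and odlens[i] = 1, the duplicate 4 being filtered by mask.
  */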
2860 
2861   /* create our matrix */
2862   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
2863   ierr = MatSetSizes(A,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
2864   ierr = MatSetType(A,type);CHKERRQ(ierr);
2865   ierr = MatMPIBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
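  /*
     Supplying the per-block-row counts dlens/odlens lets the preallocation
     size each block row exactly, so the MatSetValues_MPIBAIJ calls below
     trigger no mallocs; the two 0 arguments (the uniform d_nz/o_nz counts)
     are ignored whenever the per-row arrays are given.
  */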
2866 
2867   /* the column indices within each row of the file are sorted, so pass that hint along for faster insertion */
2868   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2869 
2870   if (!rank) {
2871     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2872     /* read in my part of the matrix numerical values  */
2873     nz = procsnz[0];
2874     vals = buf;
2875     mycols = ibuf;
2876     if (size == 1)  nz -= extra_rows;
2877     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2878     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2879 
2880     /* insert into matrix */
2881     jj      = rstart*bs;
2882     for (i=0; i<m; i++) {
2883       ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2884       mycols += locrowlens[i];
2885       vals   += locrowlens[i];
2886       jj++;
2887     }
2888     /* read in other processors (except the last one) and ship out */
2889     for (i=1; i<size-1; i++) {
2890       nz   = procsnz[i];
2891       vals = buf;
2892       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2893       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2894     }
2895     /* the last proc (the extra rows are not on the disk) */
2896     if (size != 1) {
2897       nz   = procsnz[size-1] - extra_rows;
2898       vals = buf;
2899       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2900       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2901       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
2902     }
2903     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2904   } else {
2905     /* receive numeric values */
2906     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2907 
2908     /* receive message of values */
2909     vals   = buf;
2910     mycols = ibuf;
2911     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2912     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2913     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"File does not contain the expected number of values");
2914 
2915     /* insert into matrix */
2916     jj      = rstart*bs;
2917     for (i=0; i<m; i++) {
2918       ierr    = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2919       mycols += locrowlens[i];
2920       vals   += locrowlens[i];
2921       jj++;
2922     }
2923   }
2924   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2925   ierr = PetscFree(buf);CHKERRQ(ierr);
2926   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2927   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
2928   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
2929   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
2930   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2931   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2932 
2933   *newmat = A;
2934   PetscFunctionReturn(0);
2935 }
2936 
2937 #undef __FUNCT__
2938 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
2939 /*@
2940    MatMPIBAIJSetHashTableFactor - Sets the factor used to compute the size of the hash table employed during matrix assembly.
2941 
2942    Collective on Mat
2943 
2944    Input Parameters:
2945 +  mat  - the matrix
2946 -  fact - factor used to compute the hash table size
2947 
2948    Level: advanced
2949 
2950    Notes:
2951    This can also be set by the command line option: -mat_use_hash_table <fact>
2952 
2953 .keywords: matrix, hashtable, factor, HT
2954 
2955 .seealso: MatSetOption()
2956 @*/
2957 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
2958 {
2959   PetscErrorCode ierr,(*f)(Mat,PetscReal);
2960 
2961   PetscFunctionBegin;
2962   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);CHKERRQ(ierr);
2963   if (f) {
2964     ierr = (*f)(mat,fact);CHKERRQ(ierr);
2965   }
2966   PetscFunctionReturn(0);
2967 }
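/*
   A minimal usage sketch (the setup around the call is illustrative, not
   taken from this file).  The call is a harmless no-op unless the matrix
   type registers MatSetHashTableFactor_C, which the MPIBAIJ implementation
   just below provides:

     Mat A;
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,m,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
     ierr = MatMPIBAIJSetHashTableFactor(A,1.6);CHKERRQ(ierr);
*/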
2968 
2969 EXTERN_C_BEGIN
2970 #undef __FUNCT__
2971 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
2972 PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
2973 {
2974   Mat_MPIBAIJ *baij;
2975 
2976   PetscFunctionBegin;
2977   baij = (Mat_MPIBAIJ*)mat->data;
2978   baij->ht_fact = fact;
2979   PetscFunctionReturn(0);
2980 }
2981 EXTERN_C_END
2982 
2983 #undef __FUNCT__
2984 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
2985 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
2986 {
2987   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2988   PetscFunctionBegin;
2989   *Ad     = a->A;
2990   *Ao     = a->B;
2991   *colmap = a->garray;
2992   PetscFunctionReturn(0);
2993 }
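/*
   MatMPIBAIJGetSeqBAIJ exposes the two sequential pieces of the parallel
   matrix: Ad is the diagonal block, Ao the off-diagonal part stored with
   compressed local column numbering, and colmap (a->garray) maps Ao's local
   block columns back to global block columns.  A sketch of typical use
   (illustrative only); the returned matrices are borrowed references owned
   by A and must not be destroyed by the caller:

     Mat      Ad,Ao;
     PetscInt *garray;
     ierr = MatMPIBAIJGetSeqBAIJ(A,&Ad,&Ao,&garray);CHKERRQ(ierr);
*/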
2994