#define PETSCMAT_DLL

#include "src/mat/impls/baij/mpi/mpibaij.h"   /*I  "petscmat.h"  I*/

EXTERN PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
EXTERN PetscErrorCode DisAssemble_MPIBAIJ(Mat);
EXTERN PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat,PetscInt,IS[],PetscInt);
EXTERN PetscErrorCode MatGetSubMatrices_MPIBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat *[]);
EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []);
EXTERN PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
EXTERN PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
EXTERN PetscErrorCode MatPrintHelp_SeqBAIJ(Mat);
EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar);

/*  UGLY, ugly, ugly
   When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does
   not exist; the ..._MatScalar() names are simply #defined below to the ordinary routines.
   Otherwise the ..._MatScalar() routines take matrix elements that are already in single
   precision and insert them directly into the single-precision data structures, while
   MatSetValuesBlocked_MPIBAIJ() first converts the entries to single precision and then
   calls ..._MatScalar() to put them into the single-precision data structures.
*/
#if defined(PETSC_USE_MAT_SINGLE)
EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
EXTERN PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
EXTERN PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt*,PetscInt,const PetscInt*,const MatScalar*,InsertMode);
#else
#define MatSetValuesBlocked_SeqBAIJ_MatScalar      MatSetValuesBlocked_SeqBAIJ
#define MatSetValues_MPIBAIJ_MatScalar             MatSetValues_MPIBAIJ
#define MatSetValuesBlocked_MPIBAIJ_MatScalar      MatSetValuesBlocked_MPIBAIJ
#define MatSetValues_MPIBAIJ_HT_MatScalar          MatSetValues_MPIBAIJ_HT
#define MatSetValuesBlocked_MPIBAIJ_HT_MatScalar   MatSetValuesBlocked_MPIBAIJ_HT
#endif
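
/*
   Schematic of the PETSC_USE_MAT_SINGLE wrappers defined further below
   (a sketch of the common pattern, not the literal code):

      copy:     for (i=0; i<N; i++) vsingle[i] = (MatScalar)v[i];
      dispatch: MatSetValues_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);

   The copy buffer b->setvaluescopy is grown on demand and then reused.
*/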

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMax_MPIBAIJ"
PetscErrorCode MatGetRowMax_MPIBAIJ(Mat A,Vec v)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;

  ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap.n,&vtmp);CHKERRQ(ierr);
  ierr = MatGetRowMax(a->B,vtmp);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap.n; i++){
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) va[i] = vb[i];
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = VecDestroy(vtmp);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPIBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*
     Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  This is done in a non-scalable way, since the
   length of colmap equals the global number of (block) columns.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIBAIJ_Private"
PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       nbs = B->nbs,i,bs=mat->rmap.bs;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr);
  for (i=0; i<nbs; i++){
    ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}
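
#if 0
/* Usage sketch for colmap (array branch, i.e. without PETSC_USE_CTABLE);
   illustrative only, not compiled.  Entries are stored shifted by +1 so
   that 0 can mean "this block column has no entries in the off-diagonal
   part B". */
PetscInt gcol = ...;                      /* a global (point) column index     */
PetscInt loc  = baij->colmap[gcol/bs];    /* 1 + bs*(local block index), or 0  */
if (loc) col = loc - 1 + gcol%bs;         /* local point column within B       */
#endif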

#define CHUNKSIZE  10

#define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto a_noinsert; \
        } \
      } \
      if (a->nonew == 1) goto a_noinsert; \
      if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
      MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew); \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      a_noinsert:; \
    ailen[brow] = nrow; \
}

#define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto b_noinsert; \
        } \
      } \
      if (b->nonew == 1) goto b_noinsert; \
      if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
      MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew); \
      CHKMEMQ;\
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      b_noinsert:; \
    bilen[brow] = nrow; \
}
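
/*
   Both MatSetValues_SeqBAIJ_{A,B}_Private() macros above expect the caller
   to declare these locals: rp,ap,bap (pointers into the row's index/value
   storage), nrow,rmax (used/allocated nonzero blocks in the block row),
   brow,bcol,ridx,cidx (block row/column and intra-block offsets),
   low,high,t,_i,ii,N (search and shift temporaries), bs,bs2, value, ierr,
   and the per-part aliases a,aa,ai,aj,aimax,ailen (resp. b,ba,bi,bj,bimax,
   bilen).  Each macro narrows the bracket with a short binary search, then
   scans linearly; a missing entry falls through to the reallocate-and-shift
   code unless nonew forbids new nonzeros.
*/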

#if defined(PETSC_USE_MAT_SINGLE)
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ"
PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,N = m*n;
  MatScalar      *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen  = N;
  }
  vsingle = b->setvaluescopy;

  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValues_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,N = m*n*b->bs2;
  MatScalar      *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen  = N;
  }
  vsingle = b->setvaluescopy;
  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,N = m*n;
  MatScalar      *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen  = N;
  }
  vsingle = b->setvaluescopy;
  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValues_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,N = m*n*b->bs2;
  MatScalar      *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen  = N;
  }
  vsingle = b->setvaluescopy;
  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar"
PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  MatScalar      value;
  PetscTruth     roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap.rstart;
  PetscInt       rend_orig=mat->rmap.rend,cstart_orig=mat->cmap.rstart;
  PetscInt       cend_orig=mat->cmap.rend,bs=mat->rmap.bs;

  /* Some variables required by the MatSetValues_SeqBAIJ_*_Private() macros */
  Mat            A = baij->A;
  Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)(A)->data;
  PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar      *aa=a->a;

  Mat            B = baij->B;
  Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(B)->data;
  PetscInt       *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar      *ba=b->a;

  PetscInt       *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt       low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar      *ap,*bap;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      row = im[i] - rstart_orig;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig){
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B = baij->B;
              b = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba=b->a;
            } else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_MatScalar"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
  const MatScalar *value;
  MatScalar       *barray=baij->barray;
  PetscTruth      roworiented = baij->roworiented;
  PetscErrorCode  ierr;
  PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
  PetscInt        cend=baij->cendbs,bs=mat->rmap.bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If n == 1 (row-oriented) or m == 1 (column-oriented), the block is already contiguous and no copy is required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++  = *value++;
            }
          }
          barray -=bs2;
        }

        if (in[j] >= cstart && in[j] < cend){
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
        else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined (PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
            }
          }
          else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#define HASH_KEY 0.6180339887
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
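
/*
   HASH() is Knuth-style multiplicative hashing: take the fractional part of
   key * (golden-ratio conjugate) and scale it by the table size.  A small
   worked example (a sketch): key = 10, size = 100 gives
       tmp  = 10 * 0.6180339887 = 6.180339887
       frac = 0.180339887
       slot = (PetscInt)(100 * 0.180339887) = 18
*/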
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_HT_MatScalar"
PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscTruth     roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap.rstart;
  PetscInt       rend_orig=mat->rmap.rend,Nbs=baij->Nbs;
  PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap.bs,*HT=baij->ht,idx;
  PetscReal      tmp;
  MatScalar      **HD = baij->hd,value;
#if defined(PETSC_USE_DEBUG)
  PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
#endif
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j=0; j<n; j++) {
        col = in[j];
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        /* Look up in the hash table */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#endif
        /* A hash table entry is found, so insert the value at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT_MatScalar"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
  PetscTruth      roworiented = baij->roworiented;
  PetscErrorCode  ierr;
  PetscInt        i,j,ii,jj,row,col;
  PetscInt        rstart=baij->rstartbs;
  PetscInt        rend=mat->rmap.rend,stepval,bs=mat->rmap.bs,bs2=baij->bs2;
  PetscInt        h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  PetscReal       tmp;
  MatScalar       **HD = baij->hd,*baij_a;
  const MatScalar *v_t,*value;
#if defined(PETSC_USE_DEBUG)
  PetscInt        total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
#endif
    row   = im[i];
    v_t   = v + i*bs2;
    if (row >= rstart && row < rend) {
      for (j=0; j<n; j++) {
        col = in[j];

        /* Look up in the hash table */
        key = row*Nbs+col+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        total_ct++;
        insert_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#endif
        baij_a = HD[idx];
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t  += bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj]  += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj]  = *value++;
              }
            }
          }
        } else {
          value = v + j*(stepval+bs)*bs + i*bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj]  += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj]  = *value++;
              }
            }
          }
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIBAIJ"
PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs=mat->rmap.bs,i,j,bsrstart = mat->rmap.rstart,bsrend = mat->rmap.rend;
  PetscInt       bscstart = mat->cmap.rstart,bscend = mat->cmap.rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
    if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap.N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]);
        if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend){
          col = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

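/*
   Norms computed below, for the global matrix with entries a_ij:
     NORM_FROBENIUS: sqrt( sum_ij |a_ij|^2 )  - local sums of squares, MPI_SUM reduce, sqrt
     NORM_1:         max_j sum_i |a_ij|       - per-column sums, MPI_SUM reduce, then max
     NORM_INFINITY:  max_i sum_j |a_ij|       - local row sums, MPI_MAX reduce
*/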
#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIBAIJ"
PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap.bs,nz,row,col;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr =  MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      nz = amat->nz*bs2;
      for (i=0; i<nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      nz = bmat->nz*bs2;
      for (i=0; i<nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *nrm = sqrt(*nrm);
    } else if (type == NORM_1) { /* max column sum */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
      ierr = PetscMalloc((2*mat->cmap.N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      tmp2 = tmp + mat->cmap.N;
      ierr = PetscMemzero(tmp,mat->cmap.N*sizeof(PetscReal));CHKERRQ(ierr);
      v = amat->a; jj = amat->j;
      for (i=0; i<amat->nz; i++) {
        for (j=0; j<bs; j++){
          col = bs*(cstart + *jj) + j; /* column index */
          for (row=0; row<bs; row++){
            tmp[col] += PetscAbsScalar(*v);  v++;
          }
        }
        jj++;
      }
      v = bmat->a; jj = bmat->j;
      for (i=0; i<bmat->nz; i++) {
        for (j=0; j<bs; j++){
          col = bs*garray[*jj] + j;
          for (row=0; row<bs; row++){
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->cmap.N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *nrm = 0.0;
      for (j=0; j<mat->cmap.N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row sum */
      PetscReal *sums;
      ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr);
      sum = 0.0;
      for (j=0; j<amat->mbs; j++) {
        for (row=0; row<bs; row++) sums[row] = 0.0;
        v = amat->a + bs2*amat->i[j];
        nz = amat->i[j+1]-amat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++){
            for (row=0; row<bs; row++){
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        v = bmat->a + bs2*bmat->i[j];
        nz = bmat->i[j+1]-bmat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++){
            for (row=0; row<bs; row++){
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        for (row=0; row<bs; row++){
          if (sums[row] > sum) sum = sums[row];
        }
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_MAX,mat->comm);CHKERRQ(ierr);
      ierr = PetscFree(sums);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
    }
  }
  PetscFunctionReturn(0);
}

/*
  Creates the hash table and fills it.
  This table is created only once; if new entries need to be added to the
  matrix then the hash table has to be destroyed and recreated.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            A = baij->A,B=baij->B;
  Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
  PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
  PetscErrorCode ierr;
  PetscInt       size,bs2=baij->bs2,rstart=baij->rstartbs;
  PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
  PetscInt       *HT,key;
  MatScalar      **HD;
  PetscReal      tmp;
#if defined(PETSC_USE_INFO)
  PetscInt       ct=0,max=0;
#endif

  PetscFunctionBegin;
  baij->ht_size=(PetscInt)(factor*nz);
  size = baij->ht_size;

  if (baij->ht) {
    PetscFunctionReturn(0);
  }

  /* Allocate Memory for Hash Table */
  ierr     = PetscMalloc((size)*(sizeof(PetscInt)+sizeof(MatScalar*))+1,&baij->hd);CHKERRQ(ierr);
  baij->ht = (PetscInt*)(baij->hd + size);
  HD       = baij->hd;
  HT       = baij->ht;

  ierr = PetscMemzero(HD,size*(sizeof(PetscInt)+sizeof(MatScalar*)));CHKERRQ(ierr);

  /* Loop Over A */
  for (i=0; i<a->mbs; i++) {
    for (j=ai[i]; j<ai[i+1]; j++) {
      row = i+rstart;
      col = aj[j]+cstart;

      key = row*Nbs + col + 1;
      h1  = HASH(size,key,tmp);
      for (k=0; k<size; k++){
        if (!HT[(h1+k)%size]) {
          HT[(h1+k)%size] = key;
          HD[(h1+k)%size] = a->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }
  /* Loop Over B */
  for (i=0; i<b->mbs; i++) {
    for (j=bi[i]; j<bi[i+1]; j++) {
      row = i+rstart;
      col = garray[bj[j]];
      key = row*Nbs + col + 1;
      h1  = HASH(size,key,tmp);
      for (k=0; k<size; k++){
        if (!HT[(h1+k)%size]) {
          HT[(h1+k)%size] = key;
          HD[(h1+k)%size] = b->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }

  /* Print Summary */
#if defined(PETSC_USE_INFO)
  for (i=0,j=0; i<size; i++) {
    if (HT[i]) {j++;}
  }
  ierr = PetscInfo2(0,"Average search = %5.2f, max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}
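
#if 0
/* Lookup sketch matching the table built above (illustrative, not compiled):
   open addressing with linear probing; keys are row*Nbs+col+1 so that 0 can
   mark an empty slot.  This mirrors the search loops in
   MatSetValues_MPIBAIJ_HT_MatScalar(). */
key = row*Nbs + col + 1;
h1  = HASH(size,key,tmp);
for (k=0; k<size; k++) {
  idx = (h1+k)%size;
  if (HT[idx] == key) break;   /* found: the bs*bs block lives at HD[idx] */
}
#endif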

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (baij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
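  /* This works because INSERT_VALUES and ADD_VALUES occupy distinct bit
     positions (1 and 2) in the InsertMode enumeration, so a bitwise OR
     across all processes equals ADD_VALUES|INSERT_VALUES exactly when
     both modes were used somewhere. */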
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted values while others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,mat->rmap.range);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rangebs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(0,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(0,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col,other_disassembled;
  PetscTruth     r1,r2,r3;
  MatScalar      *val;
  InsertMode     addv = mat->insertmode;
  PetscMPIInt    n;

  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!baij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the stashed values are column-oriented,
       temporarily clear the roworiented flags, and restore the original flags
       after the MatSetValuesBlocked() calls */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;
    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so, we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = PetscInfo1(0,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
  baij->rowvalues = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = baij->size,rank = baij->rank;
  PetscInt          bs = mat->rmap.bs;
  PetscTruth        iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
              rank,mat->rmap.N,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs,
              mat->rmap.bs,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw       draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr);
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    PetscInt    M = mat->rmap.N,N = mat->cmap.N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar   *a;

    /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(A,mat->rmap.bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ*)baij->B->data;
    ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ"
PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  if (iascii || isdraw || issocket || isbinary) {
    ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIBAIJ"
PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap.N,mat->cmap.N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = MatDestroy(baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(baij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
  if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
  ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree(baij->hd);CHKERRQ(ierr);
#if defined(PETSC_USE_MAT_SINGLE)
  ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(baij);CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIBAIJ"
PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap.n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  }
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->rmap.n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
  }
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIBAIJ"
PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIBAIJ"
PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscTruth     merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually */
    /* inserted in yy until the next line */
    ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in zz until the next line, which is true for my implementation */
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
   diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIBAIJ"
PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIBAIJ"
PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       bs = matin->rmap.bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap.rstart,brend = matin->rmap.rend;
  PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;

  PetscFunctionBegin;
  if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
    PetscInt     max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc(max*bs2*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
    mat->rowindices = (PetscInt*)(mat->rowvalues + max*bs2);
  }

1401   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows");
1402   lrow = row - brstart;
1403 
1404   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1405   if (!v)   {pvA = 0; pvB = 0;}
1406   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1407   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1408   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1409   nztot = nzA + nzB;
1410 
1411   cmap  = mat->garray;
1412   if (v  || idx) {
1413     if (nztot) {
1414       /* Sort by increasing column numbers, assuming A and B already sorted */
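      /* entries of B whose global block column lies below the diagonal block come
         first in the merged row, then all entries of A, then the remaining entries
         of B; imark records where that split in B occurs */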
1415       PetscInt imark = -1;
1416       if (v) {
1417         *v = v_p = mat->rowvalues;
1418         for (i=0; i<nzB; i++) {
1419           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1420           else break;
1421         }
1422         imark = i;
1423         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1424         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1425       }
1426       if (idx) {
1427         *idx = idx_p = mat->rowindices;
1428         if (imark > -1) {
1429           for (i=0; i<imark; i++) {
1430             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1431           }
1432         } else {
1433           for (i=0; i<nzB; i++) {
1434             if (cmap[cworkB[i]/bs] < cstart)
1435               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1436             else break;
1437           }
1438           imark = i;
1439         }
1440         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1441         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1442       }
1443     } else {
1444       if (idx) *idx = 0;
1445       if (v)   *v   = 0;
1446     }
1447   }
1448   *nz = nztot;
1449   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1450   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1451   PetscFunctionReturn(0);
1452 }
1453 
1454 #undef __FUNCT__
1455 #define __FUNCT__ "MatRestoreRow_MPIBAIJ"
1456 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1457 {
1458   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1459 
1460   PetscFunctionBegin;
1461   if (!baij->getrowactive) {
1462     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1463   }
1464   baij->getrowactive = PETSC_FALSE;
1465   PetscFunctionReturn(0);
1466 }
1467 
1468 #undef __FUNCT__
1469 #define __FUNCT__ "MatZeroEntries_MPIBAIJ"
1470 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1471 {
1472   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1473   PetscErrorCode ierr;
1474 
1475   PetscFunctionBegin;
1476   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1477   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1478   PetscFunctionReturn(0);
1479 }
1480 
1481 #undef __FUNCT__
1482 #define __FUNCT__ "MatGetInfo_MPIBAIJ"
1483 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1484 {
1485   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
1486   Mat            A = a->A,B = a->B;
1487   PetscErrorCode ierr;
1488   PetscReal      isend[5],irecv[5];
1489 
1490   PetscFunctionBegin;
1491   info->block_size     = (PetscReal)matin->rmap.bs;
1492   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1493   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1494   isend[3] = info->memory;  isend[4] = info->mallocs;
1495   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1496   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1497   isend[3] += info->memory;  isend[4] += info->mallocs;
1498   if (flag == MAT_LOCAL) {
1499     info->nz_used      = isend[0];
1500     info->nz_allocated = isend[1];
1501     info->nz_unneeded  = isend[2];
1502     info->memory       = isend[3];
1503     info->mallocs      = isend[4];
1504   } else if (flag == MAT_GLOBAL_MAX) {
1505     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1506     info->nz_used      = irecv[0];
1507     info->nz_allocated = irecv[1];
1508     info->nz_unneeded  = irecv[2];
1509     info->memory       = irecv[3];
1510     info->mallocs      = irecv[4];
1511   } else if (flag == MAT_GLOBAL_SUM) {
1512     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1513     info->nz_used      = irecv[0];
1514     info->nz_allocated = irecv[1];
1515     info->nz_unneeded  = irecv[2];
1516     info->memory       = irecv[3];
1517     info->mallocs      = irecv[4];
1518   } else {
1519     SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1520   }
1521   info->rows_global       = (PetscReal)matin->rmap.N;
1522   info->columns_global    = (PetscReal)matin->cmap.N;
1523   info->rows_local        = (PetscReal)matin->rmap.n;
1524   info->columns_local     = (PetscReal)matin->cmap.n;
1525   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1526   info->fill_ratio_needed = 0;
1527   info->factor_mallocs    = 0;
1528   PetscFunctionReturn(0);
1529 }
1530 
1531 #undef __FUNCT__
1532 #define __FUNCT__ "MatSetOption_MPIBAIJ"
1533 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op)
1534 {
1535   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1536   PetscErrorCode ierr;
1537 
1538   PetscFunctionBegin;
1539   switch (op) {
1540   case MAT_NO_NEW_NONZERO_LOCATIONS:
1541   case MAT_YES_NEW_NONZERO_LOCATIONS:
1542   case MAT_COLUMNS_UNSORTED:
1543   case MAT_COLUMNS_SORTED:
1544   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1545   case MAT_KEEP_ZEROED_ROWS:
1546   case MAT_NEW_NONZERO_LOCATION_ERR:
1547     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1548     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1549     break;
1550   case MAT_ROW_ORIENTED:
1551     a->roworiented = PETSC_TRUE;
1552     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1553     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1554     break;
1555   case MAT_ROWS_SORTED:
1556   case MAT_ROWS_UNSORTED:
1557   case MAT_YES_NEW_DIAGONALS:
1558     ierr = PetscInfo(A,"Option ignored\n");CHKERRQ(ierr);
1559     break;
1560   case MAT_COLUMN_ORIENTED:
1561     a->roworiented = PETSC_FALSE;
1562     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1563     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1564     break;
1565   case MAT_IGNORE_OFF_PROC_ENTRIES:
1566     a->donotstash = PETSC_TRUE;
1567     break;
1568   case MAT_NO_NEW_DIAGONALS:
1569     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1570   case MAT_USE_HASH_TABLE:
1571     a->ht_flag = PETSC_TRUE;
1572     break;
1573   case MAT_SYMMETRIC:
1574   case MAT_STRUCTURALLY_SYMMETRIC:
1575   case MAT_HERMITIAN:
1576   case MAT_SYMMETRY_ETERNAL:
1577     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1578     break;
1579   case MAT_NOT_SYMMETRIC:
1580   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1581   case MAT_NOT_HERMITIAN:
1582   case MAT_NOT_SYMMETRY_ETERNAL:
1583     break;
1584   default:
1585     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1586   }
1587   PetscFunctionReturn(0);
1588 }
1589 
1590 #undef __FUNCT__
1591 #define __FUNCT__ "MatTranspose_MPIBAIJ("
1592 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,Mat *matout)
1593 {
1594   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
1595   Mat_SeqBAIJ    *Aloc;
1596   Mat            B;
1597   PetscErrorCode ierr;
1598   PetscInt       M=A->rmap.N,N=A->cmap.N,*ai,*aj,i,*rvals,j,k,col;
1599   PetscInt       bs=A->rmap.bs,mbs=baij->mbs;
1600   MatScalar      *a;
1601 
1602   PetscFunctionBegin;
1603   if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1604   ierr = MatCreate(A->comm,&B);CHKERRQ(ierr);
1605   ierr = MatSetSizes(B,A->cmap.n,A->rmap.n,N,M);CHKERRQ(ierr);
1606   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
1607   ierr = MatMPIBAIJSetPreallocation(B,A->rmap.bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1608 
1609   /* copy over the A part */
1610   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1611   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1612   ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
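  /* rvals[] holds the bs global point-row indices of block row i of A; these become
     the column indices at which the transposed block entries are inserted into B */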
1613 
1614   for (i=0; i<mbs; i++) {
1615     rvals[0] = bs*(baij->rstartbs + i);
1616     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1617     for (j=ai[i]; j<ai[i+1]; j++) {
1618       col = (baij->cstartbs+aj[j])*bs;
1619       for (k=0; k<bs; k++) {
1620         ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1621         col++; a += bs;
1622       }
1623     }
1624   }
1625   /* copy over the B part */
1626   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1627   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1628   for (i=0; i<mbs; i++) {
1629     rvals[0] = bs*(baij->rstartbs + i);
1630     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1631     for (j=ai[i]; j<ai[i+1]; j++) {
1632       col = baij->garray[aj[j]]*bs;
1633       for (k=0; k<bs; k++) {
1634         ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1635         col++; a += bs;
1636       }
1637     }
1638   }
1639   ierr = PetscFree(rvals);CHKERRQ(ierr);
1640   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1641   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1642 
1643   if (matout) {
1644     *matout = B;
1645   } else {
1646     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1647   }
1648   PetscFunctionReturn(0);
1649 }
1650 
1651 #undef __FUNCT__
1652 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
1653 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1654 {
1655   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1656   Mat            a = baij->A,b = baij->B;
1657   PetscErrorCode ierr;
1658   PetscInt       s1,s2,s3;
1659 
1660   PetscFunctionBegin;
1661   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1662   if (rr) {
1663     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1664     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1665     /* Overlap communication with computation. */
1666     ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1667   }
1668   if (ll) {
1669     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1670     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1671     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1672   }
1673   /* scale  the diagonal block */
1674   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1675 
1676   if (rr) {
1677     /* Do a scatter end and then right scale the off-diagonal block */
1678     ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1679     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1680   }
1681 
1682   PetscFunctionReturn(0);
1683 }
1684 
1685 #undef __FUNCT__
1686 #define __FUNCT__ "MatZeroRows_MPIBAIJ"
1687 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
1688 {
1689   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1690   PetscErrorCode ierr;
1691   PetscMPIInt    imdex,size = l->size,n,rank = l->rank;
1692   PetscInt       i,*owners = A->rmap.range;
1693   PetscInt       *nprocs,j,idx,nsends,row;
1694   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
1695   PetscInt       *rvalues,tag = A->tag,count,base,slen,*source,lastidx = -1;
1696   PetscInt       *lens,*lrows,*values,rstart_bs=A->rmap.rstart;
1697   MPI_Comm       comm = A->comm;
1698   MPI_Request    *send_waits,*recv_waits;
1699   MPI_Status     recv_status,*send_status;
1700 #if defined(PETSC_USE_DEBUG)
1701   PetscTruth     found = PETSC_FALSE;
1702 #endif
1703 
1704   PetscFunctionBegin;
1705   /*  first count number of contributors to each processor */
1706   ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
1707   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
1708   ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
1709   j = 0;
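  /* locate the owning process of each row: j sweeps the ownership ranges and, if the
     row indices ever decrease (unsorted input), the sweep restarts at process 0 */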
1710   for (i=0; i<N; i++) {
1711     if (lastidx > (idx = rows[i])) j = 0;
1712     lastidx = idx;
1713     for (; j<size; j++) {
1714       if (idx >= owners[j] && idx < owners[j+1]) {
1715         nprocs[2*j]++;
1716         nprocs[2*j+1] = 1;
1717         owner[i] = j;
1718 #if defined(PETSC_USE_DEBUG)
1719         found = PETSC_TRUE;
1720 #endif
1721         break;
1722       }
1723     }
1724 #if defined(PETSC_USE_DEBUG)
1725     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
1726     found = PETSC_FALSE;
1727 #endif
1728   }
1729   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
1730 
1731   /* inform other processors of number of messages and max length*/
1732   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
1733 
1734   /* post receives:   */
1735   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
1736   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
1737   for (i=0; i<nrecvs; i++) {
1738     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
1739   }
1740 
1741   /* do sends:
1742      1) starts[i] gives the starting index in svalues for stuff going to
1743      the ith processor
1744   */
1745   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
1746   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
1747   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
1748   starts[0]  = 0;
1749   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1750   for (i=0; i<N; i++) {
1751     svalues[starts[owner[i]]++] = rows[i];
1752   }
1753 
1754   starts[0] = 0;
1755   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1756   count = 0;
1757   for (i=0; i<size; i++) {
1758     if (nprocs[2*i+1]) {
1759       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
1760     }
1761   }
1762   ierr = PetscFree(starts);CHKERRQ(ierr);
1763 
1764   base = owners[rank];
1765 
1766   /*  wait on receives */
1767   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
1768   source = lens + nrecvs;
1769   count  = nrecvs; slen = 0;
1770   while (count) {
1771     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
1772     /* unpack receives into our local space */
1773     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
1774     source[imdex]  = recv_status.MPI_SOURCE;
1775     lens[imdex]    = n;
1776     slen          += n;
1777     count--;
1778   }
1779   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1780 
1781   /* move the data into the send scatter */
1782   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
1783   count = 0;
1784   for (i=0; i<nrecvs; i++) {
1785     values = rvalues + i*nmax;
1786     for (j=0; j<lens[i]; j++) {
1787       lrows[count++] = values[j] - base;
1788     }
1789   }
1790   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1791   ierr = PetscFree(lens);CHKERRQ(ierr);
1792   ierr = PetscFree(owner);CHKERRQ(ierr);
1793   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1794 
1795   /* actually zap the local rows */
1796   /*
1797         Zero the required rows. If the "diagonal block" of the matrix
1798      is square and the user wishes to set the diagonal we use separate
1799      code so that MatSetValues() is not called for each diagonal allocating
1800      new memory, thus calling lots of mallocs and slowing things down.
1801 
1802        Contributed by: Matthew Knepley
1803   */
1804   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1805   ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0);CHKERRQ(ierr);
1806   if ((diag != 0.0) && (l->A->rmap.N == l->A->cmap.N)) {
1807     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag);CHKERRQ(ierr);
1808   } else if (diag != 0.0) {
1809     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr);
1810     if (((Mat_SeqBAIJ*)l->A->data)->nonew) {
1811       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1812 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1813     }
1814     for (i=0; i<slen; i++) {
1815       row  = lrows[i] + rstart_bs;
1816       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
1817     }
1818     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1819     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1820   } else {
1821     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr);
1822   }
1823 
1824   ierr = PetscFree(lrows);CHKERRQ(ierr);
1825 
1826   /* wait on sends */
1827   if (nsends) {
1828     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1829     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1830     ierr = PetscFree(send_status);CHKERRQ(ierr);
1831   }
1832   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1833   ierr = PetscFree(svalues);CHKERRQ(ierr);
1834 
1835   PetscFunctionReturn(0);
1836 }
1837 
1838 #undef __FUNCT__
1839 #define __FUNCT__ "MatPrintHelp_MPIBAIJ"
1840 PetscErrorCode MatPrintHelp_MPIBAIJ(Mat A)
1841 {
1842   Mat_MPIBAIJ       *a   = (Mat_MPIBAIJ*)A->data;
1843   MPI_Comm          comm = A->comm;
1844   static PetscTruth called = PETSC_FALSE;
1845   PetscErrorCode    ierr;
1846 
1847   PetscFunctionBegin;
1848   if (!a->rank) {
1849     ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr);
1850   }
1851   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1852   ierr = (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
1853   ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
1854   PetscFunctionReturn(0);
1855 }
1856 
1857 #undef __FUNCT__
1858 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
1859 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1860 {
1861   Mat_MPIBAIJ    *a   = (Mat_MPIBAIJ*)A->data;
1862   PetscErrorCode ierr;
1863 
1864   PetscFunctionBegin;
1865   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1866   PetscFunctionReturn(0);
1867 }
1868 
1869 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
1870 
1871 #undef __FUNCT__
1872 #define __FUNCT__ "MatEqual_MPIBAIJ"
1873 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag)
1874 {
1875   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1876   Mat            a,b,c,d;
1877   PetscTruth     flg;
1878   PetscErrorCode ierr;
1879 
1880   PetscFunctionBegin;
1881   a = matA->A; b = matA->B;
1882   c = matB->A; d = matB->B;
1883 
1884   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1885   if (flg) {
1886     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1887   }
1888   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1889   PetscFunctionReturn(0);
1890 }
1891 
1892 #undef __FUNCT__
1893 #define __FUNCT__ "MatCopy_MPIBAIJ"
1894 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
1895 {
1896   PetscErrorCode ierr;
1897   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ *)A->data;
1898   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ *)B->data;
1899 
1900   PetscFunctionBegin;
1901   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1902   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1903     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1904   } else {
1905     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1906     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1907   }
1908   PetscFunctionReturn(0);
1909 }
1910 
1911 #undef __FUNCT__
1912 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ"
1913 PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A)
1914 {
1915   PetscErrorCode ierr;
1916 
1917   PetscFunctionBegin;
1918   ierr =  MatMPIBAIJSetPreallocation(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1919   PetscFunctionReturn(0);
1920 }
1921 
1922 #include "petscblaslapack.h"
1923 #undef __FUNCT__
1924 #define __FUNCT__ "MatAXPY_MPIBAIJ"
1925 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1926 {
1927   PetscErrorCode ierr;
1928   Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data;
1929   PetscBLASInt   bnz,one=1;
1930   Mat_SeqBAIJ    *x,*y;
1931 
1932   PetscFunctionBegin;
1933   if (str == SAME_NONZERO_PATTERN) {
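    /* with identical nonzero patterns, Y += a*X reduces to a single BLAS axpy over
       the stored values of the diagonal (A) and off-diagonal (B) blocks */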
1934     PetscScalar alpha = a;
1935     x = (Mat_SeqBAIJ *)xx->A->data;
1936     y = (Mat_SeqBAIJ *)yy->A->data;
1937     bnz = (PetscBLASInt)x->nz;
1938     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1939     x = (Mat_SeqBAIJ *)xx->B->data;
1940     y = (Mat_SeqBAIJ *)yy->B->data;
1941     bnz = (PetscBLASInt)x->nz;
1942     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1943   } else {
1944     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1945   }
1946   PetscFunctionReturn(0);
1947 }
1948 
1949 #undef __FUNCT__
1950 #define __FUNCT__ "MatRealPart_MPIBAIJ"
1951 PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
1952 {
1953   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ*)A->data;
1954   PetscErrorCode ierr;
1955 
1956   PetscFunctionBegin;
1957   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1958   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1959   PetscFunctionReturn(0);
1960 }
1961 
1962 #undef __FUNCT__
1963 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ"
1964 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
1965 {
1966   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ*)A->data;
1967   PetscErrorCode ierr;
1968 
1969   PetscFunctionBegin;
1970   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1971   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1972   PetscFunctionReturn(0);
1973 }
1974 
1975 /* -------------------------------------------------------------------*/
1976 static struct _MatOps MatOps_Values = {
1977        MatSetValues_MPIBAIJ,
1978        MatGetRow_MPIBAIJ,
1979        MatRestoreRow_MPIBAIJ,
1980        MatMult_MPIBAIJ,
1981 /* 4*/ MatMultAdd_MPIBAIJ,
1982        MatMultTranspose_MPIBAIJ,
1983        MatMultTransposeAdd_MPIBAIJ,
1984        0,
1985        0,
1986        0,
1987 /*10*/ 0,
1988        0,
1989        0,
1990        0,
1991        MatTranspose_MPIBAIJ,
1992 /*15*/ MatGetInfo_MPIBAIJ,
1993        MatEqual_MPIBAIJ,
1994        MatGetDiagonal_MPIBAIJ,
1995        MatDiagonalScale_MPIBAIJ,
1996        MatNorm_MPIBAIJ,
1997 /*20*/ MatAssemblyBegin_MPIBAIJ,
1998        MatAssemblyEnd_MPIBAIJ,
1999        0,
2000        MatSetOption_MPIBAIJ,
2001        MatZeroEntries_MPIBAIJ,
2002 /*25*/ MatZeroRows_MPIBAIJ,
2003        0,
2004        0,
2005        0,
2006        0,
2007 /*30*/ MatSetUpPreallocation_MPIBAIJ,
2008        0,
2009        0,
2010        0,
2011        0,
2012 /*35*/ MatDuplicate_MPIBAIJ,
2013        0,
2014        0,
2015        0,
2016        0,
2017 /*40*/ MatAXPY_MPIBAIJ,
2018        MatGetSubMatrices_MPIBAIJ,
2019        MatIncreaseOverlap_MPIBAIJ,
2020        MatGetValues_MPIBAIJ,
2021        MatCopy_MPIBAIJ,
2022 /*45*/ MatPrintHelp_MPIBAIJ,
2023        MatScale_MPIBAIJ,
2024        0,
2025        0,
2026        0,
2027 /*50*/ 0,
2028        0,
2029        0,
2030        0,
2031        0,
2032 /*55*/ 0,
2033        0,
2034        MatSetUnfactored_MPIBAIJ,
2035        0,
2036        MatSetValuesBlocked_MPIBAIJ,
2037 /*60*/ 0,
2038        MatDestroy_MPIBAIJ,
2039        MatView_MPIBAIJ,
2040        0,
2041        0,
2042 /*65*/ 0,
2043        0,
2044        0,
2045        0,
2046        0,
2047 /*70*/ MatGetRowMax_MPIBAIJ,
2048        0,
2049        0,
2050        0,
2051        0,
2052 /*75*/ 0,
2053        0,
2054        0,
2055        0,
2056        0,
2057 /*80*/ 0,
2058        0,
2059        0,
2060        0,
2061        MatLoad_MPIBAIJ,
2062 /*85*/ 0,
2063        0,
2064        0,
2065        0,
2066        0,
2067 /*90*/ 0,
2068        0,
2069        0,
2070        0,
2071        0,
2072 /*95*/ 0,
2073        0,
2074        0,
2075        0,
2076        0,
2077 /*100*/0,
2078        0,
2079        0,
2080        0,
2081        0,
2082 /*105*/0,
2083        MatRealPart_MPIBAIJ,
2084        MatImaginaryPart_MPIBAIJ};
2085 
2086 
2087 EXTERN_C_BEGIN
2088 #undef __FUNCT__
2089 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
2090 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
2091 {
2092   PetscFunctionBegin;
2093   *a      = ((Mat_MPIBAIJ *)A->data)->A;
2094   *iscopy = PETSC_FALSE;
2095   PetscFunctionReturn(0);
2096 }
2097 EXTERN_C_END
2098 
2099 EXTERN_C_BEGIN
2100 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*);
2101 EXTERN_C_END
2102 
2103 #undef __FUNCT__
2104 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ"
2105 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
2106 {
2107   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)B->data;
2108   PetscInt       m = B->rmap.n/bs,cstart = baij->cstartbs, cend = baij->cendbs,j,nnz,i,d;
2109   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart = baij->rstartbs,ii;
2110   const PetscInt *JJ;
2111   PetscScalar    *values;
2112   PetscErrorCode ierr;
2113 
2114   PetscFunctionBegin;
2115 #if defined(PETSC_USE_DEBUG)
2116   if (Ii[0]) SETERRQ1(PETSC_ERR_ARG_RANGE,"Ii[0] must be 0 it is %D",Ii[0]);
2117 #endif
2118   ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2119   o_nnz = d_nnz + m;
2120 
2121   for (i=0; i<m; i++) {
2122     nnz     = Ii[i+1]- Ii[i];
2123     JJ      = J + Ii[i];
2124     nnz_max = PetscMax(nnz_max,nnz);
2125 #if defined(PETSC_USE_DEBUG)
2126     if (nnz < 0) SETERRQ1(PETSC_ERR_ARG_RANGE,"Local row %D has a negative number of columns %D",i,nnz);
2127 #endif
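    /* split this block row's columns at the ownership range [cstart,cend): d counts
       the block columns in the diagonal block, the remainder are off-diagonal */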
2128     for (j=0; j<nnz; j++) {
2129       if (*JJ >= cstart) break;
2130       JJ++;
2131     }
2132     d = 0;
2133     for (; j<nnz; j++) {
2134       if (*JJ++ >= cend) break;
2135       d++;
2136     }
2137     d_nnz[i] = d;
2138     o_nnz[i] = nnz - d;
2139   }
2140   ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2141   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2142 
2143   if (v) values = (PetscScalar*)v;
2144   else {
2145     ierr = PetscMalloc(bs*bs*(nnz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2146     ierr = PetscMemzero(values,bs*bs*nnz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2147   }
2148 
2149   ierr = MatSetOption(B,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2150   for (i=0; i<m; i++) {
2151     ii   = i + rstart;
2152     nnz  = Ii[i+1]- Ii[i];
2153     ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&ii,nnz,J+Ii[i],values+(v ? Ii[i] : 0),INSERT_VALUES);CHKERRQ(ierr);
2154   }
2155   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2156   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2157   ierr = MatSetOption(B,MAT_COLUMNS_UNSORTED);CHKERRQ(ierr);
2158 
2159   if (!v) {
2160     ierr = PetscFree(values);CHKERRQ(ierr);
2161   }
2162   PetscFunctionReturn(0);
2163 }
2164 
2165 #undef __FUNCT__
2166 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR"
2167 /*@C
2168    MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in block AIJ
2169    (BAIJ) format (block compressed row).
2170 
2171    Collective on Mat
2172 
2173    Input Parameters:
2174 +  A - the matrix
.  bs - the block size
2175 .  i - the indices into j for the start of each local row (starts with zero)
2176 .  j - the column indices for each local row (starts with zero); these must be sorted for each row
2177 -  v - optional values in the matrix
2178 
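   Example usage, a minimal sketch (comm, m, n, M and N are assumed already chosen by
   the caller; the CSR arrays are purely illustrative). With bs=2, the first local
   block row holds blocks in block columns 0 and 1 and the second a single block in
   block column 1; passing PETSC_NULL for v preallocates and assembles zero blocks,
   whose values can be set later with MatSetValuesBlocked():

.vb
   Mat      B;
   PetscInt i[] = {0,2,3};
   PetscInt j[] = {0,1,1};

   MatCreate(comm,&B);
   MatSetSizes(B,m,n,M,N);
   MatSetType(B,MATMPIBAIJ);
   MatMPIBAIJSetPreallocationCSR(B,2,i,j,PETSC_NULL);
.ve
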
2179    Level: developer
2180 
2181 .keywords: matrix, block, aij, compressed row, sparse, parallel
2182 
2183 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIBAIJ(), MATMPIBAIJ
2184 @*/
2185 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2186 {
2187   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]);
2188 
2189   PetscFunctionBegin;
2190   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
2191   if (f) {
2192     ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr);
2193   }
2194   PetscFunctionReturn(0);
2195 }
2196 
2197 EXTERN_C_BEGIN
2198 #undef __FUNCT__
2199 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
2200 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
2201 {
2202   Mat_MPIBAIJ    *b;
2203   PetscErrorCode ierr;
2204   PetscInt       i;
2205 
2206   PetscFunctionBegin;
2207   B->preallocated = PETSC_TRUE;
2208   ierr = PetscOptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2209 
2210   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
2211   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2212   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2213   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2214   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2215 
2216   B->rmap.bs  = bs;
2217   B->cmap.bs  = bs;
2218   ierr = PetscMapInitialize(B->comm,&B->rmap);CHKERRQ(ierr);
2219   ierr = PetscMapInitialize(B->comm,&B->cmap);CHKERRQ(ierr);
2220 
2221   if (d_nnz) {
2222     for (i=0; i<B->rmap.n/bs; i++) {
2223       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
2224     }
2225   }
2226   if (o_nnz) {
2227     for (i=0; i<B->rmap.n/bs; i++) {
2228       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
2229     }
2230   }
2231 
2232   b = (Mat_MPIBAIJ*)B->data;
2233   b->bs2 = bs*bs;
2234   b->mbs = B->rmap.n/bs;
2235   b->nbs = B->cmap.n/bs;
2236   b->Mbs = B->rmap.N/bs;
2237   b->Nbs = B->cmap.N/bs;
2238 
2239   for (i=0; i<=b->size; i++) {
2240     b->rangebs[i] = B->rmap.range[i]/bs;
2241   }
2242   b->rstartbs = B->rmap.rstart/bs;
2243   b->rendbs   = B->rmap.rend/bs;
2244   b->cstartbs = B->cmap.rstart/bs;
2245   b->cendbs   = B->cmap.rend/bs;
2246 
2247   ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2248   ierr = MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);CHKERRQ(ierr);
2249   ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
2250   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2251   ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
2252   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2253   ierr = MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);CHKERRQ(ierr);
2254   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
2255   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
2256   ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
2257 
2258   ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);
2259 
2260   PetscFunctionReturn(0);
2261 }
2262 EXTERN_C_END
2263 
2264 EXTERN_C_BEGIN
2265 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
2266 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
2267 EXTERN_C_END
2268 
2269 /*MC
2270    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
2271 
2272    Options Database Keys:
2273 . -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
2274 
2275   Level: beginner
2276 
2277 .seealso: MatCreateMPIBAIJ()
2278 M*/
2279 
2280 EXTERN_C_BEGIN
2281 #undef __FUNCT__
2282 #define __FUNCT__ "MatCreate_MPIBAIJ"
2283 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIBAIJ(Mat B)
2284 {
2285   Mat_MPIBAIJ    *b;
2286   PetscErrorCode ierr;
2287   PetscTruth     flg;
2288 
2289   PetscFunctionBegin;
2290   ierr = PetscNew(Mat_MPIBAIJ,&b);CHKERRQ(ierr);
2291   B->data = (void*)b;
2292 
2293 
2294   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2295   B->mapping    = 0;
2296   B->factor     = 0;
2297   B->assembled  = PETSC_FALSE;
2298 
2299   B->insertmode = NOT_SET_VALUES;
2300   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
2301   ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr);
2302 
2303   /* build local table of row and column ownerships */
2304   ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);
2305 
2306   /* build cache for off array entries formed */
2307   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
2308   b->donotstash  = PETSC_FALSE;
2309   b->colmap      = PETSC_NULL;
2310   b->garray      = PETSC_NULL;
2311   b->roworiented = PETSC_TRUE;
2312 
2313 #if defined(PETSC_USE_MAT_SINGLE)
2314   /* stuff for MatSetValues_XXX in single precision */
2315   b->setvalueslen     = 0;
2316   b->setvaluescopy    = PETSC_NULL;
2317 #endif
2318 
2319   /* stuff used in block assembly */
2320   b->barray       = 0;
2321 
2322   /* stuff used for matrix vector multiply */
2323   b->lvec         = 0;
2324   b->Mvctx        = 0;
2325 
2326   /* stuff for MatGetRow() */
2327   b->rowindices   = 0;
2328   b->rowvalues    = 0;
2329   b->getrowactive = PETSC_FALSE;
2330 
2331   /* hash table stuff */
2332   b->ht           = 0;
2333   b->hd           = 0;
2334   b->ht_size      = 0;
2335   b->ht_flag      = PETSC_FALSE;
2336   b->ht_fact      = 0;
2337   b->ht_total_ct  = 0;
2338   b->ht_insert_ct = 0;
2339 
2340   ierr = PetscOptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
2341   if (flg) {
2342     PetscReal fact = 1.39;
2343     ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
2344     ierr = PetscOptionsGetReal(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
2345     if (fact <= 1.0) fact = 1.39;
2346     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
2347     ierr = PetscInfo1(0,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
2348   }
2349   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2350                                      "MatStoreValues_MPIBAIJ",
2351                                      MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
2352   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2353                                      "MatRetrieveValues_MPIBAIJ",
2354                                      MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
2355   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
2356                                      "MatGetDiagonalBlock_MPIBAIJ",
2357                                      MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
2358   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C",
2359                                      "MatMPIBAIJSetPreallocation_MPIBAIJ",
2360                                      MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
2361   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",
2362 				     "MatMPIBAIJSetPreallocationCSR_MPIAIJ",
2363 				     MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
2364   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
2365                                      "MatDiagonalScaleLocal_MPIBAIJ",
2366                                      MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
2367   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C",
2368                                      "MatSetHashTableFactor_MPIBAIJ",
2369                                      MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
2370   PetscFunctionReturn(0);
2371 }
2372 EXTERN_C_END
2373 
2374 /*MC
2375    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
2376 
2377    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
2378    and MATMPIBAIJ otherwise.
2379 
2380    Options Database Keys:
2381 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
2382 
2383   Level: beginner
2384 
2385 .seealso: MatCreateMPIBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
2386 M*/
2387 
2388 EXTERN_C_BEGIN
2389 #undef __FUNCT__
2390 #define __FUNCT__ "MatCreate_BAIJ"
2391 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_BAIJ(Mat A)
2392 {
2393   PetscErrorCode ierr;
2394   PetscMPIInt    size;
2395 
2396   PetscFunctionBegin;
2397   ierr = PetscObjectChangeTypeName((PetscObject)A,MATBAIJ);CHKERRQ(ierr);
2398   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
2399   if (size == 1) {
2400     ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr);
2401   } else {
2402     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
2403   }
2404   PetscFunctionReturn(0);
2405 }
2406 EXTERN_C_END
2407 
2408 #undef __FUNCT__
2409 #define __FUNCT__ "MatMPIBAIJSetPreallocation"
2410 /*@C
2411    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
2412    (block compressed row).  For good matrix assembly performance
2413    the user should preallocate the matrix storage by setting the parameters
2414    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2415    performance can be increased by more than a factor of 50.
2416 
2417    Collective on Mat
2418 
2419    Input Parameters:
2420 +  A - the matrix
2421 .  bs   - size of the blocks
2422 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2423            submatrix  (same for all local rows)
2424 .  d_nnz - array containing the number of block nonzeros in the various block rows
2425            in the diagonal portion of the local submatrix (possibly different for each block
2426            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
2427 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2428            submatrix (same for all local rows).
2429 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2430            off-diagonal portion of the local submatrix (possibly different for
2431            each block row) or PETSC_NULL.
2432 
2433    If the *_nnz parameter is given then the *_nz parameter is ignored
2434 
2435    Options Database Keys:
2436 .   -mat_no_unroll - uses code that does not unroll the loops in the
2437                      block calculations (much slower)
2438 .   -mat_block_size - size of the blocks to use
2439 
2440    Notes:
2441    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
2442    then it must be used on all processors that share the object for that argument.
2443 
2444    Storage Information:
2445    For a square global matrix we define each processor's diagonal portion
2446    to be its local rows and the corresponding columns (a square submatrix);
2447    each processor's off-diagonal portion encompasses the remainder of the
2448    local matrix (a rectangular submatrix).
2449 
2450    The user can specify preallocated storage for the diagonal part of
2451    the local submatrix with either d_nz or d_nnz (not both).  Set
2452    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2453    memory allocation.  Likewise, specify preallocated storage for the
2454    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2455 
2456    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2457    the figure below we depict these three local rows and all columns (0-11).
2458 
2459 .vb
2460            0 1 2 3 4 5 6 7 8 9 10 11
2461           -------------------
2462    row 3  |  o o o d d d o o o o o o
2463    row 4  |  o o o d d d o o o o o o
2464    row 5  |  o o o d d d o o o o o o
2465           -------------------
2466 .ve
2467 
2468    Thus, any entries in the d locations are stored in the d (diagonal)
2469    submatrix, and any entries in the o locations are stored in the
2470    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
2471    stored simply in the MATSEQBAIJ format for compressed row storage.
2472 
2473    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2474    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2475    In general, for PDE problems in which most nonzeros are near the diagonal,
2476    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2477    or you will get TERRIBLE performance; see the users' manual chapter on
2478    matrices.
2479 
2480    Level: intermediate
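   For the three rows depicted above with bs=1, every row has 3 entries in the d
   locations and 9 in the o locations, so a sketch of the call on that processor
   (assuming A has already been created with type MATMPIBAIJ) is

.vb
   PetscInt d_nnz[] = {3,3,3},o_nnz[] = {9,9,9};
   MatMPIBAIJSetPreallocation(A,1,0,d_nnz,0,o_nnz);
.ve
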
2481 
2482 .keywords: matrix, block, aij, compressed row, sparse, parallel
2483 
2484 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR()
2485 @*/
2486 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2487 {
2488   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
2489 
2490   PetscFunctionBegin;
2491   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2492   if (f) {
2493     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2494   }
2495   PetscFunctionReturn(0);
2496 }
2497 
2498 #undef __FUNCT__
2499 #define __FUNCT__ "MatCreateMPIBAIJ"
2500 /*@C
2501    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
2502    (block compressed row).  For good matrix assembly performance
2503    the user should preallocate the matrix storage by setting the parameters
2504    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2505    performance can be increased by more than a factor of 50.
2506 
2507    Collective on MPI_Comm
2508 
2509    Input Parameters:
2510 +  comm - MPI communicator
2511 .  bs   - size of the blocks
2512 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
2513            This value should be the same as the local size used in creating the
2514            y vector for the matrix-vector product y = Ax.
2515 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
2516            This value should be the same as the local size used in creating the
2517            x vector for the matrix-vector product y = Ax.
2518 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
2519 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
2520 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
2521            submatrix  (same for all local rows)
2522 .  d_nnz - array containing the number of nonzero blocks in the various block rows
2523            in the diagonal portion of the local submatrix (possibly different for each block
2524            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
2525 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
2526            submatrix (same for all local rows).
2527 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
2528            off-diagonal portion of the local submatrix (possibly different for
2529            each block row) or PETSC_NULL.
2530 
2531    Output Parameter:
2532 .  A - the matrix
2533 
2534    Options Database Keys:
2535 .   -mat_no_unroll - uses code that does not unroll the loops in the
2536                      block calculations (much slower)
2537 .   -mat_block_size - size of the blocks to use
2538 
2539    Notes:
2540    If the *_nnz parameter is given then the *_nz parameter is ignored
2541 
2542    A nonzero block is any block that has 1 or more nonzeros in it
2543 
2544    The user MUST specify either the local or global matrix dimensions
2545    (possibly both).
2546 
2547    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
2548    then it must be used on all processors that share the object for that argument.
2549 
2550    Storage Information:
2551    For a square global matrix we define each processor's diagonal portion
2552    to be its local rows and the corresponding columns (a square submatrix);
2553    each processor's off-diagonal portion encompasses the remainder of the
2554    local matrix (a rectangular submatrix).
2555 
2556    The user can specify preallocated storage for the diagonal part of
2557    the local submatrix with either d_nz or d_nnz (not both).  Set
2558    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2559    memory allocation.  Likewise, specify preallocated storage for the
2560    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2561 
2562    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2563    the figure below we depict these three local rows and all columns (0-11).
2564 
2565 .vb
2566            0 1 2 3 4 5 6 7 8 9 10 11
2567           -------------------
2568    row 3  |  o o o d d d o o o o o o
2569    row 4  |  o o o d d d o o o o o o
2570    row 5  |  o o o d d d o o o o o o
2571           -------------------
2572 .ve
2573 
2574    Thus, any entries in the d locations are stored in the d (diagonal)
2575    submatrix, and any entries in the o locations are stored in the
2576    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
2577    stored simply in the MATSEQBAIJ format for compressed row storage.
2578 
2579    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2580    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2581    In general, for PDE problems in which most nonzeros are near the diagonal,
2582    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2583    or you will get TERRIBLE performance; see the users' manual chapter on
2584    matrices.
2585 
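   A minimal sketch of a typical call (the global sizes M and N and the per-block-row
   estimates d_nz=5 and o_nz=2 are illustrative assumptions, not recommendations):

.vb
   Mat A;
   MatCreateMPIBAIJ(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DECIDE,M,N,5,PETSC_NULL,2,PETSC_NULL,&A);
.ve
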
2586    Level: intermediate
2587 
2588 .keywords: matrix, block, aij, compressed row, sparse, parallel
2589 
2590 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
2591 @*/
2592 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2593 {
2594   PetscErrorCode ierr;
2595   PetscMPIInt    size;
2596 
2597   PetscFunctionBegin;
2598   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2599   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2600   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2601   if (size > 1) {
2602     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
2603     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2604   } else {
2605     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
2606     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2607   }
2608   PetscFunctionReturn(0);
2609 }
2610 
2611 #undef __FUNCT__
2612 #define __FUNCT__ "MatDuplicate_MPIBAIJ"
2613 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2614 {
2615   Mat            mat;
2616   Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
2617   PetscErrorCode ierr;
2618   PetscInt       len=0;
2619 
2620   PetscFunctionBegin;
2621   *newmat       = 0;
2622   ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr);
2623   ierr = MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);CHKERRQ(ierr);
2624   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
2625   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2626 
2627   mat->factor       = matin->factor;
2628   mat->preallocated = PETSC_TRUE;
2629   mat->assembled    = PETSC_TRUE;
2630   mat->insertmode   = NOT_SET_VALUES;
2631 
2632   a      = (Mat_MPIBAIJ*)mat->data;
2633   mat->rmap.bs  = matin->rmap.bs;
2634   a->bs2   = oldmat->bs2;
2635   a->mbs   = oldmat->mbs;
2636   a->nbs   = oldmat->nbs;
2637   a->Mbs   = oldmat->Mbs;
2638   a->Nbs   = oldmat->Nbs;
2639 
2640   ierr = PetscMapCopy(matin->comm,&matin->rmap,&mat->rmap);CHKERRQ(ierr);
2641   ierr = PetscMapCopy(matin->comm,&matin->cmap,&mat->cmap);CHKERRQ(ierr);
2642 
2643   a->size         = oldmat->size;
2644   a->rank         = oldmat->rank;
2645   a->donotstash   = oldmat->donotstash;
2646   a->roworiented  = oldmat->roworiented;
2647   a->rowindices   = 0;
2648   a->rowvalues    = 0;
2649   a->getrowactive = PETSC_FALSE;
2650   a->barray       = 0;
2651   a->rstartbs     = oldmat->rstartbs;
2652   a->rendbs       = oldmat->rendbs;
2653   a->cstartbs     = oldmat->cstartbs;
2654   a->cendbs       = oldmat->cendbs;
2655 
2656   /* hash table stuff */
2657   a->ht           = 0;
2658   a->hd           = 0;
2659   a->ht_size      = 0;
2660   a->ht_flag      = oldmat->ht_flag;
2661   a->ht_fact      = oldmat->ht_fact;
2662   a->ht_total_ct  = 0;
2663   a->ht_insert_ct = 0;
2664 
2665   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
2666   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
2667   ierr = MatStashCreate_Private(matin->comm,matin->rmap.bs,&mat->bstash);CHKERRQ(ierr);
2668   if (oldmat->colmap) {
2669 #if defined (PETSC_USE_CTABLE)
2670   ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2671 #else
2672   ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2673   ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2674   ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2675 #endif
2676   } else a->colmap = 0;
2677 
2678   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2679     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2680     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2681     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
2682   } else a->garray = 0;
2683 
2684   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2685   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2686   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2687   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2688 
2689   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2690   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2691   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2692   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2693   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
2694   *newmat = mat;
2695 
2696   PetscFunctionReturn(0);
2697 }
2698 
2699 #include "petscsys.h"
2700 
2701 #undef __FUNCT__
2702 #define __FUNCT__ "MatLoad_MPIBAIJ"
2703 PetscErrorCode MatLoad_MPIBAIJ(PetscViewer viewer, MatType type,Mat *newmat)
2704 {
2705   Mat            A;
2706   PetscErrorCode ierr;
2707   int            fd;
2708   PetscInt       i,nz,j,rstart,rend;
2709   PetscScalar    *vals,*buf;
2710   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2711   MPI_Status     status;
2712   PetscMPIInt    rank,size,maxnz;
2713   PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
2714   PetscInt       *locrowlens = PETSC_NULL,*procsnz = PETSC_NULL,*browners = PETSC_NULL;
2715   PetscInt       jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax;
2716   PetscMPIInt    tag = ((PetscObject)viewer)->tag;
2717   PetscInt       *dlens = PETSC_NULL,*odlens = PETSC_NULL,*mask = PETSC_NULL,*masked1 = PETSC_NULL,*masked2 = PETSC_NULL,rowcount,odcount;
2718   PetscInt       dcount,kmax,k,nzcount,tmp,mend;
2719 
2720   PetscFunctionBegin;
2721   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2722 
2723   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2724   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2725   if (!rank) {
2726     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2727     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2728     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2729   }
2730 
2731   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2732   M = header[1]; N = header[2];
2733 
2734   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2735 
2736   /*
2737      This code adds extra rows to make sure the number of rows is
2738      divisible by the blocksize
2739   */
2740   Mbs        = M/bs;
2741   extra_rows = bs - M + bs*Mbs;
2742   if (extra_rows == bs) extra_rows = 0;
2743   else                  Mbs++;
2744   if (extra_rows && !rank) {
2745     ierr = PetscInfo(0,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2746   }
2747 
2748   /* determine ownership of all rows */
2749   mbs        = Mbs/size + ((Mbs % size) > rank);
2750   m          = mbs*bs;
2751   ierr       = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr);
2752   ierr       = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2753 
2754   /* process 0 needs enough room for process with most rows */
2755   if (!rank) {
2756     mmax = rowners[1];
2757     for (i=2; i<size; i++) {
2758       mmax = PetscMax(mmax,rowners[i]);
2759     }
2760     mmax*=bs;
2761   } else mmax = m;
2762 
2763   rowners[0] = 0;
2764   for (i=2; i<=size; i++)  rowners[i] += rowners[i-1];
2765   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2766   rstart = rowners[rank];
2767   rend   = rowners[rank+1];
2768 
2769   /* distribute row lengths to all processors */
2770   ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr);
2771   if (!rank) {
2772     mend = m;
2773     if (size == 1) mend = mend - extra_rows;
2774     ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr);
2775     for (j=mend; j<m; j++) locrowlens[j] = 1;
2776     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2777     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2778     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2779     for (j=0; j<m; j++) {
2780       procsnz[0] += locrowlens[j];
2781     }
2782     for (i=1; i<size; i++) {
2783       mend = browners[i+1] - browners[i];
2784       if (i == size-1) mend = mend - extra_rows;
2785       ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr);
2786       for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
2787       /* calculate the number of nonzeros on each processor */
2788       for (j=0; j<browners[i+1]-browners[i]; j++) {
2789         procsnz[i] += rowlengths[j];
2790       }
2791       ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2792     }
2793     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2794   } else {
2795     ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2796   }
2797 
2798   if (!rank) {
2799     /* determine max buffer needed and allocate it */
2800     maxnz = procsnz[0];
2801     for (i=1; i<size; i++) {
2802       maxnz = PetscMax(maxnz,procsnz[i]);
2803     }
2804     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2805 
2806     /* read in my part of the matrix column indices  */
2807     nz     = procsnz[0];
2808     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2809     mycols = ibuf;
2810     if (size == 1)  nz -= extra_rows;
2811     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2812     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2813 
2814     /* read in each of the other processes' parts (except the last) and ship them off */
2815     for (i=1; i<size-1; i++) {
2816       nz   = procsnz[i];
2817       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2818       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2819     }
2820     /* read in the stuff for the last proc */
2821     if (size != 1) {
2822       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2823       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2824       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2825       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2826     }
2827     ierr = PetscFree(cols);CHKERRQ(ierr);
2828   } else {
2829     /* determine buffer space needed for message */
2830     nz = 0;
2831     for (i=0; i<m; i++) {
2832       nz += locrowlens[i];
2833     }
2834     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2835     mycols = ibuf;
2836     /* receive message of column indices*/
2837     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2838     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2839     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2840   }

  /* loop over local rows, determining number of off diagonal entries */
  ierr     = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
  ierr     = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
  ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
  rowcount = 0; nzcount = 0;
  for (i=0; i<mbs; i++) {
    dcount  = 0;
    odcount = 0;
    for (j=0; j<bs; j++) {
      kmax = locrowlens[rowcount];
      for (k=0; k<kmax; k++) {
        tmp = mycols[nzcount++]/bs;
        if (!mask[tmp]) {
          mask[tmp] = 1;
          if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
          else masked1[dcount++] = tmp;
        }
      }
      rowcount++;
    }

    dlens[i]  = dcount;
    odlens[i] = odcount;

    /* zero out the mask elements we set */
    for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
    for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
  }
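  /*
     The mask counts the distinct block columns touched by each block
     row.  Example with bs = 2, rstart = 1, rend = 3: if the two point
     rows of a block row reference block columns {1,2} and {2,5}, then
     blocks 1 and 2 fall in [rstart,rend) and 5 does not, giving
     dlens[i] = 2 and odlens[i] = 1.
  */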

  /* create our matrix */
  ierr = MatCreate(comm,&A);CHKERRQ(ierr);
  ierr = MatSetSizes(A,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
  ierr = MatSetType(A,type);CHKERRQ(ierr);
  ierr = MatMPIBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);

  /* the column indices in the file are sorted within each row, so tell MatSetValues() it may use the faster sorted-insertion path */
  ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);

  if (!rank) {
    ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
    /* read in my part of the matrix numerical values */
    nz     = procsnz[0];
    vals   = buf;
    mycols = ibuf;
    if (size == 1) nz -= extra_rows;
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
    if (size == 1) for (i=0; i<extra_rows; i++) { vals[nz+i] = 1.0; }

    /* insert into matrix */
    jj = rstart*bs;
    for (i=0; i<m; i++) {
      ierr    = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
    /* read in the other processors' values (except the last one's) and ship them out */
    for (i=1; i<size-1; i++) {
      nz   = procsnz[i];
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    /* the last proc */
    if (size != 1) {
      nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
      ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);

    /* receive message of values */
    vals   = buf;
    mycols = ibuf;
    ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");

    /* insert into matrix */
    jj = rstart*bs;
    for (i=0; i<m; i++) {
      ierr    = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
  }
  ierr = PetscFree(locrowlens);CHKERRQ(ierr);
  ierr = PetscFree(buf);CHKERRQ(ierr);
  ierr = PetscFree(ibuf);CHKERRQ(ierr);
  ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
  ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
  ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  *newmat = A;
  PetscFunctionReturn(0);
}

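/*
   A minimal usage sketch (not part of this file; the file name and block
   size are assumptions for illustration): MatLoad() dispatches to the
   loader above when the requested type is MATMPIBAIJ.

     Mat         A;
     PetscViewer fd;
     ierr = PetscOptionsSetValue("-matload_block_size","2");CHKERRQ(ierr);
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&fd);CHKERRQ(ierr);
     ierr = MatLoad(fd,MATMPIBAIJ,&A);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(fd);CHKERRQ(ierr);
*/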
#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
/*@
   MatMPIBAIJSetHashTableFactor - Sets the factor used to compute the size of the hash table.

   Collective on Mat

   Input Parameters:
+  mat  - the matrix
-  fact - the hash table factor

   Level: advanced

   Notes:
   This can also be set by the command line option: -mat_use_hash_table fact

.keywords: matrix, hashtable, factor, HT

.seealso: MatSetOption()
@*/
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
{
  PetscErrorCode ierr,(*f)(Mat,PetscReal);

  PetscFunctionBegin;
  ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);CHKERRQ(ierr);
  if (f) {
    ierr = (*f)(mat,fact);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
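
/*
   Example (a sketch; the factor value is chosen only for illustration):
   enable the hash table insertion path and then tune its size.

     ierr = MatSetOption(A,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
     ierr = MatMPIBAIJSetHashTableFactor(A,1.99);CHKERRQ(ierr);
*/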

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
{
  Mat_MPIBAIJ *baij;

  PetscFunctionBegin;
  baij = (Mat_MPIBAIJ*)mat->data;
  baij->ht_fact = fact;
  PetscFunctionReturn(0);
}
EXTERN_C_END

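/*
   MatMPIBAIJGetSeqBAIJ - gives access to the pieces of an MPIBAIJ matrix:
   Ad is the sequential "diagonal" block (the columns this process owns),
   Ao is the "off-diagonal" block, and colmap translates the compressed
   column numbering of Ao back to global block columns.  For example:

     Mat      Ad,Ao;
     PetscInt *colmap;
     ierr = MatMPIBAIJGetSeqBAIJ(A,&Ad,&Ao,&colmap);CHKERRQ(ierr);
*/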
#undef __FUNCT__
#define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
  PetscFunctionBegin;
  *Ad     = a->A;
  *Ao     = a->B;
  *colmap = a->garray;
  PetscFunctionReturn(0);
}