xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision fdbc33f4b8b49deff60a8ac5ab5f09c55e460e44)
1 #define PETSCMAT_DLL
2 
3 #include "src/mat/impls/baij/mpi/mpibaij.h"   /*I  "petscmat.h"  I*/
4 
5 EXTERN PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
6 EXTERN PetscErrorCode DisAssemble_MPIBAIJ(Mat);
7 EXTERN PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat,PetscInt,IS[],PetscInt);
8 EXTERN PetscErrorCode MatGetSubMatrices_MPIBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat *[]);
9 EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []);
10 EXTERN PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
11 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
12 EXTERN PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
13 EXTERN PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
14 EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar);
15 
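/*
   MatGetRowMaxAbs for the MPI case: compute the row-wise maxima of the
   diagonal block A (shifting the returned column indices to global ones),
   then merge in the maxima of the off-diagonal block B, translating its
   block-column indices back to global numbering through garray.
*/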
16 #undef __FUNCT__
17 #define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ"
18 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
19 {
20   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
21   PetscErrorCode ierr;
22   PetscInt       i,*idxb = 0;
23   PetscScalar    *va,*vb;
24   Vec            vtmp;
25 
26   PetscFunctionBegin;
27   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
28   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
29   if (idx) {
30     for (i=0; i<A->cmap->n; i++) {if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;}
31   }
32 
33   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
34   if (idx) {ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);}
35   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
36   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
37 
38   for (i=0; i<A->rmap->n; i++){
39     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {va[i] = vb[i]; if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);}
40   }
41 
42   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
43   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
44   if (idxb) {ierr = PetscFree(idxb);CHKERRQ(ierr);}
45   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
46   PetscFunctionReturn(0);
47 }
48 
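/*
   MatStoreValues/MatRetrieveValues for the MPI case simply forward to the
   sequential routines on the diagonal (A) and off-diagonal (B) blocks; the
   actual copying of the numerical values happens in the SeqBAIJ versions.
*/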
49 EXTERN_C_BEGIN
50 #undef __FUNCT__
51 #define __FUNCT__ "MatStoreValues_MPIBAIJ"
52 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIBAIJ(Mat mat)
53 {
54   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
55   PetscErrorCode ierr;
56 
57   PetscFunctionBegin;
58   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
59   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
60   PetscFunctionReturn(0);
61 }
62 EXTERN_C_END
63 
64 EXTERN_C_BEGIN
65 #undef __FUNCT__
66 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
67 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIBAIJ(Mat mat)
68 {
69   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
70   PetscErrorCode ierr;
71 
72   PetscFunctionBegin;
73   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
74   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
75   PetscFunctionReturn(0);
76 }
77 EXTERN_C_END
78 
79 /*
80      Local utility routine that creates a mapping from the global column
81    number to the local number in the off-diagonal part of the local
82    storage of the matrix.  This is done in a non-scalable way since the
83    length of colmap equals the number of block columns of the global matrix.
84 */
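/*
   A minimal sketch (illustrative only, not part of the build; gbcol and loc
   are hypothetical names) of how the colmap created below is consulted for a
   global block column gbcol.  The stored value is the first local scalar
   column of the block plus one, so a negative result after the -1 adjustment
   means the block column is absent from the off-diagonal part:

       PetscInt loc;
   #if defined(PETSC_USE_CTABLE)
       ierr = PetscTableFind(baij->colmap,gbcol+1,&loc);CHKERRQ(ierr);
       loc--;
   #else
       loc = baij->colmap[gbcol] - 1;
   #endif
       if (loc < 0) { ... gbcol has no blocks in B on this process ... }
*/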
85 #undef __FUNCT__
86 #define __FUNCT__ "CreateColmap_MPIBAIJ_Private"
87 PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat)
88 {
89   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
90   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
91   PetscErrorCode ierr;
92   PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;
93 
94   PetscFunctionBegin;
95 #if defined (PETSC_USE_CTABLE)
96   ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr);
97   for (i=0; i<nbs; i++){
98     ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
99   }
100 #else
101   ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr);
102   ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
103   ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
104   for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
105 #endif
106   PetscFunctionReturn(0);
107 }
108 
109 #define CHUNKSIZE  10
110 
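/*
   The two macros below insert a single scalar value into the diagonal (A)
   or off-diagonal (B) sequential block.  Both use the same scheme: a short
   binary search narrows the block-column range to at most three candidates,
   a linear scan then finds (or creates) the block column, and when the row
   is full MatSeqXAIJReallocateAIJ() grows the storage (CHUNKSIZE above is
   the growth increment used by that macro).  Within a bs-by-bs block the
   value lands at offset bs*cidx + ridx, i.e. blocks are stored column major.
*/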
111 #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
112 { \
113  \
114     brow = row/bs;  \
115     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
116     rmax = aimax[brow]; nrow = ailen[brow]; \
117       bcol = col/bs; \
118       ridx = row % bs; cidx = col % bs; \
119       low = 0; high = nrow; \
120       while (high-low > 3) { \
121         t = (low+high)/2; \
122         if (rp[t] > bcol) high = t; \
123         else              low  = t; \
124       } \
125       for (_i=low; _i<high; _i++) { \
126         if (rp[_i] > bcol) break; \
127         if (rp[_i] == bcol) { \
128           bap  = ap +  bs2*_i + bs*cidx + ridx; \
129           if (addv == ADD_VALUES) *bap += value;  \
130           else                    *bap  = value;  \
131           goto a_noinsert; \
132         } \
133       } \
134       if (a->nonew == 1) goto a_noinsert; \
135       if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
136       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
137       N = nrow++ - 1;  \
138       /* shift up all the later entries in this row */ \
139       for (ii=N; ii>=_i; ii--) { \
140         rp[ii+1] = rp[ii]; \
141         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
142       } \
143       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
144       rp[_i]                      = bcol;  \
145       ap[bs2*_i + bs*cidx + ridx] = value;  \
146       a_noinsert:; \
147     ailen[brow] = nrow; \
148 }
149 
150 #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
151 { \
152     brow = row/bs;  \
153     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
154     rmax = bimax[brow]; nrow = bilen[brow]; \
155       bcol = col/bs; \
156       ridx = row % bs; cidx = col % bs; \
157       low = 0; high = nrow; \
158       while (high-low > 3) { \
159         t = (low+high)/2; \
160         if (rp[t] > bcol) high = t; \
161         else              low  = t; \
162       } \
163       for (_i=low; _i<high; _i++) { \
164         if (rp[_i] > bcol) break; \
165         if (rp[_i] == bcol) { \
166           bap  = ap +  bs2*_i + bs*cidx + ridx; \
167           if (addv == ADD_VALUES) *bap += value;  \
168           else                    *bap  = value;  \
169           goto b_noinsert; \
170         } \
171       } \
172       if (b->nonew == 1) goto b_noinsert; \
173       if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
174       MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
175       CHKMEMQ;\
176       N = nrow++ - 1;  \
177       /* shift up all the later entries in this row */ \
178       for (ii=N; ii>=_i; ii--) { \
179         rp[ii+1] = rp[ii]; \
180         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
181       } \
182       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
183       rp[_i]                      = bcol;  \
184       ap[bs2*_i + bs*cidx + ridx] = value;  \
185       b_noinsert:; \
186     bilen[brow] = nrow; \
187 }
188 
189 #undef __FUNCT__
190 #define __FUNCT__ "MatSetValues_MPIBAIJ"
191 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
192 {
193   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
194   MatScalar      value;
195   PetscTruth     roworiented = baij->roworiented;
196   PetscErrorCode ierr;
197   PetscInt       i,j,row,col;
198   PetscInt       rstart_orig=mat->rmap->rstart;
199   PetscInt       rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart;
200   PetscInt       cend_orig=mat->cmap->rend,bs=mat->rmap->bs;
201 
202   /* Some Variables required in the macro */
203   Mat            A = baij->A;
204   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)(A)->data;
205   PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
206   MatScalar      *aa=a->a;
207 
208   Mat            B = baij->B;
209   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(B)->data;
210   PetscInt       *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
211   MatScalar      *ba=b->a;
212 
213   PetscInt       *rp,ii,nrow,_i,rmax,N,brow,bcol;
214   PetscInt       low,high,t,ridx,cidx,bs2=a->bs2;
215   MatScalar      *ap,*bap;
216 
217   PetscFunctionBegin;
218   for (i=0; i<m; i++) {
219     if (im[i] < 0) continue;
220 #if defined(PETSC_USE_DEBUG)
221     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
222 #endif
223     if (im[i] >= rstart_orig && im[i] < rend_orig) {
224       row = im[i] - rstart_orig;
225       for (j=0; j<n; j++) {
226         if (in[j] >= cstart_orig && in[j] < cend_orig){
227           col = in[j] - cstart_orig;
228           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
229           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
230           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
231         } else if (in[j] < 0) continue;
232 #if defined(PETSC_USE_DEBUG)
233         else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
234 #endif
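        /* off-diagonal entry: once the matrix has been assembled, in[j] must be
           translated through colmap; a negative result means the block column is
           not yet present in B and, if new nonzeros are allowed, the matrix is
           disassembled so B can be rebuilt with the new sparsity pattern */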
235         else {
236           if (mat->was_assembled) {
237             if (!baij->colmap) {
238               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
239             }
240 #if defined (PETSC_USE_CTABLE)
241             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
242             col  = col - 1;
243 #else
244             col = baij->colmap[in[j]/bs] - 1;
245 #endif
246             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
247               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
248               col =  in[j];
249               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
250               B = baij->B;
251               b = (Mat_SeqBAIJ*)(B)->data;
252               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
253               ba=b->a;
254             } else col += in[j]%bs;
255           } else col = in[j];
256           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
257           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
258           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
259         }
260       }
261     } else {
262       if (!baij->donotstash) {
263         if (roworiented) {
264           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
265         } else {
266           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
267         }
268       }
269     }
270   }
271   PetscFunctionReturn(0);
272 }
273 
274 #undef __FUNCT__
275 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
276 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
277 {
278   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
279   const PetscScalar *value;
280   MatScalar         *barray=baij->barray;
281   PetscTruth        roworiented = baij->roworiented;
282   PetscErrorCode    ierr;
283   PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
284   PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
285   PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
286 
287   PetscFunctionBegin;
288   if (!barray) {
289     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
290     baij->barray = barray;
291   }
292 
293   if (roworiented) {
294     stepval = (n-1)*bs;
295   } else {
296     stepval = (m-1)*bs;
297   }
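  /* v is an m*bs by n*bs dense array; stepval, computed above, is the number
     of entries of v to skip, after consuming the bs entries of one block row,
     to reach the next row of the same bs-by-bs block */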
298   for (i=0; i<m; i++) {
299     if (im[i] < 0) continue;
300 #if defined(PETSC_USE_DEBUG)
301     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
302 #endif
303     if (im[i] >= rstart && im[i] < rend) {
304       row = im[i] - rstart;
305       for (j=0; j<n; j++) {
306         /* If n == 1 (row oriented) or m == 1 (column oriented) the block in v is already contiguous, so no copy is required */
307         if ((roworiented) && (n == 1)) {
308           barray = (MatScalar*)v + i*bs2;
309         } else if ((!roworiented) && (m == 1)) {
310           barray = (MatScalar*)v + j*bs2;
311         } else { /* Here a copy is required */
312           if (roworiented) {
313             value = v + i*(stepval+bs)*bs + j*bs;
314           } else {
315             value = v + j*(stepval+bs)*bs + i*bs;
316           }
317           for (ii=0; ii<bs; ii++,value+=stepval) {
318             for (jj=0; jj<bs; jj++) {
319               *barray++  = *value++;
320             }
321           }
322           barray -=bs2;
323         }
324 
325         if (in[j] >= cstart && in[j] < cend){
326           col  = in[j] - cstart;
327           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
328         }
329         else if (in[j] < 0) continue;
330 #if defined(PETSC_USE_DEBUG)
331         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
332 #endif
333         else {
334           if (mat->was_assembled) {
335             if (!baij->colmap) {
336               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
337             }
338 
339 #if defined(PETSC_USE_DEBUG)
340 #if defined (PETSC_USE_CTABLE)
341             { PetscInt data;
342               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
343               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
344             }
345 #else
346             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
347 #endif
348 #endif
349 #if defined (PETSC_USE_CTABLE)
350             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
351             col  = (col - 1)/bs;
352 #else
353             col = (baij->colmap[in[j]] - 1)/bs;
354 #endif
355             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
356               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
357               col =  in[j];
358             }
359           }
360           else col = in[j];
361           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
362         }
363       }
364     } else {
365       if (!baij->donotstash) {
366         if (roworiented) {
367           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
368         } else {
369           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
370         }
371       }
372     }
373   }
374   PetscFunctionReturn(0);
375 }
376 
377 #define HASH_KEY 0.6180339887
378 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
379 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
380 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
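/*
   HASH() is multiplicative hashing: the key is multiplied by the fractional
   part of the golden ratio (HASH_KEY) and the fractional part of the product,
   scaled by the table size, selects the slot.  A worked example (illustrative
   numbers only): for key = 10 and size = 7, 10*0.6180339887 = 6.1803...; the
   fractional part 0.1803... times 7 is 1.26..., so the entry hashes to slot 1.
   Collisions are resolved by linear probing with wraparound, as in the search
   loops below.
*/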
381 #undef __FUNCT__
382 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
383 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
384 {
385   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
386   PetscTruth     roworiented = baij->roworiented;
387   PetscErrorCode ierr;
388   PetscInt       i,j,row,col;
389   PetscInt       rstart_orig=mat->rmap->rstart;
390   PetscInt       rend_orig=mat->rmap->rend,Nbs=baij->Nbs;
391   PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
392   PetscReal      tmp;
393   MatScalar      **HD = baij->hd,value;
394 #if defined(PETSC_USE_DEBUG)
395   PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
396 #endif
397 
398   PetscFunctionBegin;
399 
400   for (i=0; i<m; i++) {
401 #if defined(PETSC_USE_DEBUG)
402     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
403     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
404 #endif
405     row = im[i];
406     if (row >= rstart_orig && row < rend_orig) {
407       for (j=0; j<n; j++) {
408         col = in[j];
409         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
410         /* Look up into the Hash Table */
411         key = (row/bs)*Nbs+(col/bs)+1;
412         h1  = HASH(size,key,tmp);
413 
414 
415         idx = h1;
416 #if defined(PETSC_USE_DEBUG)
417         insert_ct++;
418         total_ct++;
419         if (HT[idx] != key) {
420           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
421           if (idx == size) {
422             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
423             if (idx == h1) {
424               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
425             }
426           }
427         }
428 #else
429         if (HT[idx] != key) {
430           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
431           if (idx == size) {
432             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
433             if (idx == h1) {
434               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
435             }
436           }
437         }
438 #endif
439         /* A HASH table entry is found, so insert the values at the correct address */
440         if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
441         else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
442       }
443     } else {
444       if (!baij->donotstash) {
445         if (roworiented) {
446           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
447         } else {
448           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
449         }
450       }
451     }
452   }
453 #if defined(PETSC_USE_DEBUG)
454   baij->ht_total_ct = total_ct;
455   baij->ht_insert_ct = insert_ct;
456 #endif
457   PetscFunctionReturn(0);
458 }
459 
460 #undef __FUNCT__
461 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
462 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
463 {
464   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
465   PetscTruth        roworiented = baij->roworiented;
466   PetscErrorCode    ierr;
467   PetscInt          i,j,ii,jj,row,col;
468   PetscInt          rstart=baij->rstartbs;
469   PetscInt          rend=mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
470   PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
471   PetscReal         tmp;
472   MatScalar         **HD = baij->hd,*baij_a;
473   const PetscScalar *v_t,*value;
474 #if defined(PETSC_USE_DEBUG)
475   PetscInt          total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
476 #endif
477 
478   PetscFunctionBegin;
479 
480   if (roworiented) {
481     stepval = (n-1)*bs;
482   } else {
483     stepval = (m-1)*bs;
484   }
485   for (i=0; i<m; i++) {
486 #if defined(PETSC_USE_DEBUG)
487     if (im[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
488     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
489 #endif
490     row   = im[i];
491     v_t   = v + i*nbs2;
492     if (row >= rstart && row < rend) {
493       for (j=0; j<n; j++) {
494         col = in[j];
495 
496         /* Look up into the Hash Table */
497         key = row*Nbs+col+1;
498         h1  = HASH(size,key,tmp);
499 
500         idx = h1;
501 #if defined(PETSC_USE_DEBUG)
502         total_ct++;
503         insert_ct++;
504         if (HT[idx] != key) {
505           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
506           if (idx == size) {
507             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
508             if (idx == h1) {
509               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
510             }
511           }
512         }
513 #else
514         if (HT[idx] != key) {
515           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
516           if (idx == size) {
517             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
518             if (idx == h1) {
519               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
520             }
521           }
522         }
523 #endif
524         baij_a = HD[idx];
525         if (roworiented) {
526           /*value = v + i*(stepval+bs)*bs + j*bs;*/
527           /* value = v + (i*(stepval+bs)+j)*bs; */
528           value = v_t;
529           v_t  += bs;
530           if (addv == ADD_VALUES) {
531             for (ii=0; ii<bs; ii++,value+=stepval) {
532               for (jj=ii; jj<bs2; jj+=bs) {
533                 baij_a[jj]  += *value++;
534               }
535             }
536           } else {
537             for (ii=0; ii<bs; ii++,value+=stepval) {
538               for (jj=ii; jj<bs2; jj+=bs) {
539                 baij_a[jj]  = *value++;
540               }
541             }
542           }
543         } else {
544           value = v + j*(stepval+bs)*bs + i*bs;
545           if (addv == ADD_VALUES) {
546             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
547               for (jj=0; jj<bs; jj++) {
548                 baij_a[jj]  += *value++;
549               }
550             }
551           } else {
552             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
553               for (jj=0; jj<bs; jj++) {
554                 baij_a[jj]  = *value++;
555               }
556             }
557           }
558         }
559       }
560     } else {
561       if (!baij->donotstash) {
562         if (roworiented) {
563           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
564         } else {
565           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
566         }
567       }
568     }
569   }
570 #if defined(PETSC_USE_DEBUG)
571   baij->ht_total_ct = total_ct;
572   baij->ht_insert_ct = insert_ct;
573 #endif
574   PetscFunctionReturn(0);
575 }
576 
577 #undef __FUNCT__
578 #define __FUNCT__ "MatGetValues_MPIBAIJ"
579 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
580 {
581   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
582   PetscErrorCode ierr;
583   PetscInt       bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
584   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
585 
586   PetscFunctionBegin;
587   for (i=0; i<m; i++) {
588     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
589     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
590     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
591       row = idxm[i] - bsrstart;
592       for (j=0; j<n; j++) {
593         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
594         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
595         if (idxn[j] >= bscstart && idxn[j] < bscend){
596           col = idxn[j] - bscstart;
597           ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
598         } else {
599           if (!baij->colmap) {
600             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
601           }
602 #if defined (PETSC_USE_CTABLE)
603           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
604           data--;
605 #else
606           data = baij->colmap[idxn[j]/bs]-1;
607 #endif
608           if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
609           else {
610             col  = data + idxn[j]%bs;
611             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
612           }
613         }
614       }
615     } else {
616       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
617     }
618   }
619   PetscFunctionReturn(0);
620 }
621 
622 #undef __FUNCT__
623 #define __FUNCT__ "MatNorm_MPIBAIJ"
624 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
625 {
626   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
627   Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
628   PetscErrorCode ierr;
629   PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
630   PetscReal      sum = 0.0;
631   MatScalar      *v;
632 
633   PetscFunctionBegin;
634   if (baij->size == 1) {
635     ierr =  MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
636   } else {
637     if (type == NORM_FROBENIUS) {
638       v = amat->a;
639       nz = amat->nz*bs2;
640       for (i=0; i<nz; i++) {
641 #if defined(PETSC_USE_COMPLEX)
642         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
643 #else
644         sum += (*v)*(*v); v++;
645 #endif
646       }
647       v = bmat->a;
648       nz = bmat->nz*bs2;
649       for (i=0; i<nz; i++) {
650 #if defined(PETSC_USE_COMPLEX)
651         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
652 #else
653         sum += (*v)*(*v); v++;
654 #endif
655       }
656       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
657       *nrm = sqrt(*nrm);
658     } else if (type == NORM_1) { /* max column sum */
659       PetscReal *tmp,*tmp2;
660       PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
661       ierr = PetscMalloc((2*mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
662       tmp2 = tmp + mat->cmap->N;
663       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
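      /* each bs-by-bs block is stored column major (see the value index
         bs2*_i + bs*cidx + ridx in the MatSetValues macros above), so the
         innermost loop over rows walks each block column contiguously */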
664       v = amat->a; jj = amat->j;
665       for (i=0; i<amat->nz; i++) {
666         for (j=0; j<bs; j++){
667           col = bs*(cstart + *jj) + j; /* column index */
668           for (row=0; row<bs; row++){
669             tmp[col] += PetscAbsScalar(*v);  v++;
670           }
671         }
672         jj++;
673       }
674       v = bmat->a; jj = bmat->j;
675       for (i=0; i<bmat->nz; i++) {
676         for (j=0; j<bs; j++){
677           col = bs*garray[*jj] + j;
678           for (row=0; row<bs; row++){
679             tmp[col] += PetscAbsScalar(*v); v++;
680           }
681         }
682         jj++;
683       }
684       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
685       *nrm = 0.0;
686       for (j=0; j<mat->cmap->N; j++) {
687         if (tmp2[j] > *nrm) *nrm = tmp2[j];
688       }
689       ierr = PetscFree(tmp);CHKERRQ(ierr);
690     } else if (type == NORM_INFINITY) { /* max row sum */
691       PetscReal *sums;
692       ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr);
693       sum = 0.0;
694       for (j=0; j<amat->mbs; j++) {
695         for (row=0; row<bs; row++) sums[row] = 0.0;
696         v = amat->a + bs2*amat->i[j];
697         nz = amat->i[j+1]-amat->i[j];
698         for (i=0; i<nz; i++) {
699           for (col=0; col<bs; col++){
700             for (row=0; row<bs; row++){
701               sums[row] += PetscAbsScalar(*v); v++;
702             }
703           }
704         }
705         v = bmat->a + bs2*bmat->i[j];
706         nz = bmat->i[j+1]-bmat->i[j];
707         for (i=0; i<nz; i++) {
708           for (col=0; col<bs; col++){
709             for (row=0; row<bs; row++){
710               sums[row] += PetscAbsScalar(*v); v++;
711             }
712           }
713         }
714         for (row=0; row<bs; row++){
715           if (sums[row] > sum) sum = sums[row];
716         }
717       }
718       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
719       ierr = PetscFree(sums);CHKERRQ(ierr);
720     } else {
721       SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
722     }
723   }
724   PetscFunctionReturn(0);
725 }
726 
727 /*
728   Creates and fills in the hash table.
729   This table is created only once.
730   If new entries need to be added to the matrix
731   then the hash table has to be destroyed and
732   recreated.
733 */
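/*
  The table maps key = (block row)*Nbs + (block column) + 1 to the address of
  the corresponding bs2 values inside a->a or b->a, so the _HT MatSetValues
  variants above can write straight into the matrix storage without searching
  any row structure.
*/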
734 #undef __FUNCT__
735 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
736 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
737 {
738   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
739   Mat            A = baij->A,B=baij->B;
740   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
741   PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
742   PetscErrorCode ierr;
743   PetscInt       size,bs2=baij->bs2,rstart=baij->rstartbs;
744   PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
745   PetscInt       *HT,key;
746   MatScalar      **HD;
747   PetscReal      tmp;
748 #if defined(PETSC_USE_INFO)
749   PetscInt       ct=0,max=0;
750 #endif
751 
752   PetscFunctionBegin;
753   baij->ht_size=(PetscInt)(factor*nz);
754   size = baij->ht_size;
755 
756   if (baij->ht) {
757     PetscFunctionReturn(0);
758   }
759 
760   /* Allocate Memory for Hash Table */
761   ierr     = PetscMalloc((size)*(sizeof(PetscInt)+sizeof(MatScalar*))+1,&baij->hd);CHKERRQ(ierr);
762   baij->ht = (PetscInt*)(baij->hd + size);
763   HD       = baij->hd;
764   HT       = baij->ht;
765 
766 
767   ierr = PetscMemzero(HD,size*(sizeof(PetscInt)+sizeof(MatScalar*)));CHKERRQ(ierr);
768 
769 
770   /* Loop Over A */
771   for (i=0; i<a->mbs; i++) {
772     for (j=ai[i]; j<ai[i+1]; j++) {
773       row = i+rstart;
774       col = aj[j]+cstart;
775 
776       key = row*Nbs + col + 1;
777       h1  = HASH(size,key,tmp);
778       for (k=0; k<size; k++){
779         if (!HT[(h1+k)%size]) {
780           HT[(h1+k)%size] = key;
781           HD[(h1+k)%size] = a->a + j*bs2;
782           break;
783 #if defined(PETSC_USE_INFO)
784         } else {
785           ct++;
786 #endif
787         }
788       }
789 #if defined(PETSC_USE_INFO)
790       if (k> max) max = k;
791 #endif
792     }
793   }
794   /* Loop Over B */
795   for (i=0; i<b->mbs; i++) {
796     for (j=bi[i]; j<bi[i+1]; j++) {
797       row = i+rstart;
798       col = garray[bj[j]];
799       key = row*Nbs + col + 1;
800       h1  = HASH(size,key,tmp);
801       for (k=0; k<size; k++){
802         if (!HT[(h1+k)%size]) {
803           HT[(h1+k)%size] = key;
804           HD[(h1+k)%size] = b->a + j*bs2;
805           break;
806 #if defined(PETSC_USE_INFO)
807         } else {
808           ct++;
809 #endif
810         }
811       }
812 #if defined(PETSC_USE_INFO)
813       if (k> max) max = k;
814 #endif
815     }
816   }
817 
818   /* Print Summary */
819 #if defined(PETSC_USE_INFO)
820   for (i=0,j=0; i<size; i++) {
821     if (HT[i]) {j++;}
822   }
823   ierr = PetscInfo2(mat,"Average Search = %5.2f, max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
824 #endif
825   PetscFunctionReturn(0);
826 }
827 
828 #undef __FUNCT__
829 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
830 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
831 {
832   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
833   PetscErrorCode ierr;
834   PetscInt       nstash,reallocs;
835   InsertMode     addv;
836 
837   PetscFunctionBegin;
838   if (baij->donotstash) {
839     PetscFunctionReturn(0);
840   }
841 
842   /* make sure all processors are in the same mode (INSERT_VALUES or ADD_VALUES); the bitwise OR over all ranks equals ADD_VALUES|INSERT_VALUES only if some inserted while others added */
843   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
844   if (addv == (ADD_VALUES|INSERT_VALUES)) {
845     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
846   }
847   mat->insertmode = addv; /* in case this processor had no cache */
848 
849   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
850   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
851   ierr = PetscInfo2(mat,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
852   ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
853   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
854   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
855   PetscFunctionReturn(0);
856 }
857 
858 #undef __FUNCT__
859 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
860 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
861 {
862   Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
863   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)baij->A->data;
864   PetscErrorCode ierr;
865   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
866   PetscInt       *row,*col;
867   PetscTruth     r1,r2,r3,other_disassembled;
868   MatScalar      *val;
869   InsertMode     addv = mat->insertmode;
870   PetscMPIInt    n;
871 
872   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
873   PetscFunctionBegin;
874   if (!baij->donotstash) {
875     while (1) {
876       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
877       if (!flg) break;
878 
879       for (i=0; i<n;) {
880         /* Now identify the consecutive vals belonging to the same row */
881         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
882         if (j < n) ncols = j-i;
883         else       ncols = n-i;
884         /* Now assemble all these values with a single function call */
885         ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
886         i = j;
887       }
888     }
889     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
890     /* Now process the block-stash. Since the values are stashed column oriented,
891        temporarily clear the row-oriented flags, and restore the original flags
892        after MatSetValuesBlocked() */
893     r1 = baij->roworiented;
894     r2 = a->roworiented;
895     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
896     baij->roworiented = PETSC_FALSE;
897     a->roworiented    = PETSC_FALSE;
898     (((Mat_SeqBAIJ*)baij->B->data))->roworiented    = PETSC_FALSE; /* b->roworiented */
899     while (1) {
900       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
901       if (!flg) break;
902 
903       for (i=0; i<n;) {
904         /* Now identify the consecutive vals belonging to the same row */
905         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
906         if (j < n) ncols = j-i;
907         else       ncols = n-i;
908         ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
909         i = j;
910       }
911     }
912     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
913     baij->roworiented = r1;
914     a->roworiented    = r2;
915     ((Mat_SeqBAIJ*)baij->B->data)->roworiented    = r3; /* b->roworiented */
916   }
917 
918   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
919   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
920 
921   /* determine if any processor has disassembled, if so we must
922      also disassemble ourselves, in order that we may reassemble. */
923   /*
924      if nonzero structure of submatrix B cannot change then we know that
925      no processor disassembled thus we can skip this stuff
926   */
927   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
928     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
929     if (mat->was_assembled && !other_disassembled) {
930       ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
931     }
932   }
933 
934   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
935     ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
936   }
937   ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
938   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
939   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
940 
941 #if defined(PETSC_USE_INFO)
942   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
943     ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);
944     baij->ht_total_ct  = 0;
945     baij->ht_insert_ct = 0;
946   }
947 #endif
948   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
949     ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
950     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
951     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
952   }
953 
954   ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
955   baij->rowvalues = 0;
956   PetscFunctionReturn(0);
957 }
958 
959 #undef __FUNCT__
960 #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
961 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
962 {
963   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
964   PetscErrorCode    ierr;
965   PetscMPIInt       size = baij->size,rank = baij->rank;
966   PetscInt          bs = mat->rmap->bs;
967   PetscTruth        iascii,isdraw;
968   PetscViewer       sviewer;
969   PetscViewerFormat format;
970 
971   PetscFunctionBegin;
972   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
973   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
974   if (iascii) {
975     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
976     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
977       MatInfo info;
978       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
979       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
980       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
981               rank,mat->rmap->N,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs,
982               mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
983       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
984       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
985       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
986       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
987       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
988       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
989       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
990       PetscFunctionReturn(0);
991     } else if (format == PETSC_VIEWER_ASCII_INFO) {
992       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
993       PetscFunctionReturn(0);
994     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
995       PetscFunctionReturn(0);
996     }
997   }
998 
999   if (isdraw) {
1000     PetscDraw       draw;
1001     PetscTruth      isnull;
1002     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1003     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1004   }
1005 
1006   if (size == 1) {
1007     ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1008     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
1009   } else {
1010     /* assemble the entire matrix onto first processor. */
1011     Mat         A;
1012     Mat_SeqBAIJ *Aloc;
1013     PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
1014     MatScalar   *a;
1015 
1016     /* Here we are creating a temporary matrix, so we will assume MPIBAIJ is acceptable */
1017     /* Perhaps this should be the type of mat? */
1018     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1019     if (!rank) {
1020       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1021     } else {
1022       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1023     }
1024     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
1025     ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1026     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1027 
1028     /* copy over the A part */
1029     Aloc = (Mat_SeqBAIJ*)baij->A->data;
1030     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1031     ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
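    /* rvals[] holds the bs consecutive global (unblocked) row indices of one
       block row; each bs-by-bs block is inserted one column at a time since
       the values are stored column major */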
1032 
1033     for (i=0; i<mbs; i++) {
1034       rvals[0] = bs*(baij->rstartbs + i);
1035       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1036       for (j=ai[i]; j<ai[i+1]; j++) {
1037         col = (baij->cstartbs+aj[j])*bs;
1038         for (k=0; k<bs; k++) {
1039           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1040           col++; a += bs;
1041         }
1042       }
1043     }
1044     /* copy over the B part */
1045     Aloc = (Mat_SeqBAIJ*)baij->B->data;
1046     ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1047     for (i=0; i<mbs; i++) {
1048       rvals[0] = bs*(baij->rstartbs + i);
1049       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1050       for (j=ai[i]; j<ai[i+1]; j++) {
1051         col = baij->garray[aj[j]]*bs;
1052         for (k=0; k<bs; k++) {
1053           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1054           col++; a += bs;
1055         }
1056       }
1057     }
1058     ierr = PetscFree(rvals);CHKERRQ(ierr);
1059     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1060     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1061     /*
1062        Everyone has to make this call to draw the matrix since the graphics waits are
1063        synchronized across all processors that share the PetscDraw object
1064     */
1065     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1066     if (!rank) {
1067       ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1068       ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1069     }
1070     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1071     ierr = MatDestroy(A);CHKERRQ(ierr);
1072   }
1073   PetscFunctionReturn(0);
1074 }
1075 
1076 #undef __FUNCT__
1077 #define __FUNCT__ "MatView_MPIBAIJ"
1078 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
1079 {
1080   PetscErrorCode ierr;
1081   PetscTruth     iascii,isdraw,issocket,isbinary;
1082 
1083   PetscFunctionBegin;
1084   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1085   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1086   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1087   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1088   if (iascii || isdraw || issocket || isbinary) {
1089     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1090   } else {
1091     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
1092   }
1093   PetscFunctionReturn(0);
1094 }
1095 
1096 #undef __FUNCT__
1097 #define __FUNCT__ "MatDestroy_MPIBAIJ"
1098 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
1099 {
1100   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1101   PetscErrorCode ierr;
1102 
1103   PetscFunctionBegin;
1104 #if defined(PETSC_USE_LOG)
1105   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
1106 #endif
1107   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1108   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
1109   ierr = MatDestroy(baij->A);CHKERRQ(ierr);
1110   ierr = MatDestroy(baij->B);CHKERRQ(ierr);
1111 #if defined (PETSC_USE_CTABLE)
1112   if (baij->colmap) {ierr = PetscTableDestroy(baij->colmap);CHKERRQ(ierr);}
1113 #else
1114   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
1115 #endif
1116   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
1117   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
1118   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
1119   ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
1120   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
1121   ierr = PetscFree(baij->hd);CHKERRQ(ierr);
1122   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
1123   ierr = PetscFree(baij);CHKERRQ(ierr);
1124 
1125   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1126   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1127   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1128   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
1129   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1130   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1131   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
1132   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr);
1133   PetscFunctionReturn(0);
1134 }
1135 
1136 #undef __FUNCT__
1137 #define __FUNCT__ "MatMult_MPIBAIJ"
1138 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1139 {
1140   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1141   PetscErrorCode ierr;
1142   PetscInt       nt;
1143 
1144   PetscFunctionBegin;
1145   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1146   if (nt != A->cmap->n) {
1147     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1148   }
1149   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1150   if (nt != A->rmap->n) {
1151     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
1152   }
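  /* overlap communication with computation: start gathering the needed
     off-process entries of xx, apply the local diagonal block while the
     messages are in flight, then finish the scatter and add in the
     off-diagonal contribution from the ghost values */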
1153   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1154   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1155   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1156   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1157   PetscFunctionReturn(0);
1158 }
1159 
1160 #undef __FUNCT__
1161 #define __FUNCT__ "MatMultAdd_MPIBAIJ"
1162 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1163 {
1164   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1165   PetscErrorCode ierr;
1166 
1167   PetscFunctionBegin;
1168   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1169   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1170   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1171   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1172   PetscFunctionReturn(0);
1173 }
1174 
1175 #undef __FUNCT__
1176 #define __FUNCT__ "MatMultTranspose_MPIBAIJ"
1177 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1178 {
1179   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1180   PetscErrorCode ierr;
1181   PetscTruth     merged;
1182 
1183   PetscFunctionBegin;
1184   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1185   /* do nondiagonal part */
1186   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1187   if (!merged) {
1188     /* send it on its way */
1189     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1190     /* do local part */
1191     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1192     /* receive remote parts: note this assumes the values are not actually */
1193     /* inserted in yy until the next line */
1194     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1195   } else {
1196     /* do local part */
1197     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1198     /* send it on its way */
1199     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1200     /* values actually were received in the Begin() but we need to call this nop */
1201     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1202   }
1203   PetscFunctionReturn(0);
1204 }
1205 
1206 #undef __FUNCT__
1207 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
1208 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1209 {
1210   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1211   PetscErrorCode ierr;
1212 
1213   PetscFunctionBegin;
1214   /* do nondiagonal part */
1215   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1216   /* send it on its way */
1217   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1218   /* do local part */
1219   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1220   /* receive remote parts: note this assumes the values are not actually */
1221   /* inserted in zz until the next line, which is true for my implementation */
1222   /* but is not perhaps always true. */
1223   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1224   PetscFunctionReturn(0);
1225 }
1226 
1227 /*
1228   This only works correctly for square matrices where the subblock A->A is the
1229    diagonal block
1230 */
1231 #undef __FUNCT__
1232 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
1233 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1234 {
1235   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1236   PetscErrorCode ierr;
1237 
1238   PetscFunctionBegin;
1239   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1240   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1241   PetscFunctionReturn(0);
1242 }
1243 
1244 #undef __FUNCT__
1245 #define __FUNCT__ "MatScale_MPIBAIJ"
1246 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
1247 {
1248   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1249   PetscErrorCode ierr;
1250 
1251   PetscFunctionBegin;
1252   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1253   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1254   PetscFunctionReturn(0);
1255 }
1256 
1257 #undef __FUNCT__
1258 #define __FUNCT__ "MatGetRow_MPIBAIJ"
1259 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1260 {
1261   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
1262   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1263   PetscErrorCode ierr;
1264   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1265   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
1266   PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;
1267 
1268   PetscFunctionBegin;
1269   if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1270   mat->getrowactive = PETSC_TRUE;
1271 
1272   if (!mat->rowvalues && (idx || v)) {
1273     /*
1274         allocate enough space to hold information from the longest row.
1275     */
1276     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1277     PetscInt     max = 1,mbs = mat->mbs,tmp;
1278     for (i=0; i<mbs; i++) {
1279       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1280       if (max < tmp) { max = tmp; }
1281     }
1282     ierr = PetscMalloc(max*bs2*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1283     mat->rowindices = (PetscInt*)(mat->rowvalues + max*bs2);
1284   }
1285 
1286   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows");
1287   lrow = row - brstart;
1288 
1289   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1290   if (!v)   {pvA = 0; pvB = 0;}
1291   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1292   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1293   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1294   nztot = nzA + nzB;
1295 
1296   cmap  = mat->garray;
1297   if (v  || idx) {
1298     if (nztot) {
1299       /* Sort by increasing column numbers, assuming A and B already sorted */
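      /* the columns of B (the off-diagonal part) split into a sorted prefix
         lying below the diagonal block and a sorted suffix above it; imark
         records the split so the merged row is B-prefix, all of A, B-suffix */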
1300       PetscInt imark = -1;
1301       if (v) {
1302         *v = v_p = mat->rowvalues;
1303         for (i=0; i<nzB; i++) {
1304           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1305           else break;
1306         }
1307         imark = i;
1308         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1309         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1310       }
1311       if (idx) {
1312         *idx = idx_p = mat->rowindices;
1313         if (imark > -1) {
1314           for (i=0; i<imark; i++) {
1315             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1316           }
1317         } else {
1318           for (i=0; i<nzB; i++) {
1319             if (cmap[cworkB[i]/bs] < cstart)
1320               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1321             else break;
1322           }
1323           imark = i;
1324         }
1325         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1326         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1327       }
1328     } else {
1329       if (idx) *idx = 0;
1330       if (v)   *v   = 0;
1331     }
1332   }
1333   *nz = nztot;
1334   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1335   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1336   PetscFunctionReturn(0);
1337 }
1338 
1339 #undef __FUNCT__
1340 #define __FUNCT__ "MatRestoreRow_MPIBAIJ"
1341 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1342 {
1343   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1344 
1345   PetscFunctionBegin;
1346   if (!baij->getrowactive) {
1347     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1348   }
1349   baij->getrowactive = PETSC_FALSE;
1350   PetscFunctionReturn(0);
1351 }
1352 
1353 #undef __FUNCT__
1354 #define __FUNCT__ "MatZeroEntries_MPIBAIJ"
1355 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1356 {
1357   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1358   PetscErrorCode ierr;
1359 
1360   PetscFunctionBegin;
1361   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1362   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1363   PetscFunctionReturn(0);
1364 }
1365 
1366 #undef __FUNCT__
1367 #define __FUNCT__ "MatGetInfo_MPIBAIJ"
1368 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1369 {
1370   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
1371   Mat            A = a->A,B = a->B;
1372   PetscErrorCode ierr;
1373   PetscReal      isend[5],irecv[5];
1374 
1375   PetscFunctionBegin;
1376   info->block_size     = (PetscReal)matin->rmap->bs;
1377   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1378   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1379   isend[3] = info->memory;  isend[4] = info->mallocs;
1380   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1381   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1382   isend[3] += info->memory;  isend[4] += info->mallocs;
1383   if (flag == MAT_LOCAL) {
1384     info->nz_used      = isend[0];
1385     info->nz_allocated = isend[1];
1386     info->nz_unneeded  = isend[2];
1387     info->memory       = isend[3];
1388     info->mallocs      = isend[4];
1389   } else if (flag == MAT_GLOBAL_MAX) {
1390     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1391     info->nz_used      = irecv[0];
1392     info->nz_allocated = irecv[1];
1393     info->nz_unneeded  = irecv[2];
1394     info->memory       = irecv[3];
1395     info->mallocs      = irecv[4];
1396   } else if (flag == MAT_GLOBAL_SUM) {
1397     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1398     info->nz_used      = irecv[0];
1399     info->nz_allocated = irecv[1];
1400     info->nz_unneeded  = irecv[2];
1401     info->memory       = irecv[3];
1402     info->mallocs      = irecv[4];
1403   } else {
1404     SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1405   }
1406   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1407   info->fill_ratio_needed = 0;
1408   info->factor_mallocs    = 0;
1409   PetscFunctionReturn(0);
1410 }
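
/*
   Usage sketch: a minimal fragment, assuming an assembled matrix A; the flag
   selects whether the per-process numbers are kept local (MAT_LOCAL) or
   combined with a max or sum reduction.

     MatInfo info;
     ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(PETSC_COMM_WORLD,"nz_used %g nz_allocated %g mallocs %g\n",
                        info.nz_used,info.nz_allocated,info.mallocs);CHKERRQ(ierr);
*/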
1411 
1412 #undef __FUNCT__
1413 #define __FUNCT__ "MatSetOption_MPIBAIJ"
1414 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscTruth flg)
1415 {
1416   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1417   PetscErrorCode ierr;
1418 
1419   PetscFunctionBegin;
1420   switch (op) {
1421   case MAT_NEW_NONZERO_LOCATIONS:
1422   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1423   case MAT_KEEP_ZEROED_ROWS:
1424   case MAT_NEW_NONZERO_LOCATION_ERR:
1425     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1426     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1427     break;
1428   case MAT_ROW_ORIENTED:
1429     a->roworiented = flg;
1430     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1431     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1432     break;
1433   case MAT_NEW_DIAGONALS:
1434     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1435     break;
1436   case MAT_IGNORE_OFF_PROC_ENTRIES:
1437     a->donotstash = flg;
1438     break;
1439   case MAT_USE_HASH_TABLE:
1440     a->ht_flag = flg;
1441     break;
1442   case MAT_SYMMETRIC:
1443   case MAT_STRUCTURALLY_SYMMETRIC:
1444   case MAT_HERMITIAN:
1445   case MAT_SYMMETRY_ETERNAL:
1446     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1447     break;
1448   default:
1449     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1450   }
1451   PetscFunctionReturn(0);
1452 }
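
/*
   Usage sketch: a minimal fragment, assuming a created and preallocated MPIBAIJ
   matrix A; options such as these should be set before values are inserted and
   the matrix is assembled.

     ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
     ierr = MatSetOption(A,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
*/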
1453 
1454 #undef __FUNCT__
1455 #define __FUNCT__ "MatTranspose_MPIBAIJ"
1456 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
1457 {
1458   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
1459   Mat_SeqBAIJ    *Aloc;
1460   Mat            B;
1461   PetscErrorCode ierr;
1462   PetscInt       M=A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
1463   PetscInt       bs=A->rmap->bs,mbs=baij->mbs;
1464   MatScalar      *a;
1465 
1466   PetscFunctionBegin;
1467   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1468   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1469     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
1470     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1471     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1472     ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1473   } else {
1474     B = *matout;
1475   }
1476 
1477   /* copy over the A part */
1478   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1479   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1480   ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
1481 
1482   for (i=0; i<mbs; i++) {
1483     rvals[0] = bs*(baij->rstartbs + i);
1484     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1485     for (j=ai[i]; j<ai[i+1]; j++) {
1486       col = (baij->cstartbs+aj[j])*bs;
1487       for (k=0; k<bs; k++) {
1488         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1489         col++; a += bs;
1490       }
1491     }
1492   }
1493   /* copy over the B part */
1494   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1495   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1496   for (i=0; i<mbs; i++) {
1497     rvals[0] = bs*(baij->rstartbs + i);
1498     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1499     for (j=ai[i]; j<ai[i+1]; j++) {
1500       col = baij->garray[aj[j]]*bs;
1501       for (k=0; k<bs; k++) {
1502         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1503         col++; a += bs;
1504       }
1505     }
1506   }
1507   ierr = PetscFree(rvals);CHKERRQ(ierr);
1508   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1509   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1510 
1511   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1512     *matout = B;
1513   } else {
1514     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1515   }
1516   PetscFunctionReturn(0);
1517 }
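
/*
   Usage sketch: a minimal fragment, assuming an assembled MPIBAIJ matrix A;
   the first call creates a new transpose, the second transposes in place,
   which requires A to be square.

     Mat At;
     ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
     ierr = MatTranspose(A,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
*/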
1518 
1519 #undef __FUNCT__
1520 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
1521 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1522 {
1523   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1524   Mat            a = baij->A,b = baij->B;
1525   PetscErrorCode ierr;
1526   PetscInt       s1,s2,s3;
1527 
1528   PetscFunctionBegin;
1529   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1530   if (rr) {
1531     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1532     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1533     /* Overlap communication with computation. */
1534     ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1535   }
1536   if (ll) {
1537     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1538     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1539     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1540   }
1541   /* scale the diagonal block */
1542   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1543 
1544   if (rr) {
1545     /* Do a scatter end and then right scale the off-diagonal block */
1546     ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1547     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1548   }
1549 
1550   PetscFunctionReturn(0);
1551 }
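
/*
   Usage sketch: a minimal fragment, assuming an assembled MPIBAIJ matrix A;
   computes A <- diag(l)*A*diag(r), where l conforms to the local rows and r to
   the local columns. Either vector may be passed as PETSC_NULL to skip that side.

     Vec l,r;
     ierr = MatGetVecs(A,&r,&l);CHKERRQ(ierr);
     ierr = VecSet(l,2.0);CHKERRQ(ierr);
     ierr = VecSet(r,0.5);CHKERRQ(ierr);
     ierr = MatDiagonalScale(A,l,r);CHKERRQ(ierr);
     ierr = VecDestroy(l);CHKERRQ(ierr);
     ierr = VecDestroy(r);CHKERRQ(ierr);
*/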
1552 
1553 #undef __FUNCT__
1554 #define __FUNCT__ "MatZeroRows_MPIBAIJ"
1555 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
1556 {
1557   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1558   PetscErrorCode ierr;
1559   PetscMPIInt    imdex,size = l->size,n,rank = l->rank;
1560   PetscInt       i,*owners = A->rmap->range;
1561   PetscInt       *nprocs,j,idx,nsends,row;
1562   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
1563   PetscInt       *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source,lastidx = -1;
1564   PetscInt       *lens,*lrows,*values,rstart_bs=A->rmap->rstart;
1565   MPI_Comm       comm = ((PetscObject)A)->comm;
1566   MPI_Request    *send_waits,*recv_waits;
1567   MPI_Status     recv_status,*send_status;
1568 #if defined(PETSC_DEBUG)
1569   PetscTruth     found = PETSC_FALSE;
1570 #endif
1571 
1572   PetscFunctionBegin;
1573   /*  first count number of contributors to each processor */
1574   ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
1575   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
1576   ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
1577   j = 0;
1578   for (i=0; i<N; i++) {
1579     if (lastidx > (idx = rows[i])) j = 0;
1580     lastidx = idx;
1581     for (; j<size; j++) {
1582       if (idx >= owners[j] && idx < owners[j+1]) {
1583         nprocs[2*j]++;
1584         nprocs[2*j+1] = 1;
1585         owner[i] = j;
1586 #if defined(PETSC_DEBUG)
1587         found = PETSC_TRUE;
1588 #endif
1589         break;
1590       }
1591     }
1592 #if defined(PETSC_DEBUG)
1593     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
1594     found = PETSC_FALSE;
1595 #endif
1596   }
1597   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
1598 
1599   /* inform other processors of number of messages and max length*/
1600   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
1601 
1602   /* post receives:   */
1603   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
1604   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
1605   for (i=0; i<nrecvs; i++) {
1606     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
1607   }
1608 
1609   /* do sends:
1610      1) starts[i] gives the starting index in svalues for stuff going to
1611      the ith processor
1612   */
1613   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
1614   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
1615   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
1616   starts[0]  = 0;
1617   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1618   for (i=0; i<N; i++) {
1619     svalues[starts[owner[i]]++] = rows[i];
1620   }
1621 
1622   starts[0] = 0;
1623   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1624   count = 0;
1625   for (i=0; i<size; i++) {
1626     if (nprocs[2*i+1]) {
1627       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
1628     }
1629   }
1630   ierr = PetscFree(starts);CHKERRQ(ierr);
1631 
1632   base = owners[rank];
1633 
1634   /*  wait on receives */
1635   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
1636   source = lens + nrecvs;
1637   count  = nrecvs; slen = 0;
1638   while (count) {
1639     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
1640     /* unpack receives into our local space */
1641     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
1642     source[imdex]  = recv_status.MPI_SOURCE;
1643     lens[imdex]    = n;
1644     slen          += n;
1645     count--;
1646   }
1647   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1648 
1649   /* move the data into the send scatter */
1650   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
1651   count = 0;
1652   for (i=0; i<nrecvs; i++) {
1653     values = rvalues + i*nmax;
1654     for (j=0; j<lens[i]; j++) {
1655       lrows[count++] = values[j] - base;
1656     }
1657   }
1658   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1659   ierr = PetscFree(lens);CHKERRQ(ierr);
1660   ierr = PetscFree(owner);CHKERRQ(ierr);
1661   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1662 
1663   /* actually zap the local rows */
1664   /*
1665         Zero the required rows. If the "diagonal block" of the matrix
1666      is square and the user wishes to set the diagonal we use separate
1667      code so that MatSetValues() is not called for each diagonal entry,
1668      which would allocate new memory and trigger many mallocs, slowing things down.
1669 
1670        Contributed by: Matthew Knepley
1671   */
1672   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
1673   ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0);CHKERRQ(ierr);
1674   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
1675     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag);CHKERRQ(ierr);
1676   } else if (diag != 0.0) {
1677     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr);
1678     if (((Mat_SeqBAIJ*)l->A->data)->nonew) {
1679       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1680 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1681     }
1682     for (i=0; i<slen; i++) {
1683       row  = lrows[i] + rstart_bs;
1684       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
1685     }
1686     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1687     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1688   } else {
1689     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr);
1690   }
1691 
1692   ierr = PetscFree(lrows);CHKERRQ(ierr);
1693 
1694   /* wait on sends */
1695   if (nsends) {
1696     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1697     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1698     ierr = PetscFree(send_status);CHKERRQ(ierr);
1699   }
1700   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1701   ierr = PetscFree(svalues);CHKERRQ(ierr);
1702 
1703   PetscFunctionReturn(0);
1704 }
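
/*
   Usage sketch: a minimal fragment, assuming an assembled MPIBAIJ matrix A that
   owns at least two rows; the row indices are global, and a nonzero diag places
   that value on the diagonal of each zeroed row.

     PetscInt rstart,rend,rows[2];
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     rows[0] = rstart; rows[1] = rstart+1;
     ierr = MatZeroRows(A,2,rows,2.0);CHKERRQ(ierr);
*/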
1705 
1706 #undef __FUNCT__
1707 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
1708 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1709 {
1710   Mat_MPIBAIJ    *a   = (Mat_MPIBAIJ*)A->data;
1711   PetscErrorCode ierr;
1712 
1713   PetscFunctionBegin;
1714   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1715   PetscFunctionReturn(0);
1716 }
1717 
1718 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
1719 
1720 #undef __FUNCT__
1721 #define __FUNCT__ "MatEqual_MPIBAIJ"
1722 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag)
1723 {
1724   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1725   Mat            a,b,c,d;
1726   PetscTruth     flg;
1727   PetscErrorCode ierr;
1728 
1729   PetscFunctionBegin;
1730   a = matA->A; b = matA->B;
1731   c = matB->A; d = matB->B;
1732 
1733   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1734   if (flg) {
1735     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1736   }
1737   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1738   PetscFunctionReturn(0);
1739 }
1740 
1741 #undef __FUNCT__
1742 #define __FUNCT__ "MatCopy_MPIBAIJ"
1743 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
1744 {
1745   PetscErrorCode ierr;
1746   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ *)A->data;
1747   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ *)B->data;
1748 
1749   PetscFunctionBegin;
1750   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1751   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1752     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1753   } else {
1754     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1755     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1756   }
1757   PetscFunctionReturn(0);
1758 }
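
/*
   Usage sketch: a minimal fragment, assuming A and B are assembled MPIBAIJ
   matrices with identical nonzero patterns; with any other relationship pass
   DIFFERENT_NONZERO_PATTERN so the generic MatCopy_Basic() path is taken.

     ierr = MatCopy(A,B,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
*/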
1759 
1760 #undef __FUNCT__
1761 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ"
1762 PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A)
1763 {
1764   PetscErrorCode ierr;
1765 
1766   PetscFunctionBegin;
1767   ierr =  MatMPIBAIJSetPreallocation(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1768   PetscFunctionReturn(0);
1769 }
1770 
1771 #include "petscblaslapack.h"
1772 #undef __FUNCT__
1773 #define __FUNCT__ "MatAXPY_MPIBAIJ"
1774 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1775 {
1776   PetscErrorCode ierr;
1777   Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data;
1778   PetscBLASInt   bnz,one=1;
1779   Mat_SeqBAIJ    *x,*y;
1780 
1781   PetscFunctionBegin;
1782   if (str == SAME_NONZERO_PATTERN) {
1783     PetscScalar alpha = a;
1784     x = (Mat_SeqBAIJ *)xx->A->data;
1785     y = (Mat_SeqBAIJ *)yy->A->data;
1786     bnz = PetscBLASIntCast(x->nz);
1787     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1788     x = (Mat_SeqBAIJ *)xx->B->data;
1789     y = (Mat_SeqBAIJ *)yy->B->data;
1790     bnz = PetscBLASIntCast(x->nz);
1791     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1792   } else {
1793     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1794   }
1795   PetscFunctionReturn(0);
1796 }
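
/*
   Usage sketch: a minimal fragment, assuming assembled MPIBAIJ matrices X and Y
   of the same dimensions; computes Y <- 2.0*X + Y. The BLAS fast path above is
   only valid for SAME_NONZERO_PATTERN; otherwise pass DIFFERENT_NONZERO_PATTERN.

     ierr = MatAXPY(Y,2.0,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
*/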
1797 
1798 #undef __FUNCT__
1799 #define __FUNCT__ "MatRealPart_MPIBAIJ"
1800 PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
1801 {
1802   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ*)A->data;
1803   PetscErrorCode ierr;
1804 
1805   PetscFunctionBegin;
1806   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1807   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1808   PetscFunctionReturn(0);
1809 }
1810 
1811 #undef __FUNCT__
1812 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ"
1813 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
1814 {
1815   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ*)A->data;
1816   PetscErrorCode ierr;
1817 
1818   PetscFunctionBegin;
1819   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1820   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1821   PetscFunctionReturn(0);
1822 }
1823 
1824 /* -------------------------------------------------------------------*/
1825 static struct _MatOps MatOps_Values = {
1826        MatSetValues_MPIBAIJ,
1827        MatGetRow_MPIBAIJ,
1828        MatRestoreRow_MPIBAIJ,
1829        MatMult_MPIBAIJ,
1830 /* 4*/ MatMultAdd_MPIBAIJ,
1831        MatMultTranspose_MPIBAIJ,
1832        MatMultTransposeAdd_MPIBAIJ,
1833        0,
1834        0,
1835        0,
1836 /*10*/ 0,
1837        0,
1838        0,
1839        0,
1840        MatTranspose_MPIBAIJ,
1841 /*15*/ MatGetInfo_MPIBAIJ,
1842        MatEqual_MPIBAIJ,
1843        MatGetDiagonal_MPIBAIJ,
1844        MatDiagonalScale_MPIBAIJ,
1845        MatNorm_MPIBAIJ,
1846 /*20*/ MatAssemblyBegin_MPIBAIJ,
1847        MatAssemblyEnd_MPIBAIJ,
1848        0,
1849        MatSetOption_MPIBAIJ,
1850        MatZeroEntries_MPIBAIJ,
1851 /*25*/ MatZeroRows_MPIBAIJ,
1852        0,
1853        0,
1854        0,
1855        0,
1856 /*30*/ MatSetUpPreallocation_MPIBAIJ,
1857        0,
1858        0,
1859        0,
1860        0,
1861 /*35*/ MatDuplicate_MPIBAIJ,
1862        0,
1863        0,
1864        0,
1865        0,
1866 /*40*/ MatAXPY_MPIBAIJ,
1867        MatGetSubMatrices_MPIBAIJ,
1868        MatIncreaseOverlap_MPIBAIJ,
1869        MatGetValues_MPIBAIJ,
1870        MatCopy_MPIBAIJ,
1871 /*45*/ 0,
1872        MatScale_MPIBAIJ,
1873        0,
1874        0,
1875        0,
1876 /*50*/ 0,
1877        0,
1878        0,
1879        0,
1880        0,
1881 /*55*/ 0,
1882        0,
1883        MatSetUnfactored_MPIBAIJ,
1884        0,
1885        MatSetValuesBlocked_MPIBAIJ,
1886 /*60*/ 0,
1887        MatDestroy_MPIBAIJ,
1888        MatView_MPIBAIJ,
1889        0,
1890        0,
1891 /*65*/ 0,
1892        0,
1893        0,
1894        0,
1895        0,
1896 /*70*/ MatGetRowMaxAbs_MPIBAIJ,
1897        0,
1898        0,
1899        0,
1900        0,
1901 /*75*/ 0,
1902        0,
1903        0,
1904        0,
1905        0,
1906 /*80*/ 0,
1907        0,
1908        0,
1909        0,
1910        MatLoad_MPIBAIJ,
1911 /*85*/ 0,
1912        0,
1913        0,
1914        0,
1915        0,
1916 /*90*/ 0,
1917        0,
1918        0,
1919        0,
1920        0,
1921 /*95*/ 0,
1922        0,
1923        0,
1924        0,
1925        0,
1926 /*100*/0,
1927        0,
1928        0,
1929        0,
1930        0,
1931 /*105*/0,
1932        MatRealPart_MPIBAIJ,
1933        MatImaginaryPart_MPIBAIJ};
1934 
1935 
1936 EXTERN_C_BEGIN
1937 #undef __FUNCT__
1938 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
1939 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1940 {
1941   PetscFunctionBegin;
1942   *a      = ((Mat_MPIBAIJ *)A->data)->A;
1943   *iscopy = PETSC_FALSE;
1944   PetscFunctionReturn(0);
1945 }
1946 EXTERN_C_END
1947 
1948 EXTERN_C_BEGIN
1949 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*);
1950 EXTERN_C_END
1951 
1952 EXTERN_C_BEGIN
1953 #undef __FUNCT__
1954 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ"
1955 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
1956 {
1957   PetscInt       m,rstart,cstart,cend;
1958   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
1959   const PetscInt *JJ=0;
1960   PetscScalar    *values=0;
1961   PetscErrorCode ierr;
1962 
1963   PetscFunctionBegin;
1964 
1965   if (bs < 1) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
1966   B->rmap->bs = bs;
1967   B->cmap->bs = bs;
1968   ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr);
1969   ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr);
1970   m      = B->rmap->n/bs;
1971   rstart = B->rmap->rstart/bs;
1972   cstart = B->cmap->rstart/bs;
1973   cend   = B->cmap->rend/bs;
1974 
1975   if (ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
1976   ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
1977   o_nnz = d_nnz + m;
1978   for (i=0; i<m; i++) {
1979     nz = ii[i+1] - ii[i];
1980     if (nz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
1981     nz_max = PetscMax(nz_max,nz);
1982     JJ  = jj + ii[i];
1983     for (j=0; j<nz; j++) {
1984       if (*JJ >= cstart) break;
1985       JJ++;
1986     }
1987     d = 0;
1988     for (; j<nz; j++) {
1989       if (*JJ++ >= cend) break;
1990       d++;
1991     }
1992     d_nnz[i] = d;
1993     o_nnz[i] = nz - d;
1994   }
1995   ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1996   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
1997 
1998   values = (PetscScalar*)V;
1999   if (!values) {
2000     ierr = PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2001     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2002   }
2003   for (i=0; i<m; i++) {
2004     PetscInt          row    = i + rstart;
2005     PetscInt          ncols  = ii[i+1] - ii[i];
2006     const PetscInt    *icols = jj + ii[i];
2007     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2008     ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2009   }
2010 
2011   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2012   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2013   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2014 
2015   PetscFunctionReturn(0);
2016 }
2017 EXTERN_C_END
2018 
2019 #undef __FUNCT__
2020 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR"
2021 /*@C
2022    MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in block AIJ format
2023    (block compressed row), given the nonzero structure of the local rows in CSR form.
2024 
2025    Collective on MPI_Comm
2026 
2027    Input Parameters:
2028 +  B - the matrix
2029 .  bs - the block size
2030 .  i - the indices into j for the start of each local block row (starts with zero)
2031 .  j - the block column indices for each local block row (starts with zero); these must be sorted within each row
2032 -  v - optional values in the matrix, stored block by block
2032 
2033    Level: developer
2034 
2035 .keywords: matrix, aij, compressed row, sparse, parallel
2036 
2037 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIBAIJ(), MATMPIBAIJ
2038 @*/
2039 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2040 {
2041   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]);
2042 
2043   PetscFunctionBegin;
2044   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
2045   if (f) {
2046     ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr);
2047   }
2048   PetscFunctionReturn(0);
2049 }
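
/*
   Usage sketch: a minimal fragment, assuming a matrix B of type MATMPIBAIJ whose
   sizes have been set; ii and jj describe two local block rows in block terms
   (global block column indices, sorted within each row), and passing PETSC_NULL
   for v assembles zero blocks.

     PetscInt ii[] = {0,2,3};
     PetscInt jj[] = {0,1,1};
     ierr = MatMPIBAIJSetPreallocationCSR(B,2,ii,jj,PETSC_NULL);CHKERRQ(ierr);
*/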
2050 
2051 EXTERN_C_BEGIN
2052 #undef __FUNCT__
2053 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
2054 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
2055 {
2056   Mat_MPIBAIJ    *b;
2057   PetscErrorCode ierr;
2058   PetscInt       i, newbs = PetscAbs(bs);
2059 
2060   PetscFunctionBegin;
2061   B->preallocated = PETSC_TRUE;
2062   if (bs < 0) {
2063     ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPIBAIJ matrix","Mat");CHKERRQ(ierr);
2064       ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr);
2065     ierr = PetscOptionsEnd();CHKERRQ(ierr);
2066     bs   = PetscAbs(bs);
2067   }
2068   if ((d_nnz || o_nnz) && newbs != bs) {
2069     SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting d_nnz or o_nnz");
2070   }
2071   bs = newbs;
2072 
2073 
2074   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
2075   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2076   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2077   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2078   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2079 
2080   B->rmap->bs  = bs;
2081   B->cmap->bs  = bs;
2082   ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr);
2083   ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr);
2084 
2085   if (d_nnz) {
2086     for (i=0; i<B->rmap->n/bs; i++) {
2087       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
2088     }
2089   }
2090   if (o_nnz) {
2091     for (i=0; i<B->rmap->n/bs; i++) {
2092       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
2093     }
2094   }
2095 
2096   b = (Mat_MPIBAIJ*)B->data;
2097   b->bs2 = bs*bs;
2098   b->mbs = B->rmap->n/bs;
2099   b->nbs = B->cmap->n/bs;
2100   b->Mbs = B->rmap->N/bs;
2101   b->Nbs = B->cmap->N/bs;
2102 
2103   for (i=0; i<=b->size; i++) {
2104     b->rangebs[i] = B->rmap->range[i]/bs;
2105   }
2106   b->rstartbs = B->rmap->rstart/bs;
2107   b->rendbs   = B->rmap->rend/bs;
2108   b->cstartbs = B->cmap->rstart/bs;
2109   b->cendbs   = B->cmap->rend/bs;
2110 
2111   ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2112   ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2113   ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
2114   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2115   ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
2116   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2117   ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2118   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
2119   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
2120   ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
2121 
2122   ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr);
2123 
2124   PetscFunctionReturn(0);
2125 }
2126 EXTERN_C_END
2127 
2128 EXTERN_C_BEGIN
2129 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
2130 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
2131 EXTERN_C_END
2132 
2133 /*MC
2134    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
2135 
2136    Options Database Keys:
2137 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
2138 . -mat_block_size <bs> - set the blocksize used to store the matrix
2139 - -mat_use_hash_table <fact> - use a hash table with size factor <fact> during matrix assembly
2140 
2141   Level: beginner
2142 
2143 .seealso: MatCreateMPIBAIJ()
2144 M*/
2145 
2146 EXTERN_C_BEGIN
2147 #undef __FUNCT__
2148 #define __FUNCT__ "MatCreate_MPIBAIJ"
2149 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIBAIJ(Mat B)
2150 {
2151   Mat_MPIBAIJ    *b;
2152   PetscErrorCode ierr;
2153   PetscTruth     flg;
2154 
2155   PetscFunctionBegin;
2156   ierr = PetscNewLog(B,Mat_MPIBAIJ,&b);CHKERRQ(ierr);
2157   B->data = (void*)b;
2158 
2159 
2160   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2161   B->mapping    = 0;
2162   B->assembled  = PETSC_FALSE;
2163 
2164   B->insertmode = NOT_SET_VALUES;
2165   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
2166   ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr);
2167 
2168   /* build local table of row and column ownerships */
2169   ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);
2170 
2171   /* build cache for off array entries formed */
2172   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
2173   b->donotstash  = PETSC_FALSE;
2174   b->colmap      = PETSC_NULL;
2175   b->garray      = PETSC_NULL;
2176   b->roworiented = PETSC_TRUE;
2177 
2178   /* stuff used in block assembly */
2179   b->barray       = 0;
2180 
2181   /* stuff used for matrix vector multiply */
2182   b->lvec         = 0;
2183   b->Mvctx        = 0;
2184 
2185   /* stuff for MatGetRow() */
2186   b->rowindices   = 0;
2187   b->rowvalues    = 0;
2188   b->getrowactive = PETSC_FALSE;
2189 
2190   /* hash table stuff */
2191   b->ht           = 0;
2192   b->hd           = 0;
2193   b->ht_size      = 0;
2194   b->ht_flag      = PETSC_FALSE;
2195   b->ht_fact      = 0;
2196   b->ht_total_ct  = 0;
2197   b->ht_insert_ct = 0;
2198 
2199   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr);
2200     ierr = PetscOptionsTruth("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
2201     if (flg) {
2202       PetscReal fact = 1.39;
2203       ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
2204       ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr);
2205       if (fact <= 1.0) fact = 1.39;
2206       ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
2207       ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
2208     }
2209   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2210 
2211   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2212                                      "MatStoreValues_MPIBAIJ",
2213                                      MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
2214   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2215                                      "MatRetrieveValues_MPIBAIJ",
2216                                      MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
2217   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
2218                                      "MatGetDiagonalBlock_MPIBAIJ",
2219                                      MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
2220   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C",
2221                                      "MatMPIBAIJSetPreallocation_MPIBAIJ",
2222                                      MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
2223   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",
2224                                      "MatMPIBAIJSetPreallocationCSR_MPIBAIJ",
2225                                      MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
2226   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
2227                                      "MatDiagonalScaleLocal_MPIBAIJ",
2228                                      MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
2229   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C",
2230                                      "MatSetHashTableFactor_MPIBAIJ",
2231                                      MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
2232   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);
2233   PetscFunctionReturn(0);
2234 }
2235 EXTERN_C_END
2236 
2237 /*MC
2238    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
2239 
2240    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
2241    and MATMPIBAIJ otherwise.
2242 
2243    Options Database Keys:
2244 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
2245 
2246   Level: beginner
2247 
2248 .seealso: MatCreateMPIBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
2249 M*/
2250 
2251 EXTERN_C_BEGIN
2252 #undef __FUNCT__
2253 #define __FUNCT__ "MatCreate_BAIJ"
2254 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_BAIJ(Mat A)
2255 {
2256   PetscErrorCode ierr;
2257   PetscMPIInt    size;
2258 
2259   PetscFunctionBegin;
2260   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
2261   if (size == 1) {
2262     ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr);
2263   } else {
2264     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
2265   }
2266   PetscFunctionReturn(0);
2267 }
2268 EXTERN_C_END
2269 
2270 #undef __FUNCT__
2271 #define __FUNCT__ "MatMPIBAIJSetPreallocation"
2272 /*@C
2273    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
2274    (block compressed row).  For good matrix assembly performance
2275    the user should preallocate the matrix storage by setting the parameters
2276    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2277    performance can be increased by more than a factor of 50.
2278 
2279    Collective on Mat
2280 
2281    Input Parameters:
2282 +  A - the matrix
2283 .  bs   - size of block
2284 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2285            submatrix  (same for all local rows)
2286 .  d_nnz - array containing the number of block nonzeros in the various block rows
2287            in the diagonal portion of the local submatrix (possibly different for each block
2288            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
2289 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2290            submatrix (same for all local rows).
2291 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2292            off-diagonal portion of the local submatrix (possibly different for
2293            each block row) or PETSC_NULL.
2294 
2295    If the *_nnz parameter is given then the *_nz parameter is ignored
2296 
2297    Options Database Keys:
2298 +   -mat_block_size - size of the blocks to use
2299 -   -mat_use_hash_table <fact> - use a hash table with size factor <fact> during matrix assembly
2300 
2301    Notes:
2302    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
2303    then it must be used on all processors that share the object for that argument.
2304 
2305    Storage Information:
2306    For a square global matrix we define each processor's diagonal portion
2307    to be its local rows and the corresponding columns (a square submatrix);
2308    each processor's off-diagonal portion encompasses the remainder of the
2309    local matrix (a rectangular submatrix).
2310 
2311    The user can specify preallocated storage for the diagonal part of
2312    the local submatrix with either d_nz or d_nnz (not both).  Set
2313    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2314    memory allocation.  Likewise, specify preallocated storage for the
2315    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2316 
2317    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2318    the figure below we depict these three local rows and all columns (0-11).
2319 
2320 .vb
2321            0 1 2 3 4 5 6 7 8 9 10 11
2322           -------------------
2323    row 3  |  o o o d d d o o o o o o
2324    row 4  |  o o o d d d o o o o o o
2325    row 5  |  o o o d d d o o o o o o
2326           -------------------
2327 .ve
2328 
2329    Thus, any entries in the d locations are stored in the d (diagonal)
2330    submatrix, and any entries in the o locations are stored in the
2331    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
2332    stored simply in the MATSEQBAIJ format for compressed row storage.
2333 
2334    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2335    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2336    In general, for PDE problems in which most nonzeros are near the diagonal,
2337    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2338    or you will get TERRIBLE performance; see the users' manual chapter on
2339    matrices.
2340 
2341    You can call MatGetInfo() to get information on how effective the preallocation was;
2342    for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
2343    You can also run with the option -info and look for messages with the string
2344    malloc in them to see if additional memory allocation was needed.
2345 
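   Example usage:
   A minimal sketch; the block size and per-block-row counts below are illustrative only:
.vb
   ierr = MatCreate(comm,&A);
   ierr = MatSetSizes(A,m,n,M,N);
   ierr = MatSetType(A,MATMPIBAIJ);
   ierr = MatMPIBAIJSetPreallocation(A,2,3,PETSC_NULL,1,PETSC_NULL);
.ve
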
2346    Level: intermediate
2347 
2348 .keywords: matrix, block, aij, compressed row, sparse, parallel
2349 
2350 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR()
2351 @*/
2352 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2353 {
2354   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
2355 
2356   PetscFunctionBegin;
2357   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2358   if (f) {
2359     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2360   }
2361   PetscFunctionReturn(0);
2362 }
2363 
2364 #undef __FUNCT__
2365 #define __FUNCT__ "MatCreateMPIBAIJ"
2366 /*@C
2367    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
2368    (block compressed row).  For good matrix assembly performance
2369    the user should preallocate the matrix storage by setting the parameters
2370    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2371    performance can be increased by more than a factor of 50.
2372 
2373    Collective on MPI_Comm
2374 
2375    Input Parameters:
2376 +  comm - MPI communicator
2377 .  bs   - size of block
2378 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
2379            This value should be the same as the local size used in creating the
2380            y vector for the matrix-vector product y = Ax.
2381 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
2382            This value should be the same as the local size used in creating the
2383            x vector for the matrix-vector product y = Ax.
2384 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
2385 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
2386 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
2387            submatrix  (same for all local rows)
2388 .  d_nnz - array containing the number of nonzero blocks in the various block rows
2389            in the diagonal portion of the local submatrix (possibly different for each block
2390            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
2391 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
2392            submatrix (same for all local rows).
2393 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
2394            off-diagonal portion of the local submatrix (possibly different for
2395            each block row) or PETSC_NULL.
2396 
2397    Output Parameter:
2398 .  A - the matrix
2399 
2400    Options Database Keys:
2401 +   -mat_block_size - size of the blocks to use
2402 -   -mat_use_hash_table <fact> - use a hash table with size factor <fact> during matrix assembly
2403 
2404    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
2405    MatXXXXSetPreallocation() paradigm instead of this routine directly. This is especially
2406    true if you plan to use the external direct solvers such as SuperLU, MUMPS or Spooles.
2407    [MatXXXXSetPreallocation() is, for example, MatMPIBAIJSetPreallocation()]
2408 
2409    Notes:
2410    If the *_nnz parameter is given then the *_nz parameter is ignored
2411 
2412    A nonzero block is any block that has one or more nonzeros in it
2413 
2414    The user MUST specify either the local or global matrix dimensions
2415    (possibly both).
2416 
2417    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
2418    then it must be used on all processors that share the object for that argument.
2419 
2420    Storage Information:
2421    For a square global matrix we define each processor's diagonal portion
2422    to be its local rows and the corresponding columns (a square submatrix);
2423    each processor's off-diagonal portion encompasses the remainder of the
2424    local matrix (a rectangular submatrix).
2425 
2426    The user can specify preallocated storage for the diagonal part of
2427    the local submatrix with either d_nz or d_nnz (not both).  Set
2428    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2429    memory allocation.  Likewise, specify preallocated storage for the
2430    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2431 
2432    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2433    the figure below we depict these three local rows and all columns (0-11).
2434 
2435 .vb
2436            0 1 2 3 4 5 6 7 8 9 10 11
2437           -------------------
2438    row 3  |  o o o d d d o o o o o o
2439    row 4  |  o o o d d d o o o o o o
2440    row 5  |  o o o d d d o o o o o o
2441           -------------------
2442 .ve
2443 
2444    Thus, any entries in the d locations are stored in the d (diagonal)
2445    submatrix, and any entries in the o locations are stored in the
2446    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
2447    stored simply in the MATSEQBAIJ format for compressed row storage.
2448 
2449    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2450    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2451    In general, for PDE problems in which most nonzeros are near the diagonal,
2452    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2453    or you will get TERRIBLE performance; see the users' manual chapter on
2454    matrices.
2455 
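   Example usage:
   A minimal sketch; the block size and nonzero counts below are illustrative only:
.vb
   ierr = MatCreateMPIBAIJ(comm,2,PETSC_DECIDE,PETSC_DECIDE,M,N,3,PETSC_NULL,1,PETSC_NULL,&A);
.ve
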
2456    Level: intermediate
2457 
2458 .keywords: matrix, block, aij, compressed row, sparse, parallel
2459 
2460 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
2461 @*/
2462 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2463 {
2464   PetscErrorCode ierr;
2465   PetscMPIInt    size;
2466 
2467   PetscFunctionBegin;
2468   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2469   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2470   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2471   if (size > 1) {
2472     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
2473     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2474   } else {
2475     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
2476     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2477   }
2478   PetscFunctionReturn(0);
2479 }
2480 
2481 #undef __FUNCT__
2482 #define __FUNCT__ "MatDuplicate_MPIBAIJ"
2483 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2484 {
2485   Mat            mat;
2486   Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
2487   PetscErrorCode ierr;
2488   PetscInt       len=0;
2489 
2490   PetscFunctionBegin;
2491   *newmat       = 0;
2492   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
2493   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2494   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2495   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2496 
2497   mat->factor       = matin->factor;
2498   mat->preallocated = PETSC_TRUE;
2499   mat->assembled    = PETSC_TRUE;
2500   mat->insertmode   = NOT_SET_VALUES;
2501 
2502   a      = (Mat_MPIBAIJ*)mat->data;
2503   mat->rmap->bs  = matin->rmap->bs;
2504   a->bs2   = oldmat->bs2;
2505   a->mbs   = oldmat->mbs;
2506   a->nbs   = oldmat->nbs;
2507   a->Mbs   = oldmat->Mbs;
2508   a->Nbs   = oldmat->Nbs;
2509 
2510   ierr = PetscMapCopy(((PetscObject)matin)->comm,matin->rmap,mat->rmap);CHKERRQ(ierr);
2511   ierr = PetscMapCopy(((PetscObject)matin)->comm,matin->cmap,mat->cmap);CHKERRQ(ierr);
2512 
2513   a->size         = oldmat->size;
2514   a->rank         = oldmat->rank;
2515   a->donotstash   = oldmat->donotstash;
2516   a->roworiented  = oldmat->roworiented;
2517   a->rowindices   = 0;
2518   a->rowvalues    = 0;
2519   a->getrowactive = PETSC_FALSE;
2520   a->barray       = 0;
2521   a->rstartbs     = oldmat->rstartbs;
2522   a->rendbs       = oldmat->rendbs;
2523   a->cstartbs     = oldmat->cstartbs;
2524   a->cendbs       = oldmat->cendbs;
2525 
2526   /* hash table stuff */
2527   a->ht           = 0;
2528   a->hd           = 0;
2529   a->ht_size      = 0;
2530   a->ht_flag      = oldmat->ht_flag;
2531   a->ht_fact      = oldmat->ht_fact;
2532   a->ht_total_ct  = 0;
2533   a->ht_insert_ct = 0;
2534 
2535   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
2536   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,1,&mat->stash);CHKERRQ(ierr);
2537   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
2538   if (oldmat->colmap) {
2539 #if defined (PETSC_USE_CTABLE)
2540   ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2541 #else
2542   ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2543   ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2544   ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2545 #endif
2546   } else a->colmap = 0;
2547 
2548   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2549     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2550     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2551     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
2552   } else a->garray = 0;
2553 
2554   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2555   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2556   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2557   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2558 
2559   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2560   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2561   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2562   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2563   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2564   *newmat = mat;
2565 
2566   PetscFunctionReturn(0);
2567 }
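
/*
   Usage sketch: a minimal fragment, assuming an assembled MPIBAIJ matrix A;
   MAT_COPY_VALUES copies the numerical values as well, while
   MAT_DO_NOT_COPY_VALUES duplicates only the nonzero structure.

     Mat B;
     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
     ierr = MatDestroy(B);CHKERRQ(ierr);
*/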
2568 
2569 #include "petscsys.h"
2570 
2571 #undef __FUNCT__
2572 #define __FUNCT__ "MatLoad_MPIBAIJ"
2573 PetscErrorCode MatLoad_MPIBAIJ(PetscViewer viewer, const MatType type,Mat *newmat)
2574 {
2575   Mat            A;
2576   PetscErrorCode ierr;
2577   int            fd;
2578   PetscInt       i,nz,j,rstart,rend;
2579   PetscScalar    *vals,*buf;
2580   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2581   MPI_Status     status;
2582   PetscMPIInt    rank,size,maxnz;
2583   PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
2584   PetscInt       *locrowlens = PETSC_NULL,*procsnz = PETSC_NULL,*browners = PETSC_NULL;
2585   PetscInt       jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax;
2586   PetscMPIInt    tag = ((PetscObject)viewer)->tag;
2587   PetscInt       *dlens = PETSC_NULL,*odlens = PETSC_NULL,*mask = PETSC_NULL,*masked1 = PETSC_NULL,*masked2 = PETSC_NULL,rowcount,odcount;
2588   PetscInt       dcount,kmax,k,nzcount,tmp,mend;
2589 
2590   PetscFunctionBegin;
2591   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr);
2592     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
2593   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2594 
2595   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2596   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2597   if (!rank) {
2598     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2599     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2600     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2601   }
2602 
2603   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2604   M = header[1]; N = header[2];
2605 
2606   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2607 
2608   /*
2609      This code adds extra rows to make sure the number of rows is
2610      divisible by the blocksize
2611   */
2612   Mbs        = M/bs;
2613   extra_rows = bs - M + bs*Mbs;
2614   if (extra_rows == bs) extra_rows = 0;
2615   else                  Mbs++;
2616   if (extra_rows && !rank) {
2617     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2618   }
2619 
2620   /* determine ownership of all rows */
2621   mbs        = Mbs/size + ((Mbs % size) > rank);
2622   m          = mbs*bs;
2623   ierr       = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr);
2624   ierr       = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
2625 
2626   /* process 0 needs enough room for process with most rows */
2627   if (!rank) {
2628     mmax = rowners[1];
2629     for (i=2; i<size; i++) {
2630       mmax = PetscMax(mmax,rowners[i]);
2631     }
2632     mmax*=bs;
2633   } else mmax = m;
2634 
2635   rowners[0] = 0;
2636   for (i=2; i<=size; i++)  rowners[i] += rowners[i-1];
2637   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2638   rstart = rowners[rank];
2639   rend   = rowners[rank+1];
2640 
2641   /* distribute row lengths to all processors */
2642   ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr);
2643   if (!rank) {
2644     mend = m;
2645     if (size == 1) mend = mend - extra_rows;
2646     ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr);
2647     for (j=mend; j<m; j++) locrowlens[j] = 1;
2648     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2649     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2650     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2651     for (j=0; j<m; j++) {
2652       procsnz[0] += locrowlens[j];
2653     }
2654     for (i=1; i<size; i++) {
2655       mend = browners[i+1] - browners[i];
2656       if (i == size-1) mend = mend - extra_rows;
2657       ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr);
2658       for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
2659       /* calculate the number of nonzeros on each processor */
2660       for (j=0; j<browners[i+1]-browners[i]; j++) {
2661         procsnz[i] += rowlengths[j];
2662       }
2663       ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2664     }
2665     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2666   } else {
2667     ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2668   }
2669 
2670   if (!rank) {
2671     /* determine max buffer needed and allocate it */
2672     maxnz = procsnz[0];
2673     for (i=1; i<size; i++) {
2674       maxnz = PetscMax(maxnz,procsnz[i]);
2675     }
2676     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2677 
2678     /* read in my part of the matrix column indices  */
2679     nz     = procsnz[0];
2680     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2681     mycols = ibuf;
2682     if (size == 1)  nz -= extra_rows;
2683     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2684     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2685 
2686     /* read in the parts for all other procs (except the last) and ship them off */
2687     for (i=1; i<size-1; i++) {
2688       nz   = procsnz[i];
2689       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2690       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2691     }
2692     /* read in the stuff for the last proc */
2693     if (size != 1) {
2694       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2695       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2696       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2697       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2698     }
2699     ierr = PetscFree(cols);CHKERRQ(ierr);
2700   } else {
2701     /* determine buffer space needed for message */
2702     nz = 0;
2703     for (i=0; i<m; i++) {
2704       nz += locrowlens[i];
2705     }
2706     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2707     mycols = ibuf;
2708     /* receive message of column indices*/
2709     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2710     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2711     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2712   }
2713 
2714   /* loop over local rows, determining number of off diagonal entries */
2715   ierr     = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
2716   ierr     = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
2717   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2718   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2719   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2720   rowcount = 0; nzcount = 0;
2721   for (i=0; i<mbs; i++) {
2722     dcount  = 0;
2723     odcount = 0;
2724     for (j=0; j<bs; j++) {
2725       kmax = locrowlens[rowcount];
2726       for (k=0; k<kmax; k++) {
2727         tmp = mycols[nzcount++]/bs;
2728         if (!mask[tmp]) {
2729           mask[tmp] = 1;
2730           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
2731           else masked1[dcount++] = tmp;
2732         }
2733       }
2734       rowcount++;
2735     }
2736 
2737     dlens[i]  = dcount;
2738     odlens[i] = odcount;
2739 
2740     /* zero out the mask elements we set */
2741     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2742     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2743   }
2744 
2745   /* create our matrix */
2746   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
2747   ierr = MatSetSizes(A,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
2748   ierr = MatSetType(A,type);CHKERRQ(ierr);
2749   ierr = MatMPIBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2750 
2751   if (!rank) {
2752     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2753     /* read in my part of the matrix numerical values  */
2754     nz = procsnz[0];
2755     vals = buf;
2756     mycols = ibuf;
2757     if (size == 1)  nz -= extra_rows;
2758     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2759     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2760 
2761     /* insert into matrix */
2762     jj      = rstart*bs;
2763     for (i=0; i<m; i++) {
2764       ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2765       mycols += locrowlens[i];
2766       vals   += locrowlens[i];
2767       jj++;
2768     }
2769     /* read in the parts for the other processors (except the last one) and ship them out */
2770     for (i=1; i<size-1; i++) {
2771       nz   = procsnz[i];
2772       vals = buf;
2773       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2774       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
2775     }
2776     /* the last proc */
2777     if (size != 1){
2778       nz   = procsnz[i] - extra_rows;
2779       vals = buf;
2780       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2781       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2782       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
2783     }
2784     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2785   } else {
2786     /* receive numeric values */
2787     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2788 
2789     /* receive message of values */
2790     vals   = buf;
2791     mycols = ibuf;
2792     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
2793     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2794     if (maxnz != nz) SETERRQ2(PETSC_ERR_FILE_UNEXPECTED,"Received %D values but expected %D: file is likely corrupt",maxnz,nz);
2795 
2796     /* insert into matrix */
2797     jj      = rstart*bs;
2798     for (i=0; i<m; i++) {
2799       ierr    = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2800       mycols += locrowlens[i];
2801       vals   += locrowlens[i];
2802       jj++;
2803     }
2804   }
2805   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2806   ierr = PetscFree(buf);CHKERRQ(ierr);
2807   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2808   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
2809   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
2810   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
2811   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2812   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2813 
2814   *newmat = A;
2815   PetscFunctionReturn(0);
2816 }
2817 
2818 #undef __FUNCT__
2819 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
2820 /*@
2821    MatMPIBAIJSetHashTableFactor - Sets the factor used to compute the size of the hash table employed during matrix assembly.
2822 
2823    Collective on Mat
2824 
2825    Input Parameters:
2826 +  mat  - the matrix
2827 -  fact - factor used to size the hash table; a larger factor yields a larger, sparser table
2828 
2829    Level: advanced
2830 
2831   Notes:
2832    This can also be set by the command line option: -mat_use_hash_table <fact>
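
   Example (an illustrative sketch; assumes this release provides the
   MAT_USE_HASH_TABLE option, and 1.6 is an arbitrary sample factor):
.vb
      ierr = MatSetOption(mat,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
      ierr = MatMPIBAIJSetHashTableFactor(mat,1.6);CHKERRQ(ierr);
.ve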
2833 
2834 .keywords: matrix, hashtable, factor, HT
2835 
2836 .seealso: MatSetOption()
2837 @*/
2838 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
2839 {
2840   PetscErrorCode ierr,(*f)(Mat,PetscReal);
2841 
2842   PetscFunctionBegin;
2843   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);CHKERRQ(ierr);
2844   if (f) {
2845     ierr = (*f)(mat,fact);CHKERRQ(ierr);
2846   }
2847   PetscFunctionReturn(0);
2848 }
2849 
2850 EXTERN_C_BEGIN
2851 #undef __FUNCT__
2852 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
2853 PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
2854 {
2855   Mat_MPIBAIJ *baij;
2856 
2857   PetscFunctionBegin;
2858   baij = (Mat_MPIBAIJ*)mat->data;
2859   baij->ht_fact = fact;
2860   PetscFunctionReturn(0);
2861 }
2862 EXTERN_C_END
2863 
2864 #undef __FUNCT__
2865 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
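/*
   Gives access to the constituent pieces of an MPIBAIJ matrix: Ad is the
   local diagonal block (coupling to on-process block columns), Ao is the
   local off-diagonal block, and colmap[i] is the global block column
   corresponding to block column i of Ao.  A typical call looks like

      Mat      Ad,Ao;
      PetscInt *colmap;
      ierr = MatMPIBAIJGetSeqBAIJ(A,&Ad,&Ao,&colmap);CHKERRQ(ierr);
*/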
2866 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
2867 {
2868   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2869   PetscFunctionBegin;
2870   *Ad     = a->A;
2871   *Ao     = a->B;
2872   *colmap = a->garray;
2873   PetscFunctionReturn(0);
2874 }
2875 
2876 /*
2877     Special version for direct calls from Fortran (to eliminate two levels of function call overhead)
2878 */
2879 #if defined(PETSC_HAVE_FORTRAN_CAPS)
2880 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
2881 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
2882 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
2883 #endif
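/*
   The macros above produce the symbol name the Fortran compiler expects:
   all upper case when PETSC_HAVE_FORTRAN_CAPS is defined, no trailing
   underscore when PETSC_HAVE_FORTRAN_UNDERSCORE is not defined, and the
   default matmpibaijsetvaluesblocked_ otherwise.
*/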
2884 
2885 #undef __FUNCT__
2886 #define __FUNCT__ "matmpibiajsetvaluesblocked"
2887 /*@C
2888   MatMPIBAIJSetValuesBlocked - Direct Fortran interface replacing a call to MatSetValuesBlocked()
2889 
2890   Collective on Mat
2891 
2892   Input Parameters:
2893 + mat - the matrix
2894 . min - number of input rows
2895 . im - input rows
2896 . nin - number of input columns
2897 . in - input columns
2898 . v - numerical values input
2899 - addvin - INSERT_VALUES or ADD_VALUES
2900 
2901   Notes: This contains a complete copy of MatSetValuesBlocked_MPIBAIJ(); the duplication is regrettable, but it eliminates the two function calls this routine exists to avoid.
2902 
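  Example usage (an illustrative sketch of the equivalent C call, assuming
  mat has block size 2; the values shown are arbitrary):
.vb
      PetscInt    row = 0,col = 1;
      PetscScalar v[4] = {1.0,2.0,3.0,4.0};
      ierr = MatSetValuesBlocked(mat,1,&row,1,&col,v,INSERT_VALUES);CHKERRQ(ierr);
.ve
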
2903   Level: advanced
2904 
2905 .seealso:   MatSetValuesBlocked()
2906 @*/
2907 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
2908 {
2909   /* convert input arguments to C version */
2910   Mat             mat = *matin;
2911   PetscInt        m = *min, n = *nin;
2912   InsertMode      addv = *addvin;
2913 
2914   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
2915   const MatScalar *value;
2916   MatScalar       *barray=baij->barray;
2917   PetscTruth      roworiented = baij->roworiented;
2918   PetscErrorCode  ierr;
2919   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
2920   PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
2921   PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
2922 
2923   PetscFunctionBegin;
2924   /* tasks normally handled by MatSetValuesBlocked() */
2925   if (mat->insertmode == NOT_SET_VALUES) {
2926     mat->insertmode = addv;
2927   }
2928 #if defined(PETSC_USE_DEBUG)
2929   else if (mat->insertmode != addv) {
2930     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
2931   }
2932   if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2933 #endif
2934   if (mat->assembled) {
2935     mat->was_assembled = PETSC_TRUE;
2936     mat->assembled     = PETSC_FALSE;
2937   }
2938   ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
2939 
2940 
2941   if (!barray) {
2942     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
2943     baij->barray = barray;
2944   }
2945 
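  /*
     stepval is the stride (in scalars) from the end of one bs-long row of a
     block to the start of that block's next row within the caller's dense
     (m*bs) by (n*bs) array v (rows become columns when v is column oriented).
  */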
2946   if (roworiented) {
2947     stepval = (n-1)*bs;
2948   } else {
2949     stepval = (m-1)*bs;
2950   }
2951   for (i=0; i<m; i++) {
2952     if (im[i] < 0) continue;
2953 #if defined(PETSC_USE_DEBUG)
2954     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
2955 #endif
2956     if (im[i] >= rstart && im[i] < rend) {
2957       row = im[i] - rstart;
2958       for (j=0; j<n; j++) {
2959         /* if only one block column (row oriented) or block row (column oriented) is given, v already holds a contiguous bs2 block and no copy is required */
2960         if ((roworiented) && (n == 1)) {
2961           barray = (MatScalar*)v + i*bs2;
2962         } else if ((!roworiented) && (m == 1)) {
2963           barray = (MatScalar*)v + j*bs2;
2964         } else { /* Here a copy is required */
2965           if (roworiented) {
2966             value = v + i*(stepval+bs)*bs + j*bs;
2967           } else {
2968             value = v + j*(stepval+bs)*bs + i*bs;
2969           }
2970           for (ii=0; ii<bs; ii++,value+=stepval) {
2971             for (jj=0; jj<bs; jj++) {
2972               *barray++  = *value++;
2973             }
2974           }
2975           barray -= bs2;
2976         }
2977 
2978         if (in[j] >= cstart && in[j] < cend){
2979           col  = in[j] - cstart;
2980           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
2981         }
2982         else if (in[j] < 0) continue;
2983 #if defined(PETSC_USE_DEBUG)
2984         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
2985 #endif
2986         else {
2987           if (mat->was_assembled) {
2988             if (!baij->colmap) {
2989               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
2990             }
2991 
2992 #if defined(PETSC_USE_DEBUG)
2993 #if defined (PETSC_USE_CTABLE)
2994             { PetscInt data;
2995               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
2996               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
2997             }
2998 #else
2999             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
3000 #endif
3001 #endif
3002 #if defined (PETSC_USE_CTABLE)
3003             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
3004             col  = (col - 1)/bs;
3005 #else
3006             col = (baij->colmap[in[j]] - 1)/bs;
3007 #endif
3008             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
3009               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
3010               col =  in[j];
3011             }
3012           }
3013           else col = in[j];
3014           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
3015         }
3016       }
3017     } else {
3018       if (!baij->donotstash) {
3019         if (roworiented) {
3020           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
3021         } else {
3022           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
3023         }
3024       }
3025     }
3026   }
3027 
3028   /* task normally handled by MatSetValuesBlocked() */
3029   ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
3030   PetscFunctionReturn(0);
3031 }
3032