xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 2576faa2086d9b116d85f33d71a74699055c8042)
1 
2 #include <../src/mat/impls/baij/mpi/mpibaij.h>   /*I  "petscmat.h"  I*/
3 #include <petscblaslapack.h>
4 
5 extern PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
6 extern PetscErrorCode DisAssemble_MPIBAIJ(Mat);
7 extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []);
8 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
9 extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
10 extern PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
11 extern PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
12 extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar,Vec,Vec);
13 
14 #undef __FUNCT__
15 #define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ"
16 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
17 {
18   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
19   PetscErrorCode ierr;
20   PetscInt       i,*idxb = 0;
21   PetscScalar    *va,*vb;
22   Vec            vtmp;
23 
24   PetscFunctionBegin;
25   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
26   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
27   if (idx) {
28     for (i=0; i<A->rmap->n; i++) {if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;}
29   }
30 
31   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
32   if (idx) {ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);}
33   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
34   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
35 
36   for (i=0; i<A->rmap->n; i++){
37     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {va[i] = vb[i]; if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);}
38   }
39 
40   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
41   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
42   ierr = PetscFree(idxb);CHKERRQ(ierr);
43   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
44   PetscFunctionReturn(0);
45 }
46 
47 EXTERN_C_BEGIN
48 #undef __FUNCT__
49 #define __FUNCT__ "MatStoreValues_MPIBAIJ"
50 PetscErrorCode  MatStoreValues_MPIBAIJ(Mat mat)
51 {
52   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
53   PetscErrorCode ierr;
54 
55   PetscFunctionBegin;
56   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
57   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
58   PetscFunctionReturn(0);
59 }
60 EXTERN_C_END
61 
62 EXTERN_C_BEGIN
63 #undef __FUNCT__
64 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
65 PetscErrorCode  MatRetrieveValues_MPIBAIJ(Mat mat)
66 {
67   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
68   PetscErrorCode ierr;
69 
70   PetscFunctionBegin;
71   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
72   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
73   PetscFunctionReturn(0);
74 }
75 EXTERN_C_END
76 
77 /*
78      Local utility routine that creates a mapping from the global column
79    number to the local number in the off-diagonal part of the local
80    storage of the matrix.  This is done in a non scalable way since the
81    length of colmap equals the global matrix length.
82 */
83 #undef __FUNCT__
84 #define __FUNCT__ "CreateColmap_MPIBAIJ_Private"
85 PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat)
86 {
87   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
88   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
89   PetscErrorCode ierr;
90   PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;
91 
92   PetscFunctionBegin;
93 #if defined (PETSC_USE_CTABLE)
94   ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
95   for (i=0; i<nbs; i++){
96     ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr);
97   }
98 #else
99   ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr);
100   ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
101   ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
102   for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
103 #endif
104   PetscFunctionReturn(0);
105 }
106 
107 #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
108 { \
109  \
110     brow = row/bs;  \
111     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
112     rmax = aimax[brow]; nrow = ailen[brow]; \
113       bcol = col/bs; \
114       ridx = row % bs; cidx = col % bs; \
115       low = 0; high = nrow; \
116       while (high-low > 3) { \
117         t = (low+high)/2; \
118         if (rp[t] > bcol) high = t; \
119         else              low  = t; \
120       } \
121       for (_i=low; _i<high; _i++) { \
122         if (rp[_i] > bcol) break; \
123         if (rp[_i] == bcol) { \
124           bap  = ap +  bs2*_i + bs*cidx + ridx; \
125           if (addv == ADD_VALUES) *bap += value;  \
126           else                    *bap  = value;  \
127           goto a_noinsert; \
128         } \
129       } \
130       if (a->nonew == 1) goto a_noinsert; \
131       if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
132       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
133       N = nrow++ - 1;  \
134       /* shift up all the later entries in this row */ \
135       for (ii=N; ii>=_i; ii--) { \
136         rp[ii+1] = rp[ii]; \
137         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
138       } \
139       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
140       rp[_i]                      = bcol;  \
141       ap[bs2*_i + bs*cidx + ridx] = value;  \
142       a_noinsert:; \
143     ailen[brow] = nrow; \
144 }
145 
146 #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
147 { \
148     brow = row/bs;  \
149     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
150     rmax = bimax[brow]; nrow = bilen[brow]; \
151       bcol = col/bs; \
152       ridx = row % bs; cidx = col % bs; \
153       low = 0; high = nrow; \
154       while (high-low > 3) { \
155         t = (low+high)/2; \
156         if (rp[t] > bcol) high = t; \
157         else              low  = t; \
158       } \
159       for (_i=low; _i<high; _i++) { \
160         if (rp[_i] > bcol) break; \
161         if (rp[_i] == bcol) { \
162           bap  = ap +  bs2*_i + bs*cidx + ridx; \
163           if (addv == ADD_VALUES) *bap += value;  \
164           else                    *bap  = value;  \
165           goto b_noinsert; \
166         } \
167       } \
168       if (b->nonew == 1) goto b_noinsert; \
169       if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
170       MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
171       CHKMEMQ;\
172       N = nrow++ - 1;  \
173       /* shift up all the later entries in this row */ \
174       for (ii=N; ii>=_i; ii--) { \
175         rp[ii+1] = rp[ii]; \
176         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
177       } \
178       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
179       rp[_i]                      = bcol;  \
180       ap[bs2*_i + bs*cidx + ridx] = value;  \
181       b_noinsert:; \
182     bilen[brow] = nrow; \
183 }
184 
185 #undef __FUNCT__
186 #define __FUNCT__ "MatSetValues_MPIBAIJ"
187 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
188 {
189   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
190   MatScalar      value;
191   PetscBool      roworiented = baij->roworiented;
192   PetscErrorCode ierr;
193   PetscInt       i,j,row,col;
194   PetscInt       rstart_orig=mat->rmap->rstart;
195   PetscInt       rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart;
196   PetscInt       cend_orig=mat->cmap->rend,bs=mat->rmap->bs;
197 
198   /* Some Variables required in the macro */
199   Mat            A = baij->A;
200   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)(A)->data;
201   PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
202   MatScalar      *aa=a->a;
203 
204   Mat            B = baij->B;
205   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(B)->data;
206   PetscInt       *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
207   MatScalar      *ba=b->a;
208 
209   PetscInt       *rp,ii,nrow,_i,rmax,N,brow,bcol;
210   PetscInt       low,high,t,ridx,cidx,bs2=a->bs2;
211   MatScalar      *ap,*bap;
212 
213   PetscFunctionBegin;
214   if (v) PetscValidScalarPointer(v,6);
215   for (i=0; i<m; i++) {
216     if (im[i] < 0) continue;
217 #if defined(PETSC_USE_DEBUG)
218     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
219 #endif
220     if (im[i] >= rstart_orig && im[i] < rend_orig) {
221       row = im[i] - rstart_orig;
222       for (j=0; j<n; j++) {
223         if (in[j] >= cstart_orig && in[j] < cend_orig){
224           col = in[j] - cstart_orig;
225           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
226           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
227           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
228         } else if (in[j] < 0) continue;
229 #if defined(PETSC_USE_DEBUG)
230         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
231 #endif
232         else {
233           if (mat->was_assembled) {
234             if (!baij->colmap) {
235               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
236             }
237 #if defined (PETSC_USE_CTABLE)
238             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
239             col  = col - 1;
240 #else
241             col = baij->colmap[in[j]/bs] - 1;
242 #endif
243             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
244               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
245               col =  in[j];
246               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
247               B = baij->B;
248               b = (Mat_SeqBAIJ*)(B)->data;
249               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
250               ba=b->a;
251             } else col += in[j]%bs;
252           } else col = in[j];
253           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
254           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
255           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
256         }
257       }
258     } else {
259       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
260       if (!baij->donotstash) {
261         if (roworiented) {
262           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
263         } else {
264           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
265         }
266       }
267     }
268   }
269   PetscFunctionReturn(0);
270 }
271 
272 #undef __FUNCT__
273 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
274 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
275 {
276   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
277   const PetscScalar *value;
278   MatScalar         *barray=baij->barray;
279   PetscBool         roworiented = baij->roworiented;
280   PetscErrorCode    ierr;
281   PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
282   PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
283   PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
284 
285   PetscFunctionBegin;
286   if(!barray) {
287     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
288     baij->barray = barray;
289   }
290 
291   if (roworiented) {
292     stepval = (n-1)*bs;
293   } else {
294     stepval = (m-1)*bs;
295   }
296   for (i=0; i<m; i++) {
297     if (im[i] < 0) continue;
298 #if defined(PETSC_USE_DEBUG)
299     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
300 #endif
301     if (im[i] >= rstart && im[i] < rend) {
302       row = im[i] - rstart;
303       for (j=0; j<n; j++) {
304         /* If NumCol = 1 then a copy is not required */
305         if ((roworiented) && (n == 1)) {
306           barray = (MatScalar*)v + i*bs2;
307         } else if((!roworiented) && (m == 1)) {
308           barray = (MatScalar*)v + j*bs2;
309         } else { /* Here a copy is required */
310           if (roworiented) {
311             value = v + (i*(stepval+bs) + j)*bs;
312           } else {
313 	    value = v + (j*(stepval+bs) + i)*bs;
314           }
315           for (ii=0; ii<bs; ii++,value+=bs+stepval) {
316             for (jj=0; jj<bs; jj++) {
317               barray[jj]  = value[jj];
318             }
319             barray += bs;
320           }
321           barray -= bs2;
322         }
323 
324         if (in[j] >= cstart && in[j] < cend){
325           col  = in[j] - cstart;
326           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
327         }
328         else if (in[j] < 0) continue;
329 #if defined(PETSC_USE_DEBUG)
330         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
331 #endif
332         else {
333           if (mat->was_assembled) {
334             if (!baij->colmap) {
335               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
336             }
337 
338 #if defined(PETSC_USE_DEBUG)
339 #if defined (PETSC_USE_CTABLE)
340             { PetscInt data;
341               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
342               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
343             }
344 #else
345             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
346 #endif
347 #endif
348 #if defined (PETSC_USE_CTABLE)
349 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
350             col  = (col - 1)/bs;
351 #else
352             col = (baij->colmap[in[j]] - 1)/bs;
353 #endif
354             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
355               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
356               col =  in[j];
357             }
358           }
359           else col = in[j];
360           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
361         }
362       }
363     } else {
364       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
365       if (!baij->donotstash) {
366         if (roworiented) {
367           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
368         } else {
369           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
370         }
371       }
372     }
373   }
374   PetscFunctionReturn(0);
375 }
376 
377 #define HASH_KEY 0.6180339887
378 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
379 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
380 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
381 #undef __FUNCT__
382 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
383 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
384 {
385   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
386   PetscBool      roworiented = baij->roworiented;
387   PetscErrorCode ierr;
388   PetscInt       i,j,row,col;
389   PetscInt       rstart_orig=mat->rmap->rstart;
390   PetscInt       rend_orig=mat->rmap->rend,Nbs=baij->Nbs;
391   PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
392   PetscReal      tmp;
393   MatScalar      **HD = baij->hd,value;
394 #if defined(PETSC_USE_DEBUG)
395   PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
396 #endif
397 
398   PetscFunctionBegin;
399   if (v) PetscValidScalarPointer(v,6);
400   for (i=0; i<m; i++) {
401 #if defined(PETSC_USE_DEBUG)
402     if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
403     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
404 #endif
405       row = im[i];
406     if (row >= rstart_orig && row < rend_orig) {
407       for (j=0; j<n; j++) {
408         col = in[j];
409         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
410         /* Look up PetscInto the Hash Table */
411         key = (row/bs)*Nbs+(col/bs)+1;
412         h1  = HASH(size,key,tmp);
413 
414 
415         idx = h1;
416 #if defined(PETSC_USE_DEBUG)
417         insert_ct++;
418         total_ct++;
419         if (HT[idx] != key) {
420           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
421           if (idx == size) {
422             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
423             if (idx == h1) {
424               SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
425             }
426           }
427         }
428 #else
429         if (HT[idx] != key) {
430           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
431           if (idx == size) {
432             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
433             if (idx == h1) {
434               SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
435             }
436           }
437         }
438 #endif
439         /* A HASH table entry is found, so insert the values at the correct address */
440         if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
441         else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
442       }
443     } else {
444       if (!baij->donotstash) {
445         if (roworiented) {
446           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
447         } else {
448           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
449         }
450       }
451     }
452   }
453 #if defined(PETSC_USE_DEBUG)
454   baij->ht_total_ct = total_ct;
455   baij->ht_insert_ct = insert_ct;
456 #endif
457   PetscFunctionReturn(0);
458 }
459 
460 #undef __FUNCT__
461 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
462 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
463 {
464   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
465   PetscBool         roworiented = baij->roworiented;
466   PetscErrorCode    ierr;
467   PetscInt          i,j,ii,jj,row,col;
468   PetscInt          rstart=baij->rstartbs;
469   PetscInt          rend=mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
470   PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
471   PetscReal         tmp;
472   MatScalar         **HD = baij->hd,*baij_a;
473   const PetscScalar *v_t,*value;
474 #if defined(PETSC_USE_DEBUG)
475   PetscInt          total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
476 #endif
477 
478   PetscFunctionBegin;
479 
480   if (roworiented) {
481     stepval = (n-1)*bs;
482   } else {
483     stepval = (m-1)*bs;
484   }
485   for (i=0; i<m; i++) {
486 #if defined(PETSC_USE_DEBUG)
487     if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
488     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
489 #endif
490     row   = im[i];
491     v_t   = v + i*nbs2;
492     if (row >= rstart && row < rend) {
493       for (j=0; j<n; j++) {
494         col = in[j];
495 
496         /* Look up into the Hash Table */
497         key = row*Nbs+col+1;
498         h1  = HASH(size,key,tmp);
499 
500         idx = h1;
501 #if defined(PETSC_USE_DEBUG)
502         total_ct++;
503         insert_ct++;
504        if (HT[idx] != key) {
505           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
506           if (idx == size) {
507             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
508             if (idx == h1) {
509               SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
510             }
511           }
512         }
513 #else
514         if (HT[idx] != key) {
515           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
516           if (idx == size) {
517             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
518             if (idx == h1) {
519               SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
520             }
521           }
522         }
523 #endif
524         baij_a = HD[idx];
525         if (roworiented) {
526           /*value = v + i*(stepval+bs)*bs + j*bs;*/
527           /* value = v + (i*(stepval+bs)+j)*bs; */
528           value = v_t;
529           v_t  += bs;
530           if (addv == ADD_VALUES) {
531             for (ii=0; ii<bs; ii++,value+=stepval) {
532               for (jj=ii; jj<bs2; jj+=bs) {
533                 baij_a[jj]  += *value++;
534               }
535             }
536           } else {
537             for (ii=0; ii<bs; ii++,value+=stepval) {
538               for (jj=ii; jj<bs2; jj+=bs) {
539                 baij_a[jj]  = *value++;
540               }
541             }
542           }
543         } else {
544           value = v + j*(stepval+bs)*bs + i*bs;
545           if (addv == ADD_VALUES) {
546             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
547               for (jj=0; jj<bs; jj++) {
548                 baij_a[jj]  += *value++;
549               }
550             }
551           } else {
552             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
553               for (jj=0; jj<bs; jj++) {
554                 baij_a[jj]  = *value++;
555               }
556             }
557           }
558         }
559       }
560     } else {
561       if (!baij->donotstash) {
562         if (roworiented) {
563           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
564         } else {
565           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
566         }
567       }
568     }
569   }
570 #if defined(PETSC_USE_DEBUG)
571   baij->ht_total_ct = total_ct;
572   baij->ht_insert_ct = insert_ct;
573 #endif
574   PetscFunctionReturn(0);
575 }
576 
577 #undef __FUNCT__
578 #define __FUNCT__ "MatGetValues_MPIBAIJ"
579 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
580 {
581   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
582   PetscErrorCode ierr;
583   PetscInt       bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
584   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
585 
586   PetscFunctionBegin;
587   for (i=0; i<m; i++) {
588     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
589     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
590     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
591       row = idxm[i] - bsrstart;
592       for (j=0; j<n; j++) {
593         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
594         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
595         if (idxn[j] >= bscstart && idxn[j] < bscend){
596           col = idxn[j] - bscstart;
597           ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
598         } else {
599           if (!baij->colmap) {
600             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
601           }
602 #if defined (PETSC_USE_CTABLE)
603           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
604           data --;
605 #else
606           data = baij->colmap[idxn[j]/bs]-1;
607 #endif
608           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
609           else {
610             col  = data + idxn[j]%bs;
611             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
612           }
613         }
614       }
615     } else {
616       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
617     }
618   }
619  PetscFunctionReturn(0);
620 }
621 
622 #undef __FUNCT__
623 #define __FUNCT__ "MatNorm_MPIBAIJ"
624 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
625 {
626   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
627   Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
628   PetscErrorCode ierr;
629   PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
630   PetscReal      sum = 0.0;
631   MatScalar      *v;
632 
633   PetscFunctionBegin;
634   if (baij->size == 1) {
635     ierr =  MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
636   } else {
637     if (type == NORM_FROBENIUS) {
638       v = amat->a;
639       nz = amat->nz*bs2;
640       for (i=0; i<nz; i++) {
641 #if defined(PETSC_USE_COMPLEX)
642         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
643 #else
644         sum += (*v)*(*v); v++;
645 #endif
646       }
647       v = bmat->a;
648       nz = bmat->nz*bs2;
649       for (i=0; i<nz; i++) {
650 #if defined(PETSC_USE_COMPLEX)
651         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
652 #else
653         sum += (*v)*(*v); v++;
654 #endif
655       }
656       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
657       *nrm = PetscSqrtReal(*nrm);
658     } else if (type == NORM_1) { /* max column sum */
659       PetscReal *tmp,*tmp2;
660       PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
661       ierr = PetscMalloc2(mat->cmap->N,PetscReal,&tmp,mat->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr);
662       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
663       v = amat->a; jj = amat->j;
664       for (i=0; i<amat->nz; i++) {
665         for (j=0; j<bs; j++){
666           col = bs*(cstart + *jj) + j; /* column index */
667           for (row=0; row<bs; row++){
668             tmp[col] += PetscAbsScalar(*v);  v++;
669           }
670         }
671         jj++;
672       }
673       v = bmat->a; jj = bmat->j;
674       for (i=0; i<bmat->nz; i++) {
675         for (j=0; j<bs; j++){
676           col = bs*garray[*jj] + j;
677           for (row=0; row<bs; row++){
678             tmp[col] += PetscAbsScalar(*v); v++;
679           }
680         }
681         jj++;
682       }
683       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
684       *nrm = 0.0;
685       for (j=0; j<mat->cmap->N; j++) {
686         if (tmp2[j] > *nrm) *nrm = tmp2[j];
687       }
688       ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
689     } else if (type == NORM_INFINITY) { /* max row sum */
690       PetscReal *sums;
691       ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr);
692       sum = 0.0;
693       for (j=0; j<amat->mbs; j++) {
694         for (row=0; row<bs; row++) sums[row] = 0.0;
695         v = amat->a + bs2*amat->i[j];
696         nz = amat->i[j+1]-amat->i[j];
697         for (i=0; i<nz; i++) {
698           for (col=0; col<bs; col++){
699             for (row=0; row<bs; row++){
700               sums[row] += PetscAbsScalar(*v); v++;
701             }
702           }
703         }
704         v = bmat->a + bs2*bmat->i[j];
705         nz = bmat->i[j+1]-bmat->i[j];
706         for (i=0; i<nz; i++) {
707           for (col=0; col<bs; col++){
708             for (row=0; row<bs; row++){
709               sums[row] += PetscAbsScalar(*v); v++;
710             }
711           }
712         }
713         for (row=0; row<bs; row++){
714           if (sums[row] > sum) sum = sums[row];
715         }
716       }
717       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
718       ierr = PetscFree(sums);CHKERRQ(ierr);
719     } else SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for this norm yet");
720   }
721   PetscFunctionReturn(0);
722 }
723 
724 /*
725   Creates the hash table, and sets the table
726   This table is created only once.
727   If new entried need to be added to the matrix
728   then the hash table has to be destroyed and
729   recreated.
730 */
731 #undef __FUNCT__
732 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
733 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
734 {
735   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
736   Mat            A = baij->A,B=baij->B;
737   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
738   PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
739   PetscErrorCode ierr;
740   PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
741   PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
742   PetscInt       *HT,key;
743   MatScalar      **HD;
744   PetscReal      tmp;
745 #if defined(PETSC_USE_INFO)
746   PetscInt       ct=0,max=0;
747 #endif
748 
749   PetscFunctionBegin;
750   if (baij->ht) PetscFunctionReturn(0);
751 
752   baij->ht_size = (PetscInt)(factor*nz);
753   ht_size       = baij->ht_size;
754 
755   /* Allocate Memory for Hash Table */
756   ierr = PetscMalloc2(ht_size,MatScalar*,&baij->hd,ht_size,PetscInt,&baij->ht);CHKERRQ(ierr);
757   ierr = PetscMemzero(baij->hd,ht_size*sizeof(MatScalar*));CHKERRQ(ierr);
758   ierr = PetscMemzero(baij->ht,ht_size*sizeof(PetscInt));CHKERRQ(ierr);
759   HD   = baij->hd;
760   HT   = baij->ht;
761 
762   /* Loop Over A */
763   for (i=0; i<a->mbs; i++) {
764     for (j=ai[i]; j<ai[i+1]; j++) {
765       row = i+rstart;
766       col = aj[j]+cstart;
767 
768       key = row*Nbs + col + 1;
769       h1  = HASH(ht_size,key,tmp);
770       for (k=0; k<ht_size; k++){
771         if (!HT[(h1+k)%ht_size]) {
772           HT[(h1+k)%ht_size] = key;
773           HD[(h1+k)%ht_size] = a->a + j*bs2;
774           break;
775 #if defined(PETSC_USE_INFO)
776         } else {
777           ct++;
778 #endif
779         }
780       }
781 #if defined(PETSC_USE_INFO)
782       if (k> max) max = k;
783 #endif
784     }
785   }
786   /* Loop Over B */
787   for (i=0; i<b->mbs; i++) {
788     for (j=bi[i]; j<bi[i+1]; j++) {
789       row = i+rstart;
790       col = garray[bj[j]];
791       key = row*Nbs + col + 1;
792       h1  = HASH(ht_size,key,tmp);
793       for (k=0; k<ht_size; k++){
794         if (!HT[(h1+k)%ht_size]) {
795           HT[(h1+k)%ht_size] = key;
796           HD[(h1+k)%ht_size] = b->a + j*bs2;
797           break;
798 #if defined(PETSC_USE_INFO)
799         } else {
800           ct++;
801 #endif
802         }
803       }
804 #if defined(PETSC_USE_INFO)
805       if (k> max) max = k;
806 #endif
807     }
808   }
809 
810   /* Print Summary */
811 #if defined(PETSC_USE_INFO)
812   for (i=0,j=0; i<ht_size; i++) {
813     if (HT[i]) {j++;}
814   }
815   ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
816 #endif
817   PetscFunctionReturn(0);
818 }
819 
820 #undef __FUNCT__
821 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
822 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
823 {
824   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
825   PetscErrorCode ierr;
826   PetscInt       nstash,reallocs;
827   InsertMode     addv;
828 
829   PetscFunctionBegin;
830   if (baij->donotstash || mat->nooffprocentries) {
831     PetscFunctionReturn(0);
832   }
833 
834   /* make sure all processors are either in INSERTMODE or ADDMODE */
835   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
836   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
837   mat->insertmode = addv; /* in case this processor had no cache */
838 
839   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
840   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
841   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
842   ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
843   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
844   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
845   PetscFunctionReturn(0);
846 }
847 
848 #undef __FUNCT__
849 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
850 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
851 {
852   Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
853   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)baij->A->data;
854   PetscErrorCode ierr;
855   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
856   PetscInt       *row,*col;
857   PetscBool      r1,r2,r3,other_disassembled;
858   MatScalar      *val;
859   InsertMode     addv = mat->insertmode;
860   PetscMPIInt    n;
861 
862   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
863   PetscFunctionBegin;
864   if (!baij->donotstash && !mat->nooffprocentries) {
865     while (1) {
866       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
867       if (!flg) break;
868 
869       for (i=0; i<n;) {
870         /* Now identify the consecutive vals belonging to the same row */
871         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
872         if (j < n) ncols = j-i;
873         else       ncols = n-i;
874         /* Now assemble all these values with a single function call */
875         ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
876         i = j;
877       }
878     }
879     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
880     /* Now process the block-stash. Since the values are stashed column-oriented,
881        set the roworiented flag to column oriented, and after MatSetValues()
882        restore the original flags */
883     r1 = baij->roworiented;
884     r2 = a->roworiented;
885     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
886     baij->roworiented = PETSC_FALSE;
887     a->roworiented    = PETSC_FALSE;
888     (((Mat_SeqBAIJ*)baij->B->data))->roworiented    = PETSC_FALSE; /* b->roworiented */
889     while (1) {
890       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
891       if (!flg) break;
892 
893       for (i=0; i<n;) {
894         /* Now identify the consecutive vals belonging to the same row */
895         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
896         if (j < n) ncols = j-i;
897         else       ncols = n-i;
898         ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
899         i = j;
900       }
901     }
902     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
903     baij->roworiented = r1;
904     a->roworiented    = r2;
905     ((Mat_SeqBAIJ*)baij->B->data)->roworiented    = r3; /* b->roworiented */
906   }
907 
908   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
909   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
910 
911   /* determine if any processor has disassembled, if so we must
912      also disassemble ourselfs, in order that we may reassemble. */
913   /*
914      if nonzero structure of submatrix B cannot change then we know that
915      no processor disassembled thus we can skip this stuff
916   */
917   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
918     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
919     if (mat->was_assembled && !other_disassembled) {
920       ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
921     }
922   }
923 
924   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
925     ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
926   }
927   ierr = MatSetOption(baij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
928   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
929   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
930 
931 #if defined(PETSC_USE_INFO)
932   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
933     ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);
934     baij->ht_total_ct  = 0;
935     baij->ht_insert_ct = 0;
936   }
937 #endif
938   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
939     ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
940     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
941     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
942   }
943 
944   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
945   baij->rowvalues = 0;
946   PetscFunctionReturn(0);
947 }
948 
949 #undef __FUNCT__
950 #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
951 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
952 {
953   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
954   PetscErrorCode    ierr;
955   PetscMPIInt       size = baij->size,rank = baij->rank;
956   PetscInt          bs = mat->rmap->bs;
957   PetscBool         iascii,isdraw;
958   PetscViewer       sviewer;
959   PetscViewerFormat format;
960 
961   PetscFunctionBegin;
962   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
963   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
964   if (iascii) {
965     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
966     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
967       MatInfo info;
968       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
969       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
970       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
971       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
972              rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
973       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
974       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
975       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
976       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
977       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
978       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
979       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
980       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
981       PetscFunctionReturn(0);
982     } else if (format == PETSC_VIEWER_ASCII_INFO) {
983       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
984       PetscFunctionReturn(0);
985     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
986       PetscFunctionReturn(0);
987     }
988   }
989 
990   if (isdraw) {
991     PetscDraw       draw;
992     PetscBool  isnull;
993     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
994     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
995   }
996 
997   if (size == 1) {
998     ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
999     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
1000   } else {
1001     /* assemble the entire matrix onto first processor. */
1002     Mat         A;
1003     Mat_SeqBAIJ *Aloc;
1004     PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
1005     MatScalar   *a;
1006 
1007     /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
1008     /* Perhaps this should be the type of mat? */
1009     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1010     if (!rank) {
1011       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1012     } else {
1013       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1014     }
1015     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
1016     ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1017     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1018     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1019 
1020     /* copy over the A part */
1021     Aloc = (Mat_SeqBAIJ*)baij->A->data;
1022     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1023     ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
1024 
1025     for (i=0; i<mbs; i++) {
1026       rvals[0] = bs*(baij->rstartbs + i);
1027       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1028       for (j=ai[i]; j<ai[i+1]; j++) {
1029         col = (baij->cstartbs+aj[j])*bs;
1030         for (k=0; k<bs; k++) {
1031           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1032           col++; a += bs;
1033         }
1034       }
1035     }
1036     /* copy over the B part */
1037     Aloc = (Mat_SeqBAIJ*)baij->B->data;
1038     ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1039     for (i=0; i<mbs; i++) {
1040       rvals[0] = bs*(baij->rstartbs + i);
1041       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1042       for (j=ai[i]; j<ai[i+1]; j++) {
1043         col = baij->garray[aj[j]]*bs;
1044         for (k=0; k<bs; k++) {
1045           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1046           col++; a += bs;
1047         }
1048       }
1049     }
1050     ierr = PetscFree(rvals);CHKERRQ(ierr);
1051     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1052     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1053     /*
1054        Everyone has to call to draw the matrix since the graphics waits are
1055        synchronized across all processors that share the PetscDraw object
1056     */
1057     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1058     if (!rank) {
1059       ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1060     /* Set the type name to MATMPIBAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqBAIJ_ASCII()*/
1061       PetscStrcpy(((PetscObject)((Mat_MPIBAIJ*)(A->data))->A)->type_name,MATMPIBAIJ);
1062       ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1063     }
1064     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1065     ierr = MatDestroy(&A);CHKERRQ(ierr);
1066   }
1067   PetscFunctionReturn(0);
1068 }
1069 
1070 #undef __FUNCT__
1071 #define __FUNCT__ "MatView_MPIBAIJ_Binary"
1072 static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
1073 {
1074   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)mat->data;
1075   Mat_SeqBAIJ*   A = (Mat_SeqBAIJ*)a->A->data;
1076   Mat_SeqBAIJ*   B = (Mat_SeqBAIJ*)a->B->data;
1077   PetscErrorCode ierr;
1078   PetscInt       i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen;
1079   PetscInt       *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll;
1080   int            fd;
1081   PetscScalar    *column_values;
1082   FILE           *file;
1083   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1084   PetscInt       message_count,flowcontrolcount;
1085 
1086   PetscFunctionBegin;
1087   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1088   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
1089   nz   = bs2*(A->nz + B->nz);
1090   rlen = mat->rmap->n;
1091   if (!rank) {
1092     header[0] = MAT_FILE_CLASSID;
1093     header[1] = mat->rmap->N;
1094     header[2] = mat->cmap->N;
1095     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1096     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1097     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1098     /* get largest number of rows any processor has */
1099     range = mat->rmap->range;
1100     for (i=1; i<size; i++) {
1101       rlen = PetscMax(rlen,range[i+1] - range[i]);
1102     }
1103   } else {
1104     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1105   }
1106 
1107   ierr  = PetscMalloc((rlen/bs)*sizeof(PetscInt),&crow_lens);CHKERRQ(ierr);
1108   /* compute lengths of each row  */
1109   for (i=0; i<a->mbs; i++) {
1110     crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1111   }
1112   /* store the row lengths to the file */
1113   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1114   if (!rank) {
1115     MPI_Status status;
1116     ierr  = PetscMalloc(rlen*sizeof(PetscInt),&row_lens);CHKERRQ(ierr);
1117     rlen  = (range[1] - range[0])/bs;
1118     for (i=0; i<rlen; i++) {
1119       for (j=0; j<bs; j++) {
1120         row_lens[i*bs+j] = bs*crow_lens[i];
1121       }
1122     }
1123     ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1124     for (i=1; i<size; i++) {
1125       rlen = (range[i+1] - range[i])/bs;
1126       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1127       ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1128       for (k=0; k<rlen; k++) {
1129 	for (j=0; j<bs; j++) {
1130 	  row_lens[k*bs+j] = bs*crow_lens[k];
1131 	}
1132       }
1133       ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1134     }
1135     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1136     ierr = PetscFree(row_lens);CHKERRQ(ierr);
1137   } else {
1138     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1139     ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1140     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1141   }
1142   ierr = PetscFree(crow_lens);CHKERRQ(ierr);
1143 
1144   /* load up the local column indices. Include for all rows not just one for each block row since process 0 does not have the
1145      information needed to make it for each row from a block row. This does require more communication but still not more than
1146      the communication needed for the nonzero values  */
1147   nzmax = nz; /*  space a largest processor needs */
1148   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1149   ierr = PetscMalloc(nzmax*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
1150   cnt  = 0;
1151   for (i=0; i<a->mbs; i++) {
1152     pcnt = cnt;
1153     for (j=B->i[i]; j<B->i[i+1]; j++) {
1154       if ( (col = garray[B->j[j]]) > cstart) break;
1155       for (l=0; l<bs; l++) {
1156 	column_indices[cnt++] = bs*col+l;
1157       }
1158     }
1159     for (k=A->i[i]; k<A->i[i+1]; k++) {
1160       for (l=0; l<bs; l++) {
1161         column_indices[cnt++] = bs*(A->j[k] + cstart)+l;
1162       }
1163     }
1164     for (; j<B->i[i+1]; j++) {
1165       for (l=0; l<bs; l++) {
1166         column_indices[cnt++] = bs*garray[B->j[j]]+l;
1167       }
1168     }
1169     len = cnt - pcnt;
1170     for (k=1; k<bs; k++) {
1171       ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr);
1172       cnt += len;
1173     }
1174   }
1175   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1176 
1177   /* store the columns to the file */
1178   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1179   if (!rank) {
1180     MPI_Status status;
1181     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1182     for (i=1; i<size; i++) {
1183       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1184       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1185       ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1186       ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1187     }
1188     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1189   } else {
1190     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1191     ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1192     ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1193     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1194   }
1195   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1196 
1197   /* load up the numerical values */
1198   ierr = PetscMalloc(nzmax*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1199   cnt = 0;
1200   for (i=0; i<a->mbs; i++) {
1201     rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]);
1202     for (j=B->i[i]; j<B->i[i+1]; j++) {
1203       if ( garray[B->j[j]] > cstart) break;
1204       for (l=0; l<bs; l++) {
1205         for (ll=0; ll<bs; ll++) {
1206 	  column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1207         }
1208       }
1209       cnt += bs;
1210     }
1211     for (k=A->i[i]; k<A->i[i+1]; k++) {
1212       for (l=0; l<bs; l++) {
1213         for (ll=0; ll<bs; ll++) {
1214           column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll];
1215         }
1216       }
1217       cnt += bs;
1218     }
1219     for (; j<B->i[i+1]; j++) {
1220       for (l=0; l<bs; l++) {
1221         for (ll=0; ll<bs; ll++) {
1222 	  column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1223         }
1224       }
1225       cnt += bs;
1226     }
1227     cnt += (bs-1)*rlen;
1228   }
1229   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1230 
1231   /* store the column values to the file */
1232   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1233   if (!rank) {
1234     MPI_Status status;
1235     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1236     for (i=1; i<size; i++) {
1237       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1238       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1239       ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1240       ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1241     }
1242     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1243   } else {
1244     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1245     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1246     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1247     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1248   }
1249   ierr = PetscFree(column_values);CHKERRQ(ierr);
1250 
1251   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1252   if (file) {
1253     fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
1254   }
1255   PetscFunctionReturn(0);
1256 }
1257 
1258 #undef __FUNCT__
1259 #define __FUNCT__ "MatView_MPIBAIJ"
1260 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
1261 {
1262   PetscErrorCode ierr;
1263   PetscBool      iascii,isdraw,issocket,isbinary;
1264 
1265   PetscFunctionBegin;
1266   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1267   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1268   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1269   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1270   if (iascii || isdraw || issocket) {
1271     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1272   } else if (isbinary) {
1273     ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1274   } else {
1275     SETERRQ1(((PetscObject)mat)->comm,PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
1276   }
1277   PetscFunctionReturn(0);
1278 }
1279 
1280 #undef __FUNCT__
1281 #define __FUNCT__ "MatDestroy_MPIBAIJ"
1282 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
1283 {
1284   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1285   PetscErrorCode ierr;
1286 
1287   PetscFunctionBegin;
1288 #if defined(PETSC_USE_LOG)
1289   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
1290 #endif
1291   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1292   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
1293   ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
1294   ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
1295 #if defined (PETSC_USE_CTABLE)
1296   ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
1297 #else
1298   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
1299 #endif
1300   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
1301   ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
1302   ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
1303   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
1304   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
1305   ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);
1306   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
1307   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1308 
1309   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1310   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1311   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1312   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
1313   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1314   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1315   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
1316   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr);
1317   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1318   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C","",PETSC_NULL);CHKERRQ(ierr);
1319   PetscFunctionReturn(0);
1320 }
1321 
1322 #undef __FUNCT__
1323 #define __FUNCT__ "MatMult_MPIBAIJ"
1324 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1325 {
1326   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1327   PetscErrorCode ierr;
1328   PetscInt       nt;
1329 
1330   PetscFunctionBegin;
1331   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1332   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1333   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1334   if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
1335   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1336   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1337   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1338   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1339   PetscFunctionReturn(0);
1340 }
1341 
1342 #undef __FUNCT__
1343 #define __FUNCT__ "MatMultAdd_MPIBAIJ"
1344 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1345 {
1346   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1347   PetscErrorCode ierr;
1348 
1349   PetscFunctionBegin;
1350   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1351   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1352   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1353   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1354   PetscFunctionReturn(0);
1355 }
1356 
1357 #undef __FUNCT__
1358 #define __FUNCT__ "MatMultTranspose_MPIBAIJ"
1359 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1360 {
1361   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1362   PetscErrorCode ierr;
1363   PetscBool      merged;
1364 
1365   PetscFunctionBegin;
1366   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1367   /* do nondiagonal part */
1368   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1369   if (!merged) {
1370     /* send it on its way */
1371     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1372     /* do local part */
1373     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1374     /* receive remote parts: note this assumes the values are not actually */
1375     /* inserted in yy until the next line */
1376     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1377   } else {
1378     /* do local part */
1379     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1380     /* send it on its way */
1381     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1382     /* values actually were received in the Begin() but we need to call this nop */
1383     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1384   }
1385   PetscFunctionReturn(0);
1386 }
1387 
1388 #undef __FUNCT__
1389 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
1390 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1391 {
1392   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1393   PetscErrorCode ierr;
1394 
1395   PetscFunctionBegin;
1396   /* do nondiagonal part */
1397   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1398   /* send it on its way */
1399   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1400   /* do local part */
1401   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1402   /* receive remote parts: note this assumes the values are not actually */
1403   /* inserted in yy until the next line, which is true for my implementation*/
1404   /* but is not perhaps always true. */
1405   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1406   PetscFunctionReturn(0);
1407 }
1408 
1409 /*
1410   This only works correctly for square matrices where the subblock A->A is the
1411    diagonal block
1412 */
1413 #undef __FUNCT__
1414 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
1415 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1416 {
1417   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1418   PetscErrorCode ierr;
1419 
1420   PetscFunctionBegin;
1421   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1422   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1423   PetscFunctionReturn(0);
1424 }
1425 
1426 #undef __FUNCT__
1427 #define __FUNCT__ "MatScale_MPIBAIJ"
1428 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
1429 {
1430   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1431   PetscErrorCode ierr;
1432 
1433   PetscFunctionBegin;
1434   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1435   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1436   PetscFunctionReturn(0);
1437 }
1438 
1439 #undef __FUNCT__
1440 #define __FUNCT__ "MatGetRow_MPIBAIJ"
1441 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1442 {
1443   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
1444   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1445   PetscErrorCode ierr;
1446   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1447   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
1448   PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;
1449 
1450   PetscFunctionBegin;
1451   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1452   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1453   mat->getrowactive = PETSC_TRUE;
1454 
1455   if (!mat->rowvalues && (idx || v)) {
1456     /*
1457         allocate enough space to hold information from the longest row.
1458     */
1459     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1460     PetscInt     max = 1,mbs = mat->mbs,tmp;
1461     for (i=0; i<mbs; i++) {
1462       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1463       if (max < tmp) { max = tmp; }
1464     }
1465     ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1466   }
1467   lrow = row - brstart;
1468 
1469   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1470   if (!v)   {pvA = 0; pvB = 0;}
1471   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1472   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1473   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1474   nztot = nzA + nzB;
1475 
1476   cmap  = mat->garray;
1477   if (v  || idx) {
1478     if (nztot) {
1479       /* Sort by increasing column numbers, assuming A and B already sorted */
1480       PetscInt imark = -1;
1481       if (v) {
1482         *v = v_p = mat->rowvalues;
1483         for (i=0; i<nzB; i++) {
1484           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1485           else break;
1486         }
1487         imark = i;
1488         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1489         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1490       }
1491       if (idx) {
1492         *idx = idx_p = mat->rowindices;
1493         if (imark > -1) {
1494           for (i=0; i<imark; i++) {
1495             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1496           }
1497         } else {
1498           for (i=0; i<nzB; i++) {
1499             if (cmap[cworkB[i]/bs] < cstart)
1500               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1501             else break;
1502           }
1503           imark = i;
1504         }
1505         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1506         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1507       }
1508     } else {
1509       if (idx) *idx = 0;
1510       if (v)   *v   = 0;
1511     }
1512   }
1513   *nz = nztot;
1514   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1515   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1516   PetscFunctionReturn(0);
1517 }
1518 
1519 #undef __FUNCT__
1520 #define __FUNCT__ "MatRestoreRow_MPIBAIJ"
1521 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1522 {
1523   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1524 
1525   PetscFunctionBegin;
1526   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1527   baij->getrowactive = PETSC_FALSE;
1528   PetscFunctionReturn(0);
1529 }
1530 
1531 #undef __FUNCT__
1532 #define __FUNCT__ "MatZeroEntries_MPIBAIJ"
1533 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1534 {
1535   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1536   PetscErrorCode ierr;
1537 
1538   PetscFunctionBegin;
1539   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1540   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1541   PetscFunctionReturn(0);
1542 }
1543 
1544 #undef __FUNCT__
1545 #define __FUNCT__ "MatGetInfo_MPIBAIJ"
1546 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1547 {
1548   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
1549   Mat            A = a->A,B = a->B;
1550   PetscErrorCode ierr;
1551   PetscReal      isend[5],irecv[5];
1552 
1553   PetscFunctionBegin;
1554   info->block_size     = (PetscReal)matin->rmap->bs;
1555   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1556   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1557   isend[3] = info->memory;  isend[4] = info->mallocs;
1558   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1559   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1560   isend[3] += info->memory;  isend[4] += info->mallocs;
1561   if (flag == MAT_LOCAL) {
1562     info->nz_used      = isend[0];
1563     info->nz_allocated = isend[1];
1564     info->nz_unneeded  = isend[2];
1565     info->memory       = isend[3];
1566     info->mallocs      = isend[4];
1567   } else if (flag == MAT_GLOBAL_MAX) {
1568     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1569     info->nz_used      = irecv[0];
1570     info->nz_allocated = irecv[1];
1571     info->nz_unneeded  = irecv[2];
1572     info->memory       = irecv[3];
1573     info->mallocs      = irecv[4];
1574   } else if (flag == MAT_GLOBAL_SUM) {
1575     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1576     info->nz_used      = irecv[0];
1577     info->nz_allocated = irecv[1];
1578     info->nz_unneeded  = irecv[2];
1579     info->memory       = irecv[3];
1580     info->mallocs      = irecv[4];
1581   } else {
1582     SETERRQ1(((PetscObject)matin)->comm,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1583   }
1584   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1585   info->fill_ratio_needed = 0;
1586   info->factor_mallocs    = 0;
1587   PetscFunctionReturn(0);
1588 }
1589 
1590 #undef __FUNCT__
1591 #define __FUNCT__ "MatSetOption_MPIBAIJ"
1592 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool  flg)
1593 {
1594   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1595   PetscErrorCode ierr;
1596 
1597   PetscFunctionBegin;
1598   switch (op) {
1599   case MAT_NEW_NONZERO_LOCATIONS:
1600   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1601   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1602   case MAT_KEEP_NONZERO_PATTERN:
1603   case MAT_NEW_NONZERO_LOCATION_ERR:
1604     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1605     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1606     break;
1607   case MAT_ROW_ORIENTED:
1608     a->roworiented = flg;
1609     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1610     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1611     break;
1612   case MAT_NEW_DIAGONALS:
1613     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1614     break;
1615   case MAT_IGNORE_OFF_PROC_ENTRIES:
1616     a->donotstash = flg;
1617     break;
1618   case MAT_USE_HASH_TABLE:
1619     a->ht_flag = flg;
1620     break;
1621   case MAT_SYMMETRIC:
1622   case MAT_STRUCTURALLY_SYMMETRIC:
1623   case MAT_HERMITIAN:
1624   case MAT_SYMMETRY_ETERNAL:
1625     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1626     break;
1627   default:
1628     SETERRQ1(((PetscObject)A)->comm,PETSC_ERR_SUP,"unknown option %d",op);
1629   }
1630   PetscFunctionReturn(0);
1631 }
1632 
1633 #undef __FUNCT__
1634 #define __FUNCT__ "MatTranspose_MPIBAIJ"
1635 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
1636 {
1637   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
1638   Mat_SeqBAIJ    *Aloc;
1639   Mat            B;
1640   PetscErrorCode ierr;
1641   PetscInt       M=A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
1642   PetscInt       bs=A->rmap->bs,mbs=baij->mbs;
1643   MatScalar      *a;
1644 
1645   PetscFunctionBegin;
1646   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1647   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1648     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
1649     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1650     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1651     ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1652   } else {
1653     B = *matout;
1654   }
1655 
1656   /* copy over the A part */
1657   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1658   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1659   ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
1660 
1661   for (i=0; i<mbs; i++) {
1662     rvals[0] = bs*(baij->rstartbs + i);
1663     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1664     for (j=ai[i]; j<ai[i+1]; j++) {
1665       col = (baij->cstartbs+aj[j])*bs;
1666       for (k=0; k<bs; k++) {
1667         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1668         col++; a += bs;
1669       }
1670     }
1671   }
1672   /* copy over the B part */
1673   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1674   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1675   for (i=0; i<mbs; i++) {
1676     rvals[0] = bs*(baij->rstartbs + i);
1677     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1678     for (j=ai[i]; j<ai[i+1]; j++) {
1679       col = baij->garray[aj[j]]*bs;
1680       for (k=0; k<bs; k++) {
1681         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1682         col++; a += bs;
1683       }
1684     }
1685   }
1686   ierr = PetscFree(rvals);CHKERRQ(ierr);
1687   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1688   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1689 
1690   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1691     *matout = B;
1692   } else {
1693     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
1694   }
1695   PetscFunctionReturn(0);
1696 }
1697 
1698 #undef __FUNCT__
1699 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
1700 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1701 {
1702   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1703   Mat            a = baij->A,b = baij->B;
1704   PetscErrorCode ierr;
1705   PetscInt       s1,s2,s3;
1706 
1707   PetscFunctionBegin;
1708   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1709   if (rr) {
1710     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1711     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1712     /* Overlap communication with computation. */
1713     ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1714   }
1715   if (ll) {
1716     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1717     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1718     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1719   }
1720   /* scale  the diagonal block */
1721   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1722 
1723   if (rr) {
1724     /* Do a scatter end and then right scale the off-diagonal block */
1725     ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1726     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1727   }
1728 
1729   PetscFunctionReturn(0);
1730 }
1731 
1732 #undef __FUNCT__
1733 #define __FUNCT__ "MatZeroRows_MPIBAIJ"
1734 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
1735 {
1736   Mat_MPIBAIJ       *l = (Mat_MPIBAIJ*)A->data;
1737   PetscErrorCode    ierr;
1738   PetscMPIInt       imdex,size = l->size,n,rank = l->rank;
1739   PetscInt          i,*owners = A->rmap->range;
1740   PetscInt          *nprocs,j,idx,nsends,row;
1741   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
1742   PetscInt          *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source,lastidx = -1;
1743   PetscInt          *lens,*lrows,*values,rstart_bs=A->rmap->rstart;
1744   MPI_Comm          comm = ((PetscObject)A)->comm;
1745   MPI_Request       *send_waits,*recv_waits;
1746   MPI_Status        recv_status,*send_status;
1747   const PetscScalar *xx;
1748   PetscScalar       *bb;
1749 #if defined(PETSC_DEBUG)
1750   PetscBool         found = PETSC_FALSE;
1751 #endif
1752 
1753   PetscFunctionBegin;
1754   /*  first count number of contributors to each processor */
1755   ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
1756   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
1757   ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
1758   j = 0;
1759   for (i=0; i<N; i++) {
1760     if (lastidx > (idx = rows[i])) j = 0;
1761     lastidx = idx;
1762     for (; j<size; j++) {
1763       if (idx >= owners[j] && idx < owners[j+1]) {
1764         nprocs[2*j]++;
1765         nprocs[2*j+1] = 1;
1766         owner[i] = j;
1767 #if defined(PETSC_DEBUG)
1768         found = PETSC_TRUE;
1769 #endif
1770         break;
1771       }
1772     }
1773 #if defined(PETSC_DEBUG)
1774     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
1775     found = PETSC_FALSE;
1776 #endif
1777   }
1778   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
1779 
1780   if (A->nooffproczerorows) {
1781     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
1782     nrecvs = nsends;
1783     nmax   = N;
1784   } else {
1785     /* inform other processors of number of messages and max length*/
1786     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
1787   }
1788 
1789   /* post receives:   */
1790   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
1791   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
1792   for (i=0; i<nrecvs; i++) {
1793     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
1794   }
1795 
1796   /* do sends:
1797      1) starts[i] gives the starting index in svalues for stuff going to
1798      the ith processor
1799   */
1800   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
1801   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
1802   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
1803   starts[0]  = 0;
1804   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1805   for (i=0; i<N; i++) {
1806     svalues[starts[owner[i]]++] = rows[i];
1807   }
1808 
1809   starts[0] = 0;
1810   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1811   count = 0;
1812   for (i=0; i<size; i++) {
1813     if (nprocs[2*i+1]) {
1814       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
1815     }
1816   }
1817   ierr = PetscFree(starts);CHKERRQ(ierr);
1818 
1819   base = owners[rank];
1820 
1821   /*  wait on receives */
1822   ierr   = PetscMalloc2(nrecvs+1,PetscInt,&lens,nrecvs+1,PetscInt,&source);CHKERRQ(ierr);
1823   count  = nrecvs;
1824   slen = 0;
1825   while (count) {
1826     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
1827     /* unpack receives into our local space */
1828     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
1829     source[imdex]  = recv_status.MPI_SOURCE;
1830     lens[imdex]    = n;
1831     slen          += n;
1832     count--;
1833   }
1834   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1835 
1836   /* move the data into the send scatter */
1837   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
1838   count = 0;
1839   for (i=0; i<nrecvs; i++) {
1840     values = rvalues + i*nmax;
1841     for (j=0; j<lens[i]; j++) {
1842       lrows[count++] = values[j] - base;
1843     }
1844   }
1845   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1846   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
1847   ierr = PetscFree(owner);CHKERRQ(ierr);
1848   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1849 
1850   /* fix right hand side if needed */
1851   if (x && b) {
1852     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
1853     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1854     for (i=0; i<slen; i++) {
1855       bb[lrows[i]] = diag*xx[lrows[i]];
1856     }
1857     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
1858     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1859   }
1860 
1861   /* actually zap the local rows */
1862   /*
1863         Zero the required rows. If the "diagonal block" of the matrix
1864      is square and the user wishes to set the diagonal we use separate
1865      code so that MatSetValues() is not called for each diagonal allocating
1866      new memory, thus calling lots of mallocs and slowing things down.
1867 
1868   */
1869   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1870   ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
1871   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
1872     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
1873   } else if (diag != 0.0) {
1874     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
1875     if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1876        MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1877     for (i=0; i<slen; i++) {
1878       row  = lrows[i] + rstart_bs;
1879       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
1880     }
1881     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1882     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1883   } else {
1884     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
1885   }
1886 
1887   ierr = PetscFree(lrows);CHKERRQ(ierr);
1888 
1889   /* wait on sends */
1890   if (nsends) {
1891     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1892     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1893     ierr = PetscFree(send_status);CHKERRQ(ierr);
1894   }
1895   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1896   ierr = PetscFree(svalues);CHKERRQ(ierr);
1897 
1898   PetscFunctionReturn(0);
1899 }
1900 
1901 #undef __FUNCT__
1902 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
1903 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1904 {
1905   Mat_MPIBAIJ    *a   = (Mat_MPIBAIJ*)A->data;
1906   PetscErrorCode ierr;
1907 
1908   PetscFunctionBegin;
1909   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1910   PetscFunctionReturn(0);
1911 }
1912 
1913 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
1914 
1915 #undef __FUNCT__
1916 #define __FUNCT__ "MatEqual_MPIBAIJ"
1917 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool  *flag)
1918 {
1919   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1920   Mat            a,b,c,d;
1921   PetscBool      flg;
1922   PetscErrorCode ierr;
1923 
1924   PetscFunctionBegin;
1925   a = matA->A; b = matA->B;
1926   c = matB->A; d = matB->B;
1927 
1928   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1929   if (flg) {
1930     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1931   }
1932   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1933   PetscFunctionReturn(0);
1934 }
1935 
1936 #undef __FUNCT__
1937 #define __FUNCT__ "MatCopy_MPIBAIJ"
1938 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
1939 {
1940   PetscErrorCode ierr;
1941   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ *)A->data;
1942   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ *)B->data;
1943 
1944   PetscFunctionBegin;
1945   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1946   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1947     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1948   } else {
1949     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1950     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1951   }
1952   PetscFunctionReturn(0);
1953 }
1954 
1955 #undef __FUNCT__
1956 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ"
1957 PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A)
1958 {
1959   PetscErrorCode ierr;
1960 
1961   PetscFunctionBegin;
1962   ierr =  MatMPIBAIJSetPreallocation(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1963   PetscFunctionReturn(0);
1964 }
1965 
1966 #undef __FUNCT__
1967 #define __FUNCT__ "MatAXPY_MPIBAIJ"
1968 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1969 {
1970   PetscErrorCode ierr;
1971   Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data;
1972   PetscBLASInt   bnz,one=1;
1973   Mat_SeqBAIJ    *x,*y;
1974 
1975   PetscFunctionBegin;
1976   if (str == SAME_NONZERO_PATTERN) {
1977     PetscScalar alpha = a;
1978     x = (Mat_SeqBAIJ *)xx->A->data;
1979     y = (Mat_SeqBAIJ *)yy->A->data;
1980     bnz = PetscBLASIntCast(x->nz);
1981     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1982     x = (Mat_SeqBAIJ *)xx->B->data;
1983     y = (Mat_SeqBAIJ *)yy->B->data;
1984     bnz = PetscBLASIntCast(x->nz);
1985     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1986   } else {
1987     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1988   }
1989   PetscFunctionReturn(0);
1990 }
1991 
1992 #undef __FUNCT__
1993 #define __FUNCT__ "MatSetBlockSize_MPIBAIJ"
1994 PetscErrorCode MatSetBlockSize_MPIBAIJ(Mat A,PetscInt bs)
1995 {
1996   Mat_MPIBAIJ    *a   = (Mat_MPIBAIJ*)A->data;
1997   PetscInt rbs,cbs;
1998   PetscErrorCode ierr;
1999 
2000   PetscFunctionBegin;
2001   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
2002   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
2003   ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr);
2004   ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr);
2005   if (rbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,rbs);
2006   if (cbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,cbs);
2007   PetscFunctionReturn(0);
2008 }
2009 
2010 #undef __FUNCT__
2011 #define __FUNCT__ "MatRealPart_MPIBAIJ"
2012 PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
2013 {
2014   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ*)A->data;
2015   PetscErrorCode ierr;
2016 
2017   PetscFunctionBegin;
2018   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2019   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2020   PetscFunctionReturn(0);
2021 }
2022 
2023 #undef __FUNCT__
2024 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ"
2025 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
2026 {
2027   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ*)A->data;
2028   PetscErrorCode ierr;
2029 
2030   PetscFunctionBegin;
2031   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2032   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2033   PetscFunctionReturn(0);
2034 }
2035 
2036 #undef __FUNCT__
2037 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ"
2038 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
2039 {
2040   PetscErrorCode ierr;
2041   IS             iscol_local;
2042   PetscInt       csize;
2043 
2044   PetscFunctionBegin;
2045   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
2046   if (call == MAT_REUSE_MATRIX) {
2047     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
2048     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2049   } else {
2050     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
2051   }
2052   ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
2053   if (call == MAT_INITIAL_MATRIX) {
2054     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
2055     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
2056   }
2057   PetscFunctionReturn(0);
2058 }
2059 
2060 #undef __FUNCT__
2061 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ_Private"
2062 /*
2063     Not great since it makes two copies of the submatrix, first an SeqBAIJ
2064   in local and then by concatenating the local matrices the end result.
2065   Writing it directly would be much like MatGetSubMatrices_MPIBAIJ()
2066 */
2067 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2068 {
2069   PetscErrorCode ierr;
2070   PetscMPIInt    rank,size;
2071   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs;
2072   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2073   Mat            *local,M,Mreuse;
2074   MatScalar      *vwork,*aa;
2075   MPI_Comm       comm = ((PetscObject)mat)->comm;
2076   Mat_SeqBAIJ    *aij;
2077 
2078 
2079   PetscFunctionBegin;
2080   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2081   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2082 
2083   if (call ==  MAT_REUSE_MATRIX) {
2084     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2085     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2086     local = &Mreuse;
2087     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2088   } else {
2089     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2090     Mreuse = *local;
2091     ierr   = PetscFree(local);CHKERRQ(ierr);
2092   }
2093 
2094   /*
2095       m - number of local rows
2096       n - number of columns (same on all processors)
2097       rstart - first row in new global matrix generated
2098   */
2099   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2100   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2101   m    = m/bs;
2102   n    = n/bs;
2103 
2104   if (call == MAT_INITIAL_MATRIX) {
2105     aij = (Mat_SeqBAIJ*)(Mreuse)->data;
2106     ii  = aij->i;
2107     jj  = aij->j;
2108 
2109     /*
2110         Determine the number of non-zeros in the diagonal and off-diagonal
2111         portions of the matrix in order to do correct preallocation
2112     */
2113 
2114     /* first get start and end of "diagonal" columns */
2115     if (csize == PETSC_DECIDE) {
2116       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2117       if (mglobal == n*bs) { /* square matrix */
2118 	nlocal = m;
2119       } else {
2120         nlocal = n/size + ((n % size) > rank);
2121       }
2122     } else {
2123       nlocal = csize/bs;
2124     }
2125     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2126     rstart = rend - nlocal;
2127     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2128 
2129     /* next, compute all the lengths */
2130     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2131     olens = dlens + m;
2132     for (i=0; i<m; i++) {
2133       jend = ii[i+1] - ii[i];
2134       olen = 0;
2135       dlen = 0;
2136       for (j=0; j<jend; j++) {
2137         if (*jj < rstart || *jj >= rend) olen++;
2138         else dlen++;
2139         jj++;
2140       }
2141       olens[i] = olen;
2142       dlens[i] = dlen;
2143     }
2144     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
2145     ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr);
2146     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
2147     ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
2148     ierr = PetscFree(dlens);CHKERRQ(ierr);
2149   } else {
2150     PetscInt ml,nl;
2151 
2152     M = *newmat;
2153     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2154     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2155     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2156     /*
2157          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2158        rather than the slower MatSetValues().
2159     */
2160     M->was_assembled = PETSC_TRUE;
2161     M->assembled     = PETSC_FALSE;
2162   }
2163   ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2164   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2165   aij = (Mat_SeqBAIJ*)(Mreuse)->data;
2166   ii  = aij->i;
2167   jj  = aij->j;
2168   aa  = aij->a;
2169   for (i=0; i<m; i++) {
2170     row   = rstart/bs + i;
2171     nz    = ii[i+1] - ii[i];
2172     cwork = jj;     jj += nz;
2173     vwork = aa;     aa += nz;
2174     ierr = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2175   }
2176 
2177   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2178   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2179   *newmat = M;
2180 
2181   /* save submatrix used in processor for next request */
2182   if (call ==  MAT_INITIAL_MATRIX) {
2183     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2184     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2185   }
2186 
2187   PetscFunctionReturn(0);
2188 }
2189 
2190 #undef __FUNCT__
2191 #define __FUNCT__ "MatPermute_MPIBAIJ"
2192 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B)
2193 {
2194   MPI_Comm       comm,pcomm;
2195   PetscInt       first,local_size,nrows;
2196   const PetscInt *rows;
2197   PetscMPIInt    size;
2198   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
2199   PetscErrorCode ierr;
2200 
2201   PetscFunctionBegin;
2202   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2203   /* make a collective version of 'rowp' */
2204   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
2205   if (pcomm==comm) {
2206     crowp = rowp;
2207   } else {
2208     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
2209     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
2210     ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr);
2211     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
2212   }
2213   /* collect the global row permutation and invert it */
2214   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
2215   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
2216   if (pcomm!=comm) {
2217     ierr = ISDestroy(&crowp);CHKERRQ(ierr);
2218   }
2219   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
2220   /* get the local target indices */
2221   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
2222   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
2223   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
2224   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr);
2225   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
2226   ierr = ISDestroy(&irowp);CHKERRQ(ierr);
2227   /* the column permutation is so much easier;
2228      make a local version of 'colp' and invert it */
2229   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
2230   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
2231   if (size==1) {
2232     lcolp = colp;
2233   } else {
2234     ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
2235     ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
2236     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,PETSC_COPY_VALUES,&lcolp);CHKERRQ(ierr);
2237   }
2238   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
2239   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
2240   ierr = ISSetPermutation(icolp);CHKERRQ(ierr);
2241   if (size>1) {
2242     ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
2243     ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
2244   }
2245   /* now we just get the submatrix */
2246   ierr = MatGetSubMatrix_MPIBAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
2247   /* clean up */
2248   ierr = ISDestroy(&lrowp);CHKERRQ(ierr);
2249   ierr = ISDestroy(&icolp);CHKERRQ(ierr);
2250   PetscFunctionReturn(0);
2251 }
2252 
2253 #undef __FUNCT__
2254 #define __FUNCT__ "MatGetGhosts_MPIBAIJ"
2255 PetscErrorCode  MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
2256 {
2257   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*) mat->data;
2258   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
2259 
2260   PetscFunctionBegin;
2261   if (nghosts) { *nghosts = B->nbs;}
2262   if (ghosts) {*ghosts = baij->garray;}
2263   PetscFunctionReturn(0);
2264 }
2265 
2266 extern PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat);
2267 
2268 #undef __FUNCT__
2269 #define __FUNCT__ "MatFDColoringCreate_MPIBAIJ"
2270 /*
2271     This routine is almost identical to MatFDColoringCreate_MPIBAIJ()!
2272 */
2273 PetscErrorCode MatFDColoringCreate_MPIBAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c)
2274 {
2275   Mat_MPIBAIJ            *baij = (Mat_MPIBAIJ*)mat->data;
2276   PetscErrorCode        ierr;
2277   PetscMPIInt           size,*ncolsonproc,*disp,nn;
2278   PetscInt              bs,i,n,nrows,j,k,m,*rows = 0,*A_ci,*A_cj,ncols,col;
2279   const PetscInt        *is;
2280   PetscInt              nis = iscoloring->n,nctot,*cols,*B_ci,*B_cj;
2281   PetscInt              *rowhit,M,cstart,cend,colb;
2282   PetscInt              *columnsforrow,l;
2283   IS                    *isa;
2284   PetscBool              done,flg;
2285   ISLocalToGlobalMapping map = mat->cmap->bmapping;
2286   PetscInt               *ltog = (map ? map->indices : (PetscInt*) PETSC_NULL) ,ctype=c->ctype;
2287 
2288   PetscFunctionBegin;
2289   if (!mat->assembled) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled first; MatAssemblyBegin/End();");
2290   if (ctype == IS_COLORING_GHOSTED && !map) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_INCOMP,"When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMappingBlock");
2291 
2292   ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr);
2293   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2294   M                = mat->rmap->n/bs;
2295   cstart           = mat->cmap->rstart/bs;
2296   cend             = mat->cmap->rend/bs;
2297   c->M             = mat->rmap->N/bs;  /* set the global rows and columns and local rows */
2298   c->N             = mat->cmap->N/bs;
2299   c->m             = mat->rmap->n/bs;
2300   c->rstart        = mat->rmap->rstart/bs;
2301 
2302   c->ncolors       = nis;
2303   ierr             = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr);
2304   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr);
2305   ierr             = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr);
2306   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr);
2307   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr);
2308   ierr = PetscLogObjectMemory(c,5*nis*sizeof(PetscInt));CHKERRQ(ierr);
2309 
2310   /* Allow access to data structures of local part of matrix */
2311   if (!baij->colmap) {
2312     ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
2313   }
2314   ierr = MatGetColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr);
2315   ierr = MatGetColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr);
2316 
2317   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr);
2318   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr);
2319 
2320   for (i=0; i<nis; i++) {
2321     ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr);
2322     ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr);
2323     c->ncolumns[i] = n;
2324     if (n) {
2325       ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr);
2326       ierr = PetscLogObjectMemory(c,n*sizeof(PetscInt));CHKERRQ(ierr);
2327       ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr);
2328     } else {
2329       c->columns[i]  = 0;
2330     }
2331 
2332     if (ctype == IS_COLORING_GLOBAL){
2333       /* Determine the total (parallel) number of columns of this color */
2334       ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
2335       ierr = PetscMalloc2(size,PetscMPIInt,&ncolsonproc,size,PetscMPIInt,&disp);CHKERRQ(ierr);
2336 
2337       nn   = PetscMPIIntCast(n);
2338       ierr = MPI_Allgather(&nn,1,MPI_INT,ncolsonproc,1,MPI_INT,((PetscObject)mat)->comm);CHKERRQ(ierr);
2339       nctot = 0; for (j=0; j<size; j++) {nctot += ncolsonproc[j];}
2340       if (!nctot) {
2341         ierr = PetscInfo(mat,"Coloring of matrix has some unneeded colors with no corresponding rows\n");CHKERRQ(ierr);
2342       }
2343 
2344       disp[0] = 0;
2345       for (j=1; j<size; j++) {
2346         disp[j] = disp[j-1] + ncolsonproc[j-1];
2347       }
2348 
2349       /* Get complete list of columns for color on each processor */
2350       ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2351       ierr = MPI_Allgatherv((void*)is,n,MPIU_INT,cols,ncolsonproc,disp,MPIU_INT,((PetscObject)mat)->comm);CHKERRQ(ierr);
2352       ierr = PetscFree2(ncolsonproc,disp);CHKERRQ(ierr);
2353     } else if (ctype == IS_COLORING_GHOSTED){
2354       /* Determine local number of columns of this color on this process, including ghost points */
2355       nctot = n;
2356       ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2357       ierr = PetscMemcpy(cols,is,n*sizeof(PetscInt));CHKERRQ(ierr);
2358     } else {
2359       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not provided for this MatFDColoring type");
2360     }
2361 
2362     /*
2363        Mark all rows affect by these columns
2364     */
2365     /* Temporary option to allow for debugging/testing */
2366     flg  = PETSC_FALSE;
2367     ierr = PetscOptionsGetBool(PETSC_NULL,"-matfdcoloring_slow",&flg,PETSC_NULL);CHKERRQ(ierr);
2368     if (!flg) {/*-----------------------------------------------------------------------------*/
2369       /* crude, fast version */
2370       ierr = PetscMemzero(rowhit,M*sizeof(PetscInt));CHKERRQ(ierr);
2371       /* loop over columns*/
2372       for (j=0; j<nctot; j++) {
2373         if (ctype == IS_COLORING_GHOSTED) {
2374           col = ltog[cols[j]];
2375         } else {
2376           col  = cols[j];
2377         }
2378         if (col >= cstart && col < cend) {
2379           /* column is in diagonal block of matrix */
2380           rows = A_cj + A_ci[col-cstart];
2381           m    = A_ci[col-cstart+1] - A_ci[col-cstart];
2382         } else {
2383 #if defined (PETSC_USE_CTABLE)
2384           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2385 	  colb --;
2386 #else
2387           colb = baij->colmap[col] - 1;
2388 #endif
2389           if (colb == -1) {
2390             m = 0;
2391           } else {
2392             colb = colb/bs;
2393             rows = B_cj + B_ci[colb];
2394             m    = B_ci[colb+1] - B_ci[colb];
2395           }
2396         }
2397         /* loop over columns marking them in rowhit */
2398         for (k=0; k<m; k++) {
2399           rowhit[*rows++] = col + 1;
2400         }
2401       }
2402 
2403       /* count the number of hits */
2404       nrows = 0;
2405       for (j=0; j<M; j++) {
2406         if (rowhit[j]) nrows++;
2407       }
2408       c->nrows[i]         = nrows;
2409       ierr                = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
2410       ierr                = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
2411       ierr = PetscLogObjectMemory(c,2*(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr);
2412       nrows = 0;
2413       for (j=0; j<M; j++) {
2414         if (rowhit[j]) {
2415           c->rows[i][nrows]           = j;
2416           c->columnsforrow[i][nrows] = rowhit[j] - 1;
2417           nrows++;
2418         }
2419       }
2420     } else {/*-------------------------------------------------------------------------------*/
2421       /* slow version, using rowhit as a linked list */
2422       PetscInt currentcol,fm,mfm;
2423       rowhit[M] = M;
2424       nrows     = 0;
2425       /* loop over columns*/
2426       for (j=0; j<nctot; j++) {
2427         if (ctype == IS_COLORING_GHOSTED) {
2428           col = ltog[cols[j]];
2429         } else {
2430           col  = cols[j];
2431         }
2432         if (col >= cstart && col < cend) {
2433           /* column is in diagonal block of matrix */
2434           rows = A_cj + A_ci[col-cstart];
2435           m    = A_ci[col-cstart+1] - A_ci[col-cstart];
2436         } else {
2437 #if defined (PETSC_USE_CTABLE)
2438           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2439           colb --;
2440 #else
2441           colb = baij->colmap[col] - 1;
2442 #endif
2443           if (colb == -1) {
2444             m = 0;
2445           } else {
2446             colb = colb/bs;
2447             rows = B_cj + B_ci[colb];
2448             m    = B_ci[colb+1] - B_ci[colb];
2449           }
2450         }
2451 
2452         /* loop over columns marking them in rowhit */
2453         fm    = M; /* fm points to first entry in linked list */
2454         for (k=0; k<m; k++) {
2455           currentcol = *rows++;
2456 	  /* is it already in the list? */
2457           do {
2458             mfm  = fm;
2459             fm   = rowhit[fm];
2460           } while (fm < currentcol);
2461           /* not in list so add it */
2462           if (fm != currentcol) {
2463             nrows++;
2464             columnsforrow[currentcol] = col;
2465             /* next three lines insert new entry into linked list */
2466             rowhit[mfm]               = currentcol;
2467             rowhit[currentcol]        = fm;
2468             fm                        = currentcol;
2469             /* fm points to present position in list since we know the columns are sorted */
2470           } else {
2471             SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid coloring of matrix detected");
2472           }
2473         }
2474       }
2475       c->nrows[i]         = nrows;
2476       ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
2477       ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
2478       ierr = PetscLogObjectMemory(c,(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr);
2479       /* now store the linked list of rows into c->rows[i] */
2480       nrows = 0;
2481       fm    = rowhit[M];
2482       do {
2483         c->rows[i][nrows]            = fm;
2484         c->columnsforrow[i][nrows++] = columnsforrow[fm];
2485         fm                           = rowhit[fm];
2486       } while (fm < M);
2487     } /* ---------------------------------------------------------------------------------------*/
2488     ierr = PetscFree(cols);CHKERRQ(ierr);
2489   }
2490 
2491   /* Optimize by adding the vscale, and scaleforrow[][] fields */
2492   /*
2493        vscale will contain the "diagonal" on processor scalings followed by the off processor
2494   */
2495   if (ctype == IS_COLORING_GLOBAL) {
2496     PetscInt *garray;
2497     ierr = PetscMalloc(baij->B->cmap->n*sizeof(PetscInt),&garray);CHKERRQ(ierr);
2498     for (i=0; i<baij->B->cmap->n/bs; i++) {
2499       for (j=0; j<bs; j++) {
2500         garray[i*bs+j] = bs*baij->garray[i]+j;
2501       }
2502     }
2503     ierr = VecCreateGhost(((PetscObject)mat)->comm,baij->A->rmap->n,PETSC_DETERMINE,baij->B->cmap->n,garray,&c->vscale);CHKERRQ(ierr);
2504     ierr = PetscFree(garray);CHKERRQ(ierr);
2505     CHKMEMQ;
2506     ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
2507     for (k=0; k<c->ncolors; k++) {
2508       ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
2509       for (l=0; l<c->nrows[k]; l++) {
2510         col = c->columnsforrow[k][l];
2511         if (col >= cstart && col < cend) {
2512           /* column is in diagonal block of matrix */
2513           colb = col - cstart;
2514         } else {
2515           /* column  is in "off-processor" part */
2516 #if defined (PETSC_USE_CTABLE)
2517           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2518           colb --;
2519 #else
2520           colb = baij->colmap[col] - 1;
2521 #endif
2522           colb = colb/bs;
2523           colb += cend - cstart;
2524         }
2525         c->vscaleforrow[k][l] = colb;
2526       }
2527     }
2528   } else if (ctype == IS_COLORING_GHOSTED) {
2529     /* Get gtol mapping */
2530     PetscInt N = mat->cmap->N, *gtol;
2531     ierr = PetscMalloc((N+1)*sizeof(PetscInt),&gtol);CHKERRQ(ierr);
2532     for (i=0; i<N; i++) gtol[i] = -1;
2533     for (i=0; i<map->n; i++) gtol[ltog[i]] = i;
2534 
2535     c->vscale = 0; /* will be created in MatFDColoringApply() */
2536     ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
2537     for (k=0; k<c->ncolors; k++) {
2538       ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
2539       for (l=0; l<c->nrows[k]; l++) {
2540         col = c->columnsforrow[k][l];      /* global column index */
2541         c->vscaleforrow[k][l] = gtol[col]; /* local column index */
2542       }
2543     }
2544     ierr = PetscFree(gtol);CHKERRQ(ierr);
2545   }
2546   ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr);
2547 
2548   ierr = PetscFree(rowhit);CHKERRQ(ierr);
2549   ierr = PetscFree(columnsforrow);CHKERRQ(ierr);
2550   ierr = MatRestoreColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr);
2551   ierr = MatRestoreColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr);
2552     CHKMEMQ;
2553   PetscFunctionReturn(0);
2554 }
2555 
2556 #undef __FUNCT__
2557 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIBAIJ"
2558 PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat)
2559 {
2560   Mat            B;
2561   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ *)A->data;
2562   Mat_SeqBAIJ    *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data;
2563   Mat_SeqAIJ     *b;
2564   PetscErrorCode ierr;
2565   PetscMPIInt    size,rank,*recvcounts = 0,*displs = 0;
2566   PetscInt       sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs;
2567   PetscInt       m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf;
2568 
2569   PetscFunctionBegin;
2570   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
2571   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
2572 
2573   /* ----------------------------------------------------------------
2574      Tell every processor the number of nonzeros per row
2575   */
2576   ierr = PetscMalloc((A->rmap->N/bs)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
2577   for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) {
2578     lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs];
2579   }
2580   sendcount = A->rmap->rend/bs - A->rmap->rstart/bs;
2581   ierr = PetscMalloc(2*size*sizeof(PetscMPIInt),&recvcounts);CHKERRQ(ierr);
2582   displs     = recvcounts + size;
2583   for (i=0; i<size; i++) {
2584     recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs;
2585     displs[i]     = A->rmap->range[i]/bs;
2586   }
2587 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2588   ierr  = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2589 #else
2590   ierr  = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2591 #endif
2592   /* ---------------------------------------------------------------
2593      Create the sequential matrix of the same type as the local block diagonal
2594   */
2595   ierr  = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
2596   ierr  = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2597   ierr  = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2598   ierr  = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr);
2599   b = (Mat_SeqAIJ *)B->data;
2600 
2601   /*--------------------------------------------------------------------
2602     Copy my part of matrix column indices over
2603   */
2604   sendcount  = ad->nz + bd->nz;
2605   jsendbuf   = b->j + b->i[rstarts[rank]/bs];
2606   a_jsendbuf = ad->j;
2607   b_jsendbuf = bd->j;
2608   n          = A->rmap->rend/bs - A->rmap->rstart/bs;
2609   cnt        = 0;
2610   for (i=0; i<n; i++) {
2611 
2612     /* put in lower diagonal portion */
2613     m = bd->i[i+1] - bd->i[i];
2614     while (m > 0) {
2615       /* is it above diagonal (in bd (compressed) numbering) */
2616       if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break;
2617       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2618       m--;
2619     }
2620 
2621     /* put in diagonal portion */
2622     for (j=ad->i[i]; j<ad->i[i+1]; j++) {
2623       jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++;
2624     }
2625 
2626     /* put in upper diagonal portion */
2627     while (m-- > 0) {
2628       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2629     }
2630   }
2631   if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt);
2632 
2633   /*--------------------------------------------------------------------
2634     Gather all column indices to all processors
2635   */
2636   for (i=0; i<size; i++) {
2637     recvcounts[i] = 0;
2638     for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) {
2639       recvcounts[i] += lens[j];
2640     }
2641   }
2642   displs[0]  = 0;
2643   for (i=1; i<size; i++) {
2644     displs[i] = displs[i-1] + recvcounts[i-1];
2645   }
2646 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2647   ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2648 #else
2649   ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2650 #endif
2651   /*--------------------------------------------------------------------
2652     Assemble the matrix into useable form (note numerical values not yet set)
2653   */
2654   /* set the b->ilen (length of each row) values */
2655   ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr);
2656   /* set the b->i indices */
2657   b->i[0] = 0;
2658   for (i=1; i<=A->rmap->N/bs; i++) {
2659     b->i[i] = b->i[i-1] + lens[i-1];
2660   }
2661   ierr = PetscFree(lens);CHKERRQ(ierr);
2662   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2663   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2664   ierr = PetscFree(recvcounts);CHKERRQ(ierr);
2665 
2666   if (A->symmetric){
2667     ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2668   } else if (A->hermitian) {
2669     ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr);
2670   } else if (A->structurally_symmetric) {
2671     ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2672   }
2673   *newmat = B;
2674   PetscFunctionReturn(0);
2675 }
2676 
2677 #undef __FUNCT__
2678 #define __FUNCT__ "MatSOR_MPIBAIJ"
2679 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2680 {
2681   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
2682   PetscErrorCode ierr;
2683   Vec            bb1 = 0;
2684 
2685   PetscFunctionBegin;
2686   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) {
2687     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2688   }
2689 
2690   if (flag == SOR_APPLY_UPPER) {
2691     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2692     PetscFunctionReturn(0);
2693   }
2694 
2695   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2696     if (flag & SOR_ZERO_INITIAL_GUESS) {
2697       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2698       its--;
2699     }
2700 
2701     while (its--) {
2702       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2703       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2704 
2705       /* update rhs: bb1 = bb - B*x */
2706       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2707       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2708 
2709       /* local sweep */
2710       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2711     }
2712   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
2713     if (flag & SOR_ZERO_INITIAL_GUESS) {
2714       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2715       its--;
2716     }
2717     while (its--) {
2718       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2719       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2720 
2721       /* update rhs: bb1 = bb - B*x */
2722       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2723       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2724 
2725       /* local sweep */
2726       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2727     }
2728   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
2729     if (flag & SOR_ZERO_INITIAL_GUESS) {
2730       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2731       its--;
2732     }
2733     while (its--) {
2734       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2735       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2736 
2737       /* update rhs: bb1 = bb - B*x */
2738       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2739       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2740 
2741       /* local sweep */
2742       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2743     }
2744   } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel version of SOR requested not supported");
2745 
2746   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2747   PetscFunctionReturn(0);
2748 }
2749 
2750 extern PetscErrorCode  MatFDColoringApply_BAIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2751 
2752 #undef __FUNCT__
2753 #define __FUNCT__ "MatInvertBlockDiagonal_MPIBAIJ"
2754 PetscErrorCode  MatInvertBlockDiagonal_MPIBAIJ(Mat A,PetscScalar **values)
2755 {
2756   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*) A->data;
2757   PetscErrorCode ierr;
2758 
2759   PetscFunctionBegin;
2760   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2761   PetscFunctionReturn(0);
2762 }
2763 
2764 
2765 /* -------------------------------------------------------------------*/
2766 static struct _MatOps MatOps_Values = {
2767        MatSetValues_MPIBAIJ,
2768        MatGetRow_MPIBAIJ,
2769        MatRestoreRow_MPIBAIJ,
2770        MatMult_MPIBAIJ,
2771 /* 4*/ MatMultAdd_MPIBAIJ,
2772        MatMultTranspose_MPIBAIJ,
2773        MatMultTransposeAdd_MPIBAIJ,
2774        0,
2775        0,
2776        0,
2777 /*10*/ 0,
2778        0,
2779        0,
2780        MatSOR_MPIBAIJ,
2781        MatTranspose_MPIBAIJ,
2782 /*15*/ MatGetInfo_MPIBAIJ,
2783        MatEqual_MPIBAIJ,
2784        MatGetDiagonal_MPIBAIJ,
2785        MatDiagonalScale_MPIBAIJ,
2786        MatNorm_MPIBAIJ,
2787 /*20*/ MatAssemblyBegin_MPIBAIJ,
2788        MatAssemblyEnd_MPIBAIJ,
2789        MatSetOption_MPIBAIJ,
2790        MatZeroEntries_MPIBAIJ,
2791 /*24*/ MatZeroRows_MPIBAIJ,
2792        0,
2793        0,
2794        0,
2795        0,
2796 /*29*/ MatSetUpPreallocation_MPIBAIJ,
2797        0,
2798        0,
2799        0,
2800        0,
2801 /*34*/ MatDuplicate_MPIBAIJ,
2802        0,
2803        0,
2804        0,
2805        0,
2806 /*39*/ MatAXPY_MPIBAIJ,
2807        MatGetSubMatrices_MPIBAIJ,
2808        MatIncreaseOverlap_MPIBAIJ,
2809        MatGetValues_MPIBAIJ,
2810        MatCopy_MPIBAIJ,
2811 /*44*/ 0,
2812        MatScale_MPIBAIJ,
2813        0,
2814        0,
2815        0,
2816 /*49*/ MatSetBlockSize_MPIBAIJ,
2817        0,
2818        0,
2819        0,
2820        0,
2821 /*54*/ MatFDColoringCreate_MPIBAIJ,
2822        0,
2823        MatSetUnfactored_MPIBAIJ,
2824        MatPermute_MPIBAIJ,
2825        MatSetValuesBlocked_MPIBAIJ,
2826 /*59*/ MatGetSubMatrix_MPIBAIJ,
2827        MatDestroy_MPIBAIJ,
2828        MatView_MPIBAIJ,
2829        0,
2830        0,
2831 /*64*/ 0,
2832        0,
2833        0,
2834        0,
2835        0,
2836 /*69*/ MatGetRowMaxAbs_MPIBAIJ,
2837        0,
2838        0,
2839        0,
2840        0,
2841 /*74*/ 0,
2842        MatFDColoringApply_BAIJ,
2843        0,
2844        0,
2845        0,
2846 /*79*/ 0,
2847        0,
2848        0,
2849        0,
2850        MatLoad_MPIBAIJ,
2851 /*84*/ 0,
2852        0,
2853        0,
2854        0,
2855        0,
2856 /*89*/ 0,
2857        0,
2858        0,
2859        0,
2860        0,
2861 /*94*/ 0,
2862        0,
2863        0,
2864        0,
2865        0,
2866 /*99*/ 0,
2867        0,
2868        0,
2869        0,
2870        0,
2871 /*104*/0,
2872        MatRealPart_MPIBAIJ,
2873        MatImaginaryPart_MPIBAIJ,
2874        0,
2875        0,
2876 /*109*/0,
2877        0,
2878        0,
2879        0,
2880        0,
2881 /*114*/MatGetSeqNonzeroStructure_MPIBAIJ,
2882        0,
2883        MatGetGhosts_MPIBAIJ,
2884        0,
2885        0,
2886 /*119*/0,
2887        0,
2888        0,
2889        0,
2890        0,
2891 /*124*/0,
2892        0,
2893        MatInvertBlockDiagonal_MPIBAIJ
2894 };
2895 
2896 EXTERN_C_BEGIN
2897 #undef __FUNCT__
2898 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
2899 PetscErrorCode  MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a)
2900 {
2901   PetscFunctionBegin;
2902   *a = ((Mat_MPIBAIJ *)A->data)->A;
2903   PetscFunctionReturn(0);
2904 }
2905 EXTERN_C_END
2906 
2907 EXTERN_C_BEGIN
2908 extern PetscErrorCode  MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*);
2909 EXTERN_C_END
2910 
2911 EXTERN_C_BEGIN
2912 #undef __FUNCT__
2913 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ"
2914 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2915 {
2916   PetscInt       m,rstart,cstart,cend;
2917   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
2918   const PetscInt *JJ=0;
2919   PetscScalar    *values=0;
2920   PetscErrorCode ierr;
2921 
2922   PetscFunctionBegin;
2923 
2924   if (bs < 1) SETERRQ1(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2925   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2926   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2927   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2928   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2929   m      = B->rmap->n/bs;
2930   rstart = B->rmap->rstart/bs;
2931   cstart = B->cmap->rstart/bs;
2932   cend   = B->cmap->rend/bs;
2933 
2934   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
2935   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
2936   for (i=0; i<m; i++) {
2937     nz = ii[i+1] - ii[i];
2938     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
2939     nz_max = PetscMax(nz_max,nz);
2940     JJ  = jj + ii[i];
2941     for (j=0; j<nz; j++) {
2942       if (*JJ >= cstart) break;
2943       JJ++;
2944     }
2945     d = 0;
2946     for (; j<nz; j++) {
2947       if (*JJ++ >= cend) break;
2948       d++;
2949     }
2950     d_nnz[i] = d;
2951     o_nnz[i] = nz - d;
2952   }
2953   ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2954   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
2955 
2956   values = (PetscScalar*)V;
2957   if (!values) {
2958     ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2959     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2960   }
2961   for (i=0; i<m; i++) {
2962     PetscInt          row    = i + rstart;
2963     PetscInt          ncols  = ii[i+1] - ii[i];
2964     const PetscInt    *icols = jj + ii[i];
2965     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2966     ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2967   }
2968 
2969   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2970   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2971   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2972   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2973   PetscFunctionReturn(0);
2974 }
2975 EXTERN_C_END
2976 
2977 #undef __FUNCT__
2978 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR"
2979 /*@C
2980    MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format
2981    (the default parallel PETSc format).
2982 
2983    Collective on MPI_Comm
2984 
2985    Input Parameters:
2986 +  A - the matrix
2987 .  bs - the block size
2988 .  i - the indices into j for the start of each local row (starts with zero)
2989 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2990 -  v - optional values in the matrix
2991 
2992    Level: developer
2993 
2994 .keywords: matrix, aij, compressed row, sparse, parallel
2995 
2996 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ
2997 @*/
2998 PetscErrorCode  MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2999 {
3000   PetscErrorCode ierr;
3001 
3002   PetscFunctionBegin;
3003   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3004   PetscValidType(B,1);
3005   PetscValidLogicalCollectiveInt(B,bs,2);
3006   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
3007   PetscFunctionReturn(0);
3008 }
3009 
3010 EXTERN_C_BEGIN
3011 #undef __FUNCT__
3012 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
3013 PetscErrorCode  MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
3014 {
3015   Mat_MPIBAIJ    *b;
3016   PetscErrorCode ierr;
3017   PetscInt       i, newbs = PetscAbs(bs);
3018   PetscBool      d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE;
3019 
3020   PetscFunctionBegin;
3021   if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE;
3022   if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE;
3023   if (bs < 0) {
3024     ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPIBAIJ matrix","Mat");CHKERRQ(ierr);
3025       ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr);
3026     ierr = PetscOptionsEnd();CHKERRQ(ierr);
3027     bs   = PetscAbs(bs);
3028   }
3029   if ((d_nnz || o_nnz) && newbs != bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting d_nnz or o_nnz");
3030   bs = newbs;
3031 
3032 
3033   if (bs < 1) SETERRQ(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
3034   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
3035   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
3036   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
3037   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
3038 
3039   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
3040   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
3041   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3042   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3043 
3044   if (d_nnz) {
3045     for (i=0; i<B->rmap->n/bs; i++) {
3046       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]);
3047     }
3048   }
3049   if (o_nnz) {
3050     for (i=0; i<B->rmap->n/bs; i++) {
3051       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]);
3052     }
3053   }
3054 
3055   b = (Mat_MPIBAIJ*)B->data;
3056   b->bs2 = bs*bs;
3057   b->mbs = B->rmap->n/bs;
3058   b->nbs = B->cmap->n/bs;
3059   b->Mbs = B->rmap->N/bs;
3060   b->Nbs = B->cmap->N/bs;
3061 
3062   for (i=0; i<=b->size; i++) {
3063     b->rangebs[i] = B->rmap->range[i]/bs;
3064   }
3065   b->rstartbs = B->rmap->rstart/bs;
3066   b->rendbs   = B->rmap->rend/bs;
3067   b->cstartbs = B->cmap->rstart/bs;
3068   b->cendbs   = B->cmap->rend/bs;
3069 
3070   if (!B->preallocated) {
3071     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3072     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3073     ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
3074     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3075     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3076     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3077     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
3078     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3079     ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr);
3080   }
3081 
3082   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3083   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
3084   /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). */
3085   if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3086   if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3087   B->preallocated = PETSC_TRUE;
3088   PetscFunctionReturn(0);
3089 }
3090 EXTERN_C_END
3091 
3092 EXTERN_C_BEGIN
3093 extern PetscErrorCode  MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
3094 extern PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
3095 EXTERN_C_END
3096 
3097 
3098 EXTERN_C_BEGIN
3099 #undef __FUNCT__
3100 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj"
3101 PetscErrorCode  MatConvert_MPIBAIJ_MPIAdj(Mat B, const MatType newtype,MatReuse reuse,Mat *adj)
3102 {
3103   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
3104   PetscErrorCode ierr;
3105   Mat_SeqBAIJ    *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data;
3106   PetscInt       M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs;
3107   const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;
3108 
3109   PetscFunctionBegin;
3110   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&ii);CHKERRQ(ierr);
3111   ii[0] = 0;
3112   CHKMEMQ;
3113   for (i=0; i<M; i++) {
3114     if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]);
3115     if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]);
3116     ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i];
3117     /* remove one from count of matrix has diagonal */
3118     for (j=id[i]; j<id[i+1]; j++) {
3119       if (jd[j] == i) {ii[i+1]--;break;}
3120     }
3121   CHKMEMQ;
3122   }
3123   ierr = PetscMalloc(ii[M]*sizeof(PetscInt),&jj);CHKERRQ(ierr);
3124   cnt = 0;
3125   for (i=0; i<M; i++) {
3126     for (j=io[i]; j<io[i+1]; j++) {
3127       if (garray[jo[j]] > rstart) break;
3128       jj[cnt++] = garray[jo[j]];
3129   CHKMEMQ;
3130     }
3131     for (k=id[i]; k<id[i+1]; k++) {
3132       if (jd[k] != i) {
3133         jj[cnt++] = rstart + jd[k];
3134   CHKMEMQ;
3135       }
3136     }
3137     for (;j<io[i+1]; j++) {
3138       jj[cnt++] = garray[jo[j]];
3139   CHKMEMQ;
3140     }
3141   }
3142   ierr = MatCreateMPIAdj(((PetscObject)B)->comm,M,B->cmap->N/B->rmap->bs,ii,jj,PETSC_NULL,adj);CHKERRQ(ierr);
3143   PetscFunctionReturn(0);
3144 }
3145 EXTERN_C_END
3146 
3147 #include <../src/mat/impls/aij/mpi/mpiaij.h>
3148 EXTERN_C_BEGIN
3149 PetscErrorCode  MatConvert_SeqBAIJ_SeqAIJ(Mat,const MatType,MatReuse,Mat*);
3150 EXTERN_C_END
3151 
3152 EXTERN_C_BEGIN
3153 #undef __FUNCT__
3154 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAIJ"
3155 PetscErrorCode  MatConvert_MPIBAIJ_MPIAIJ(Mat A,const MatType newtype,MatReuse reuse,Mat *newmat)
3156 {
3157   PetscErrorCode ierr;
3158   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
3159   Mat            B;
3160   Mat_MPIAIJ     *b;
3161 
3162   PetscFunctionBegin;
3163   if (!A->assembled) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Matrix must be assembled");
3164 
3165   ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
3166   ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
3167   ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
3168   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
3169   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
3170   b = (Mat_MPIAIJ*) B->data;
3171 
3172   ierr = MatDestroy(&b->A);CHKERRQ(ierr);
3173   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
3174   ierr = DisAssemble_MPIBAIJ(A);CHKERRQ(ierr);
3175   ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr);
3176   ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr);
3177   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3178   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3179   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3180   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3181   if (reuse == MAT_REUSE_MATRIX) {
3182     ierr = MatHeaderReplace(A,B);CHKERRQ(ierr);
3183   } else {
3184    *newmat = B;
3185   }
3186   PetscFunctionReturn(0);
3187 }
3188 EXTERN_C_END
3189 
3190 EXTERN_C_BEGIN
3191 #if defined(PETSC_HAVE_MUMPS)
3192 extern PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*);
3193 #endif
3194 EXTERN_C_END
3195 
3196 /*MC
3197    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
3198 
3199    Options Database Keys:
3200 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
3201 . -mat_block_size <bs> - set the blocksize used to store the matrix
3202 - -mat_use_hash_table <fact>
3203 
3204   Level: beginner
3205 
3206 .seealso: MatCreateMPIBAIJ
3207 M*/
3208 
3209 EXTERN_C_BEGIN
3210 extern PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,const MatType,MatReuse,Mat*);
3211 EXTERN_C_END
3212 
3213 EXTERN_C_BEGIN
3214 #undef __FUNCT__
3215 #define __FUNCT__ "MatCreate_MPIBAIJ"
3216 PetscErrorCode  MatCreate_MPIBAIJ(Mat B)
3217 {
3218   Mat_MPIBAIJ    *b;
3219   PetscErrorCode ierr;
3220   PetscBool      flg;
3221 
3222   PetscFunctionBegin;
3223   ierr = PetscNewLog(B,Mat_MPIBAIJ,&b);CHKERRQ(ierr);
3224   B->data = (void*)b;
3225 
3226   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3227   B->assembled  = PETSC_FALSE;
3228 
3229   B->insertmode = NOT_SET_VALUES;
3230   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
3231   ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr);
3232 
3233   /* build local table of row and column ownerships */
3234   ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);
3235 
3236   /* build cache for off array entries formed */
3237   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
3238   b->donotstash  = PETSC_FALSE;
3239   b->colmap      = PETSC_NULL;
3240   b->garray      = PETSC_NULL;
3241   b->roworiented = PETSC_TRUE;
3242 
3243   /* stuff used in block assembly */
3244   b->barray       = 0;
3245 
3246   /* stuff used for matrix vector multiply */
3247   b->lvec         = 0;
3248   b->Mvctx        = 0;
3249 
3250   /* stuff for MatGetRow() */
3251   b->rowindices   = 0;
3252   b->rowvalues    = 0;
3253   b->getrowactive = PETSC_FALSE;
3254 
3255   /* hash table stuff */
3256   b->ht           = 0;
3257   b->hd           = 0;
3258   b->ht_size      = 0;
3259   b->ht_flag      = PETSC_FALSE;
3260   b->ht_fact      = 0;
3261   b->ht_total_ct  = 0;
3262   b->ht_insert_ct = 0;
3263 
3264   /* stuff for MatGetSubMatrices_MPIBAIJ_local() */
3265   b->ijonly       = PETSC_FALSE;
3266 
3267   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr);
3268     ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
3269     if (flg) {
3270       PetscReal fact = 1.39;
3271       ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
3272       ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr);
3273       if (fact <= 1.0) fact = 1.39;
3274       ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
3275       ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
3276     }
3277   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3278 
3279 #if defined(PETSC_HAVE_MUMPS)
3280   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps",MatGetFactor_baij_mumps);CHKERRQ(ierr);
3281 #endif
3282   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",
3283                                      "MatConvert_MPIBAIJ_MPIAdj",
3284                                       MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr);
3285   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",
3286                                      "MatConvert_MPIBAIJ_MPIAIJ",
3287                                       MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr);
3288   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C",
3289                                      "MatConvert_MPIBAIJ_MPISBAIJ",
3290                                       MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr);
3291   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3292                                      "MatStoreValues_MPIBAIJ",
3293                                      MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
3294   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3295                                      "MatRetrieveValues_MPIBAIJ",
3296                                      MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
3297   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
3298                                      "MatGetDiagonalBlock_MPIBAIJ",
3299                                      MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
3300   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C",
3301                                      "MatMPIBAIJSetPreallocation_MPIBAIJ",
3302                                      MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
3303   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",
3304 				     "MatMPIBAIJSetPreallocationCSR_MPIBAIJ",
3305 				     MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
3306   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
3307                                      "MatDiagonalScaleLocal_MPIBAIJ",
3308                                      MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
3309   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C",
3310                                      "MatSetHashTableFactor_MPIBAIJ",
3311                                      MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
3312   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpibstrm_C",
3313                                      "MatConvert_MPIBAIJ_MPIBSTRM",
3314                                       MatConvert_MPIBAIJ_MPIBSTRM);CHKERRQ(ierr);
3315   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);
3316   PetscFunctionReturn(0);
3317 }
3318 EXTERN_C_END
3319 
3320 /*MC
3321    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
3322 
3323    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
3324    and MATMPIBAIJ otherwise.
3325 
3326    Options Database Keys:
3327 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
3328 
3329   Level: beginner
3330 
3331 .seealso: MatCreateMPIBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3332 M*/
3333 
3334 #undef __FUNCT__
3335 #define __FUNCT__ "MatMPIBAIJSetPreallocation"
3336 /*@C
3337    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
3338    (block compressed row).  For good matrix assembly performance
3339    the user should preallocate the matrix storage by setting the parameters
3340    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3341    performance can be increased by more than a factor of 50.
3342 
3343    Collective on Mat
3344 
3345    Input Parameters:
3346 +  A - the matrix
3347 .  bs   - size of blockk
3348 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
3349            submatrix  (same for all local rows)
3350 .  d_nnz - array containing the number of block nonzeros in the various block rows
3351            of the in diagonal portion of the local (possibly different for each block
3352            row) or PETSC_NULL.  If you plan to factor the matrix you must leave room for the diagonal entry and
3353            set it even if it is zero.
3354 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
3355            submatrix (same for all local rows).
3356 -  o_nnz - array containing the number of nonzeros in the various block rows of the
3357            off-diagonal portion of the local submatrix (possibly different for
3358            each block row) or PETSC_NULL.
3359 
3360    If the *_nnz parameter is given then the *_nz parameter is ignored
3361 
3362    Options Database Keys:
3363 +   -mat_block_size - size of the blocks to use
3364 -   -mat_use_hash_table <fact>
3365 
3366    Notes:
3367    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
3368    than it must be used on all processors that share the object for that argument.
3369 
3370    Storage Information:
3371    For a square global matrix we define each processor's diagonal portion
3372    to be its local rows and the corresponding columns (a square submatrix);
3373    each processor's off-diagonal portion encompasses the remainder of the
3374    local matrix (a rectangular submatrix).
3375 
3376    The user can specify preallocated storage for the diagonal part of
3377    the local submatrix with either d_nz or d_nnz (not both).  Set
3378    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
3379    memory allocation.  Likewise, specify preallocated storage for the
3380    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3381 
3382    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3383    the figure below we depict these three local rows and all columns (0-11).
3384 
3385 .vb
3386            0 1 2 3 4 5 6 7 8 9 10 11
3387           -------------------
3388    row 3  |  o o o d d d o o o o o o
3389    row 4  |  o o o d d d o o o o o o
3390    row 5  |  o o o d d d o o o o o o
3391           -------------------
3392 .ve
3393 
3394    Thus, any entries in the d locations are stored in the d (diagonal)
3395    submatrix, and any entries in the o locations are stored in the
3396    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3397    stored simply in the MATSEQBAIJ format for compressed row storage.
3398 
3399    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3400    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3401    In general, for PDE problems in which most nonzeros are near the diagonal,
3402    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3403    or you will get TERRIBLE performance; see the users' manual chapter on
3404    matrices.
3405 
3406    You can call MatGetInfo() to get information on how effective the preallocation was;
3407    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3408    You can also run with the option -info and look for messages with the string
3409    malloc in them to see if additional memory allocation was needed.
3410 
3411    Level: intermediate
3412 
3413 .keywords: matrix, block, aij, compressed row, sparse, parallel
3414 
3415 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR()
3416 @*/
3417 PetscErrorCode  MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3418 {
3419   PetscErrorCode ierr;
3420 
3421   PetscFunctionBegin;
3422   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3423   PetscValidType(B,1);
3424   PetscValidLogicalCollectiveInt(B,bs,2);
3425   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3426   PetscFunctionReturn(0);
3427 }
3428 
3429 #undef __FUNCT__
3430 #define __FUNCT__ "MatCreateMPIBAIJ"
3431 /*@C
3432    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
3433    (block compressed row).  For good matrix assembly performance
3434    the user should preallocate the matrix storage by setting the parameters
3435    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3436    performance can be increased by more than a factor of 50.
3437 
3438    Collective on MPI_Comm
3439 
3440    Input Parameters:
3441 +  comm - MPI communicator
3442 .  bs   - size of blockk
3443 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3444            This value should be the same as the local size used in creating the
3445            y vector for the matrix-vector product y = Ax.
3446 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
3447            This value should be the same as the local size used in creating the
3448            x vector for the matrix-vector product y = Ax.
3449 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3450 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3451 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
3452            submatrix  (same for all local rows)
3453 .  d_nnz - array containing the number of nonzero blocks in the various block rows
3454            of the in diagonal portion of the local (possibly different for each block
3455            row) or PETSC_NULL.  If you plan to factor the matrix you must leave room for the diagonal entry
3456            and set it even if it is zero.
3457 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3458            submatrix (same for all local rows).
3459 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
3460            off-diagonal portion of the local submatrix (possibly different for
3461            each block row) or PETSC_NULL.
3462 
3463    Output Parameter:
3464 .  A - the matrix
3465 
3466    Options Database Keys:
3467 +   -mat_block_size - size of the blocks to use
3468 -   -mat_use_hash_table <fact>
3469 
3470    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3471    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3472    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3473 
3474    Notes:
3475    If the *_nnz parameter is given then the *_nz parameter is ignored
3476 
3477    A nonzero block is any block that as 1 or more nonzeros in it
3478 
3479    The user MUST specify either the local or global matrix dimensions
3480    (possibly both).
3481 
3482    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
3483    than it must be used on all processors that share the object for that argument.
3484 
3485    Storage Information:
3486    For a square global matrix we define each processor's diagonal portion
3487    to be its local rows and the corresponding columns (a square submatrix);
3488    each processor's off-diagonal portion encompasses the remainder of the
3489    local matrix (a rectangular submatrix).
3490 
3491    The user can specify preallocated storage for the diagonal part of
3492    the local submatrix with either d_nz or d_nnz (not both).  Set
3493    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
3494    memory allocation.  Likewise, specify preallocated storage for the
3495    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3496 
3497    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3498    the figure below we depict these three local rows and all columns (0-11).
3499 
3500 .vb
3501            0 1 2 3 4 5 6 7 8 9 10 11
3502           -------------------
3503    row 3  |  o o o d d d o o o o o o
3504    row 4  |  o o o d d d o o o o o o
3505    row 5  |  o o o d d d o o o o o o
3506           -------------------
3507 .ve
3508 
3509    Thus, any entries in the d locations are stored in the d (diagonal)
3510    submatrix, and any entries in the o locations are stored in the
3511    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3512    stored simply in the MATSEQBAIJ format for compressed row storage.
3513 
3514    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3515    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3516    In general, for PDE problems in which most nonzeros are near the diagonal,
3517    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3518    or you will get TERRIBLE performance; see the users' manual chapter on
3519    matrices.
3520 
3521    Level: intermediate
3522 
3523 .keywords: matrix, block, aij, compressed row, sparse, parallel
3524 
3525 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3526 @*/
3527 PetscErrorCode  MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3528 {
3529   PetscErrorCode ierr;
3530   PetscMPIInt    size;
3531 
3532   PetscFunctionBegin;
3533   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3534   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3535   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3536   if (size > 1) {
3537     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
3538     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3539   } else {
3540     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3541     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3542   }
3543   PetscFunctionReturn(0);
3544 }
3545 
3546 #undef __FUNCT__
3547 #define __FUNCT__ "MatDuplicate_MPIBAIJ"
3548 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3549 {
3550   Mat            mat;
3551   Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
3552   PetscErrorCode ierr;
3553   PetscInt       len=0;
3554 
3555   PetscFunctionBegin;
3556   *newmat       = 0;
3557   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
3558   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3559   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3560   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3561 
3562   mat->factortype   = matin->factortype;
3563   mat->preallocated = PETSC_TRUE;
3564   mat->assembled    = PETSC_TRUE;
3565   mat->insertmode   = NOT_SET_VALUES;
3566 
3567   a      = (Mat_MPIBAIJ*)mat->data;
3568   mat->rmap->bs  = matin->rmap->bs;
3569   a->bs2   = oldmat->bs2;
3570   a->mbs   = oldmat->mbs;
3571   a->nbs   = oldmat->nbs;
3572   a->Mbs   = oldmat->Mbs;
3573   a->Nbs   = oldmat->Nbs;
3574 
3575   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3576   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3577 
3578   a->size         = oldmat->size;
3579   a->rank         = oldmat->rank;
3580   a->donotstash   = oldmat->donotstash;
3581   a->roworiented  = oldmat->roworiented;
3582   a->rowindices   = 0;
3583   a->rowvalues    = 0;
3584   a->getrowactive = PETSC_FALSE;
3585   a->barray       = 0;
3586   a->rstartbs     = oldmat->rstartbs;
3587   a->rendbs       = oldmat->rendbs;
3588   a->cstartbs     = oldmat->cstartbs;
3589   a->cendbs       = oldmat->cendbs;
3590 
3591   /* hash table stuff */
3592   a->ht           = 0;
3593   a->hd           = 0;
3594   a->ht_size      = 0;
3595   a->ht_flag      = oldmat->ht_flag;
3596   a->ht_fact      = oldmat->ht_fact;
3597   a->ht_total_ct  = 0;
3598   a->ht_insert_ct = 0;
3599 
3600   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
3601   if (oldmat->colmap) {
3602 #if defined (PETSC_USE_CTABLE)
3603   ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3604 #else
3605   ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3606   ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3607   ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3608 #endif
3609   } else a->colmap = 0;
3610 
3611   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
3612     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3613     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3614     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
3615   } else a->garray = 0;
3616 
3617   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
3618   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3619   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3620   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3621   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3622 
3623   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3624   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3625   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3626   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3627   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3628   *newmat = mat;
3629 
3630   PetscFunctionReturn(0);
3631 }
3632 
3633 #undef __FUNCT__
3634 #define __FUNCT__ "MatLoad_MPIBAIJ"
3635 PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer)
3636 {
3637   PetscErrorCode ierr;
3638   int            fd;
3639   PetscInt       i,nz,j,rstart,rend;
3640   PetscScalar    *vals,*buf;
3641   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3642   MPI_Status     status;
3643   PetscMPIInt    rank,size,maxnz;
3644   PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
3645   PetscInt       *locrowlens = PETSC_NULL,*procsnz = PETSC_NULL,*browners = PETSC_NULL;
3646   PetscInt       jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax;
3647   PetscMPIInt    tag = ((PetscObject)viewer)->tag;
3648   PetscInt       *dlens = PETSC_NULL,*odlens = PETSC_NULL,*mask = PETSC_NULL,*masked1 = PETSC_NULL,*masked2 = PETSC_NULL,rowcount,odcount;
3649   PetscInt       dcount,kmax,k,nzcount,tmp,mend,sizesset=1,grows,gcols;
3650 
3651   PetscFunctionBegin;
3652   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr);
3653     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
3654   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3655 
3656   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3657   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3658   if (!rank) {
3659     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3660     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
3661     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3662   }
3663 
3664   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
3665 
3666   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3667   M = header[1]; N = header[2];
3668 
3669   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3670   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
3671   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
3672 
3673   /* If global sizes are set, check if they are consistent with that given in the file */
3674   if (sizesset) {
3675     ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
3676   }
3677   if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3678   if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3679 
3680   if (M != N) SETERRQ(((PetscObject)viewer)->comm,PETSC_ERR_SUP,"Can only do square matrices");
3681 
3682   /*
3683      This code adds extra rows to make sure the number of rows is
3684      divisible by the blocksize
3685   */
3686   Mbs        = M/bs;
3687   extra_rows = bs - M + bs*Mbs;
3688   if (extra_rows == bs) extra_rows = 0;
3689   else                  Mbs++;
3690   if (extra_rows && !rank) {
3691     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3692   }
3693 
3694   /* determine ownership of all rows */
3695   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
3696     mbs        = Mbs/size + ((Mbs % size) > rank);
3697     m          = mbs*bs;
3698   } else { /* User set */
3699     m          = newmat->rmap->n;
3700     mbs        = m/bs;
3701   }
3702   ierr       = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr);
3703   ierr       = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3704 
3705   /* process 0 needs enough room for process with most rows */
3706   if (!rank) {
3707     mmax = rowners[1];
3708     for (i=2; i<size; i++) {
3709       mmax = PetscMax(mmax,rowners[i]);
3710     }
3711     mmax*=bs;
3712   } else mmax = m;
3713 
3714   rowners[0] = 0;
3715   for (i=2; i<=size; i++)  rowners[i] += rowners[i-1];
3716   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
3717   rstart = rowners[rank];
3718   rend   = rowners[rank+1];
3719 
3720   /* distribute row lengths to all processors */
3721   ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr);
3722   if (!rank) {
3723     mend = m;
3724     if (size == 1) mend = mend - extra_rows;
3725     ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr);
3726     for (j=mend; j<m; j++) locrowlens[j] = 1;
3727     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3728     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3729     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3730     for (j=0; j<m; j++) {
3731       procsnz[0] += locrowlens[j];
3732     }
3733     for (i=1; i<size; i++) {
3734       mend = browners[i+1] - browners[i];
3735       if (i == size-1) mend = mend - extra_rows;
3736       ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr);
3737       for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
3738       /* calculate the number of nonzeros on each processor */
3739       for (j=0; j<browners[i+1]-browners[i]; j++) {
3740         procsnz[i] += rowlengths[j];
3741       }
3742       ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3743     }
3744     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3745   } else {
3746     ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3747   }
3748 
3749   if (!rank) {
3750     /* determine max buffer needed and allocate it */
3751     maxnz = procsnz[0];
3752     for (i=1; i<size; i++) {
3753       maxnz = PetscMax(maxnz,procsnz[i]);
3754     }
3755     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3756 
3757     /* read in my part of the matrix column indices  */
3758     nz     = procsnz[0];
3759     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
3760     mycols = ibuf;
3761     if (size == 1)  nz -= extra_rows;
3762     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3763     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
3764 
3765     /* read in every ones (except the last) and ship off */
3766     for (i=1; i<size-1; i++) {
3767       nz   = procsnz[i];
3768       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3769       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3770     }
3771     /* read in the stuff for the last proc */
3772     if (size != 1) {
3773       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
3774       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3775       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
3776       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
3777     }
3778     ierr = PetscFree(cols);CHKERRQ(ierr);
3779   } else {
3780     /* determine buffer space needed for message */
3781     nz = 0;
3782     for (i=0; i<m; i++) {
3783       nz += locrowlens[i];
3784     }
3785     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
3786     mycols = ibuf;
3787     /* receive message of column indices*/
3788     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3789     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
3790     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
3791   }
3792 
3793   /* loop over local rows, determining number of off diagonal entries */
3794   ierr     = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
3795   ierr     = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
3796   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3797   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3798   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3799   rowcount = 0; nzcount = 0;
3800   for (i=0; i<mbs; i++) {
3801     dcount  = 0;
3802     odcount = 0;
3803     for (j=0; j<bs; j++) {
3804       kmax = locrowlens[rowcount];
3805       for (k=0; k<kmax; k++) {
3806         tmp = mycols[nzcount++]/bs;
3807         if (!mask[tmp]) {
3808           mask[tmp] = 1;
3809           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
3810           else masked1[dcount++] = tmp;
3811         }
3812       }
3813       rowcount++;
3814     }
3815 
3816     dlens[i]  = dcount;
3817     odlens[i] = odcount;
3818 
3819     /* zero out the mask elements we set */
3820     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
3821     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
3822   }
3823 
3824 
3825   if (!sizesset) {
3826     ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3827   }
3828   ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
3829 
3830   if (!rank) {
3831     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
3832     /* read in my part of the matrix numerical values  */
3833     nz = procsnz[0];
3834     vals = buf;
3835     mycols = ibuf;
3836     if (size == 1)  nz -= extra_rows;
3837     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3838     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
3839 
3840     /* insert into matrix */
3841     jj      = rstart*bs;
3842     for (i=0; i<m; i++) {
3843       ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3844       mycols += locrowlens[i];
3845       vals   += locrowlens[i];
3846       jj++;
3847     }
3848     /* read in other processors (except the last one) and ship out */
3849     for (i=1; i<size-1; i++) {
3850       nz   = procsnz[i];
3851       vals = buf;
3852       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3853       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3854     }
3855     /* the last proc */
3856     if (size != 1){
3857       nz   = procsnz[i] - extra_rows;
3858       vals = buf;
3859       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3860       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
3861       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3862     }
3863     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3864   } else {
3865     /* receive numeric values */
3866     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
3867 
3868     /* receive message of values*/
3869     vals   = buf;
3870     mycols = ibuf;
3871     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
3872     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
3873     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
3874 
3875     /* insert into matrix */
3876     jj      = rstart*bs;
3877     for (i=0; i<m; i++) {
3878       ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3879       mycols += locrowlens[i];
3880       vals   += locrowlens[i];
3881       jj++;
3882     }
3883   }
3884   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
3885   ierr = PetscFree(buf);CHKERRQ(ierr);
3886   ierr = PetscFree(ibuf);CHKERRQ(ierr);
3887   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
3888   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
3889   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
3890   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3891   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3892 
3893   PetscFunctionReturn(0);
3894 }
3895 
3896 #undef __FUNCT__
3897 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
3898 /*@
3899    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
3900 
3901    Input Parameters:
3902 .  mat  - the matrix
3903 .  fact - factor
3904 
3905    Not Collective, each process can use a different factor
3906 
3907    Level: advanced
3908 
3909   Notes:
3910    This can also be set by the command line option: -mat_use_hash_table <fact>
3911 
3912 .keywords: matrix, hashtable, factor, HT
3913 
3914 .seealso: MatSetOption()
3915 @*/
3916 PetscErrorCode  MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
3917 {
3918   PetscErrorCode ierr;
3919 
3920   PetscFunctionBegin;
3921   ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr);
3922   PetscFunctionReturn(0);
3923 }
3924 
3925 EXTERN_C_BEGIN
3926 #undef __FUNCT__
3927 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
3928 PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
3929 {
3930   Mat_MPIBAIJ *baij;
3931 
3932   PetscFunctionBegin;
3933   baij = (Mat_MPIBAIJ*)mat->data;
3934   baij->ht_fact = fact;
3935   PetscFunctionReturn(0);
3936 }
3937 EXTERN_C_END
3938 
3939 #undef __FUNCT__
3940 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
3941 PetscErrorCode  MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3942 {
3943   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
3944   PetscFunctionBegin;
3945   *Ad     = a->A;
3946   *Ao     = a->B;
3947   *colmap = a->garray;
3948   PetscFunctionReturn(0);
3949 }
3950 
3951 /*
3952     Special version for direct calls from Fortran (to eliminate two function call overheads
3953 */
3954 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3955 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3956 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3957 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3958 #endif
3959 
3960 #undef __FUNCT__
3961 #define __FUNCT__ "matmpibiajsetvaluesblocked"
3962 /*@C
3963   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked()
3964 
3965   Collective on Mat
3966 
3967   Input Parameters:
3968 + mat - the matrix
3969 . min - number of input rows
3970 . im - input rows
3971 . nin - number of input columns
3972 . in - input columns
3973 . v - numerical values input
3974 - addvin - INSERT_VALUES or ADD_VALUES
3975 
3976   Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse.
3977 
3978   Level: advanced
3979 
3980 .seealso:   MatSetValuesBlocked()
3981 @*/
3982 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
3983 {
3984   /* convert input arguments to C version */
3985   Mat             mat = *matin;
3986   PetscInt        m = *min, n = *nin;
3987   InsertMode      addv = *addvin;
3988 
3989   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
3990   const MatScalar *value;
3991   MatScalar       *barray=baij->barray;
3992   PetscBool       roworiented = baij->roworiented;
3993   PetscErrorCode  ierr;
3994   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
3995   PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
3996   PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
3997 
3998   PetscFunctionBegin;
3999   /* tasks normally handled by MatSetValuesBlocked() */
4000   if (mat->insertmode == NOT_SET_VALUES) {
4001     mat->insertmode = addv;
4002   }
4003 #if defined(PETSC_USE_DEBUG)
4004   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
4005   if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
4006 #endif
4007   if (mat->assembled) {
4008     mat->was_assembled = PETSC_TRUE;
4009     mat->assembled     = PETSC_FALSE;
4010   }
4011   ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
4012 
4013 
4014   if(!barray) {
4015     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
4016     baij->barray = barray;
4017   }
4018 
4019   if (roworiented) {
4020     stepval = (n-1)*bs;
4021   } else {
4022     stepval = (m-1)*bs;
4023   }
4024   for (i=0; i<m; i++) {
4025     if (im[i] < 0) continue;
4026 #if defined(PETSC_USE_DEBUG)
4027     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
4028 #endif
4029     if (im[i] >= rstart && im[i] < rend) {
4030       row = im[i] - rstart;
4031       for (j=0; j<n; j++) {
4032         /* If NumCol = 1 then a copy is not required */
4033         if ((roworiented) && (n == 1)) {
4034           barray = (MatScalar*)v + i*bs2;
4035         } else if((!roworiented) && (m == 1)) {
4036           barray = (MatScalar*)v + j*bs2;
4037         } else { /* Here a copy is required */
4038           if (roworiented) {
4039             value = v + i*(stepval+bs)*bs + j*bs;
4040           } else {
4041             value = v + j*(stepval+bs)*bs + i*bs;
4042           }
4043           for (ii=0; ii<bs; ii++,value+=stepval) {
4044             for (jj=0; jj<bs; jj++) {
4045               *barray++  = *value++;
4046             }
4047           }
4048           barray -=bs2;
4049         }
4050 
4051         if (in[j] >= cstart && in[j] < cend){
4052           col  = in[j] - cstart;
4053           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
4054         }
4055         else if (in[j] < 0) continue;
4056 #if defined(PETSC_USE_DEBUG)
4057         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
4058 #endif
4059         else {
4060           if (mat->was_assembled) {
4061             if (!baij->colmap) {
4062               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
4063             }
4064 
4065 #if defined(PETSC_USE_DEBUG)
4066 #if defined (PETSC_USE_CTABLE)
4067             { PetscInt data;
4068               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
4069               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
4070             }
4071 #else
4072             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
4073 #endif
4074 #endif
4075 #if defined (PETSC_USE_CTABLE)
4076 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
4077             col  = (col - 1)/bs;
4078 #else
4079             col = (baij->colmap[in[j]] - 1)/bs;
4080 #endif
4081             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
4082               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
4083               col =  in[j];
4084             }
4085           }
4086           else col = in[j];
4087           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
4088         }
4089       }
4090     } else {
4091       if (!baij->donotstash) {
4092         if (roworiented) {
4093           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
4094         } else {
4095           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
4096         }
4097       }
4098     }
4099   }
4100 
4101   /* task normally handled by MatSetValuesBlocked() */
4102   ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
4103   PetscFunctionReturn(0);
4104 }
4105 
4106 #undef __FUNCT__
4107 #define __FUNCT__ "MatCreateMPIBAIJWithArrays"
4108 /*@
4109      MatCreateMPIBAIJWithArrays - creates a MPI BAIJ matrix using arrays that contain in standard
4110          CSR format the local rows.
4111 
4112    Collective on MPI_Comm
4113 
4114    Input Parameters:
4115 +  comm - MPI communicator
4116 .  bs - the block size, only a block size of 1 is supported
4117 .  m - number of local rows (Cannot be PETSC_DECIDE)
4118 .  n - This value should be the same as the local size used in creating the
4119        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4120        calculated if N is given) For square matrices n is almost always m.
4121 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4122 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4123 .   i - row indices
4124 .   j - column indices
4125 -   a - matrix values
4126 
4127    Output Parameter:
4128 .   mat - the matrix
4129 
4130    Level: intermediate
4131 
4132    Notes:
4133        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4134      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4135      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4136 
4137        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4138 
4139 .keywords: matrix, aij, compressed row, sparse, parallel
4140 
4141 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4142           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
4143 @*/
4144 PetscErrorCode  MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4145 {
4146   PetscErrorCode ierr;
4147 
4148 
4149  PetscFunctionBegin;
4150   if (i[0]) {
4151     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4152   }
4153   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4154   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4155   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4156   ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr);
4157   ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
4158   PetscFunctionReturn(0);
4159 }
4160