xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 8cc058d9cd56c1ccb3be12a47760ddfc446aaffc)
1 
2 #include <../src/mat/impls/baij/mpi/mpibaij.h>   /*I  "petscmat.h"  I*/
3 #include <petscblaslapack.h>
4 
5 extern PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
6 extern PetscErrorCode MatDisAssemble_MPIBAIJ(Mat);
7 extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []);
8 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
9 extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
10 extern PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
11 extern PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
12 extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar,Vec,Vec);
13 
14 #undef __FUNCT__
15 #define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ"
16 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
17 {
18   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
19   PetscErrorCode ierr;
20   PetscInt       i,*idxb = 0;
21   PetscScalar    *va,*vb;
22   Vec            vtmp;
23 
24   PetscFunctionBegin;
25   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
26   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
27   if (idx) {
28     for (i=0; i<A->rmap->n; i++) {
29       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
30     }
31   }
32 
33   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
34   if (idx) {ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);}
35   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
36   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
37 
38   for (i=0; i<A->rmap->n; i++) {
39     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
40       va[i] = vb[i];
41       if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);
42     }
43   }
44 
45   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
46   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
47   ierr = PetscFree(idxb);CHKERRQ(ierr);
48   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
49   PetscFunctionReturn(0);
50 }
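
/*
   A minimal usage sketch for the routine above (illustrative only, guarded out of compilation;
   the matrix 'mat' is assumed to be an assembled MATMPIBAIJ matrix created elsewhere).
*/
#if 0
{
  Mat      mat;
  Vec      rowmax;
  PetscInt m,*cols;

  ierr = MatGetLocalSize(mat,&m,NULL);CHKERRQ(ierr);
  ierr = VecCreateMPI(PetscObjectComm((PetscObject)mat),m,PETSC_DETERMINE,&rowmax);CHKERRQ(ierr);
  ierr = PetscMalloc(m*sizeof(PetscInt),&cols);CHKERRQ(ierr);
  ierr = MatGetRowMaxAbs(mat,rowmax,cols);CHKERRQ(ierr);  /* cols[i] holds the global column of the max in local row i */
  ierr = PetscFree(cols);CHKERRQ(ierr);
  ierr = VecDestroy(&rowmax);CHKERRQ(ierr);
}
#endif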
51 
52 #undef __FUNCT__
53 #define __FUNCT__ "MatStoreValues_MPIBAIJ"
54 PetscErrorCode  MatStoreValues_MPIBAIJ(Mat mat)
55 {
56   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
57   PetscErrorCode ierr;
58 
59   PetscFunctionBegin;
60   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
61   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
62   PetscFunctionReturn(0);
63 }
64 
65 #undef __FUNCT__
66 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
67 PetscErrorCode  MatRetrieveValues_MPIBAIJ(Mat mat)
68 {
69   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
70   PetscErrorCode ierr;
71 
72   PetscFunctionBegin;
73   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
74   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
75   PetscFunctionReturn(0);
76 }
77 
78 /*
79      Local utility routine that creates a mapping from the global column
80    number to the local number in the off-diagonal part of the local
81    storage of the matrix.  This is done in a non-scalable way since the
82    length of colmap equals the global number of block columns of the matrix.
83 */
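/*
   A worked example of the mapping built below (hypothetical numbers): with bs = 2 and
   garray = {3,7} (the global block columns that appear in the off-diagonal part), the
   non-CTABLE branch sets colmap[3] = 0*bs+1 = 1 and colmap[7] = 1*bs+1 = 3, all other
   entries remaining 0.  A global point column 15 (block column 15/bs = 7) is then
   translated in MatSetValues_MPIBAIJ() to the local off-diagonal column
   (colmap[7]-1) + 15%bs = 2 + 1 = 3.  The +1/-1 shift lets 0 mean "block column not present".
*/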
84 #undef __FUNCT__
85 #define __FUNCT__ "MatCreateColmap_MPIBAIJ_Private"
86 PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
87 {
88   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
89   Mat_SeqBAIJ    *B    = (Mat_SeqBAIJ*)baij->B->data;
90   PetscErrorCode ierr;
91   PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;
92 
93   PetscFunctionBegin;
94 #if defined(PETSC_USE_CTABLE)
95   ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
96   for (i=0; i<nbs; i++) {
97     ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr);
98   }
99 #else
100   ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr);
101   ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
102   ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
103   for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
104 #endif
105   PetscFunctionReturn(0);
106 }
107 
108 #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
109   { \
110  \
111     brow = row/bs;  \
112     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
113     rmax = aimax[brow]; nrow = ailen[brow]; \
114     bcol = col/bs; \
115     ridx = row % bs; cidx = col % bs; \
116     low  = 0; high = nrow; \
117     while (high-low > 3) { \
118       t = (low+high)/2; \
119       if (rp[t] > bcol) high = t; \
120       else              low  = t; \
121     } \
122     for (_i=low; _i<high; _i++) { \
123       if (rp[_i] > bcol) break; \
124       if (rp[_i] == bcol) { \
125         bap = ap +  bs2*_i + bs*cidx + ridx; \
126         if (addv == ADD_VALUES) *bap += value;  \
127         else                    *bap  = value;  \
128         goto a_noinsert; \
129       } \
130     } \
131     if (a->nonew == 1) goto a_noinsert; \
132     if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
133     MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
134     N = nrow++ - 1;  \
135     /* shift up all the later entries in this row */ \
136     for (ii=N; ii>=_i; ii--) { \
137       rp[ii+1] = rp[ii]; \
138       ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
139     } \
140     if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
141     rp[_i]                      = bcol;  \
142     ap[bs2*_i + bs*cidx + ridx] = value;  \
143 a_noinsert:; \
144     ailen[brow] = nrow; \
145   }
146 
147 #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
148   { \
149     brow = row/bs;  \
150     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
151     rmax = bimax[brow]; nrow = bilen[brow]; \
152     bcol = col/bs; \
153     ridx = row % bs; cidx = col % bs; \
154     low  = 0; high = nrow; \
155     while (high-low > 3) { \
156       t = (low+high)/2; \
157       if (rp[t] > bcol) high = t; \
158       else              low  = t; \
159     } \
160     for (_i=low; _i<high; _i++) { \
161       if (rp[_i] > bcol) break; \
162       if (rp[_i] == bcol) { \
163         bap = ap +  bs2*_i + bs*cidx + ridx; \
164         if (addv == ADD_VALUES) *bap += value;  \
165         else                    *bap  = value;  \
166         goto b_noinsert; \
167       } \
168     } \
169     if (b->nonew == 1) goto b_noinsert; \
170     if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
171     MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
172     CHKMEMQ; \
173     N = nrow++ - 1;  \
174     /* shift up all the later entries in this row */ \
175     for (ii=N; ii>=_i; ii--) { \
176       rp[ii+1] = rp[ii]; \
177       ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
178     } \
179     if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
180     rp[_i]                      = bcol;  \
181     ap[bs2*_i + bs*cidx + ridx] = value;  \
182 b_noinsert:; \
183     bilen[brow] = nrow; \
184   }
185 
186 #undef __FUNCT__
187 #define __FUNCT__ "MatSetValues_MPIBAIJ"
188 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
189 {
190   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
191   MatScalar      value;
192   PetscBool      roworiented = baij->roworiented;
193   PetscErrorCode ierr;
194   PetscInt       i,j,row,col;
195   PetscInt       rstart_orig=mat->rmap->rstart;
196   PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
197   PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;
198 
199   /* Some variables required by the MatSetValues_SeqBAIJ_{A,B}_Private() macros */
200   Mat         A     = baij->A;
201   Mat_SeqBAIJ *a    = (Mat_SeqBAIJ*)(A)->data;
202   PetscInt    *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
203   MatScalar   *aa   =a->a;
204 
205   Mat         B     = baij->B;
206   Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
207   PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
208   MatScalar   *ba   =b->a;
209 
210   PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
211   PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
212   MatScalar *ap,*bap;
213 
214   PetscFunctionBegin;
215   if (v) PetscValidScalarPointer(v,6);
216   for (i=0; i<m; i++) {
217     if (im[i] < 0) continue;
218 #if defined(PETSC_USE_DEBUG)
219     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
220 #endif
221     if (im[i] >= rstart_orig && im[i] < rend_orig) {
222       row = im[i] - rstart_orig;
223       for (j=0; j<n; j++) {
224         if (in[j] >= cstart_orig && in[j] < cend_orig) {
225           col = in[j] - cstart_orig;
226           if (roworiented) value = v[i*n+j];
227           else             value = v[i+j*m];
228           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
229           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
230         } else if (in[j] < 0) continue;
231 #if defined(PETSC_USE_DEBUG)
232         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
233 #endif
234         else {
235           if (mat->was_assembled) {
236             if (!baij->colmap) {
237               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
238             }
239 #if defined(PETSC_USE_CTABLE)
240             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
241             col  = col - 1;
242 #else
243             col = baij->colmap[in[j]/bs] - 1;
244 #endif
245             if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
246               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
247               col  =  in[j];
248               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
249               B    = baij->B;
250               b    = (Mat_SeqBAIJ*)(B)->data;
251               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
252               ba   =b->a;
253             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
254             else col += in[j]%bs;
255           } else col = in[j];
256           if (roworiented) value = v[i*n+j];
257           else             value = v[i+j*m];
258           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
259           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
260         }
261       }
262     } else {
263       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
264       if (!baij->donotstash) {
265         mat->assembled = PETSC_FALSE;
266         if (roworiented) {
267           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
268         } else {
269           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
270         }
271       }
272     }
273   }
274   PetscFunctionReturn(0);
275 }
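
/*
   A minimal, hypothetical usage sketch (guarded out of compilation): values are normally set
   through the public MatSetValues() interface with global point indices; rows owned by another
   process are stashed by the routine above and communicated during assembly.
*/
#if 0
{
  Mat         mat;                       /* an existing MATMPIBAIJ matrix, preallocated elsewhere */
  PetscInt    row = 4,cols[2] = {4,5};
  PetscScalar vals[2] = {1.0,2.0};

  ierr = MatSetValues(mat,1,&row,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
}
#endif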
276 
277 #undef __FUNCT__
278 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
279 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
280 {
281   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
282   const PetscScalar *value;
283   MatScalar         *barray     = baij->barray;
284   PetscBool         roworiented = baij->roworiented;
285   PetscErrorCode    ierr;
286   PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
287   PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
288   PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
289 
290   PetscFunctionBegin;
291   if (!barray) {
292     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
293     baij->barray = barray;
294   }
295 
296   if (roworiented) stepval = (n-1)*bs;
297   else stepval = (m-1)*bs;
298 
299   for (i=0; i<m; i++) {
300     if (im[i] < 0) continue;
301 #if defined(PETSC_USE_DEBUG)
302     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
303 #endif
304     if (im[i] >= rstart && im[i] < rend) {
305       row = im[i] - rstart;
306       for (j=0; j<n; j++) {
307         /* If only one block column (row-oriented) or one block row (column-oriented) is being set, the block in v is already contiguous and no copy is required */
308         if ((roworiented) && (n == 1)) {
309           barray = (MatScalar*)v + i*bs2;
310         } else if ((!roworiented) && (m == 1)) {
311           barray = (MatScalar*)v + j*bs2;
312         } else { /* Here a copy is required */
313           if (roworiented) {
314             value = v + (i*(stepval+bs) + j)*bs;
315           } else {
316             value = v + (j*(stepval+bs) + i)*bs;
317           }
318           for (ii=0; ii<bs; ii++,value+=bs+stepval) {
319             for (jj=0; jj<bs; jj++) barray[jj] = value[jj];
320             barray += bs;
321           }
322           barray -= bs2;
323         }
324 
325         if (in[j] >= cstart && in[j] < cend) {
326           col  = in[j] - cstart;
327           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
328         } else if (in[j] < 0) continue;
329 #if defined(PETSC_USE_DEBUG)
330         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
331 #endif
332         else {
333           if (mat->was_assembled) {
334             if (!baij->colmap) {
335               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
336             }
337 
338 #if defined(PETSC_USE_DEBUG)
339 #if defined(PETSC_USE_CTABLE)
340             { PetscInt data;
341               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
342               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
343             }
344 #else
345             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
346 #endif
347 #endif
348 #if defined(PETSC_USE_CTABLE)
349             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
350             col  = (col - 1)/bs;
351 #else
352             col = (baij->colmap[in[j]] - 1)/bs;
353 #endif
354             if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
355               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
356               col  =  in[j];
357             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", bs*im[i], bs*in[j]);
358           } else col = in[j];
359           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
360         }
361       }
362     } else {
363       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
364       if (!baij->donotstash) {
365         if (roworiented) {
366           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
367         } else {
368           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
369         }
370       }
371     }
372   }
373   PetscFunctionReturn(0);
374 }
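
/*
   A minimal, hypothetical usage sketch (guarded out of compilation): MatSetValuesBlocked() takes
   block indices and one bs x bs block of values per (block row, block column) pair, row-oriented
   by default.
*/
#if 0
{
  Mat         mat;                              /* an existing MATMPIBAIJ matrix with bs = 2 */
  PetscInt    brow = 1,bcol = 2;
  PetscScalar block[4] = {1.0,2.0,3.0,4.0};     /* one 2x2 block */

  ierr = MatSetValuesBlocked(mat,1,&brow,1,&bcol,block,ADD_VALUES);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
}
#endif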
375 
376 #define HASH_KEY 0.6180339887
377 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
378 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
379 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
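/*
   The HASH macro above is the classic golden-ratio multiplicative hash: the key is multiplied by
   HASH_KEY and the fractional part of the product is scaled by the table size.  A worked example
   with hypothetical numbers: for key = 38 and size = 64, tmp = 38*0.6180339887 = 23.485..., so
   HASH(64,38,tmp) = (PetscInt)(64*0.485...) = 31.  Collisions are resolved by the linear probing
   loops in the routines below.
*/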
380 #undef __FUNCT__
381 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
382 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
383 {
384   Mat_MPIBAIJ    *baij       = (Mat_MPIBAIJ*)mat->data;
385   PetscBool      roworiented = baij->roworiented;
386   PetscErrorCode ierr;
387   PetscInt       i,j,row,col;
388   PetscInt       rstart_orig=mat->rmap->rstart;
389   PetscInt       rend_orig  =mat->rmap->rend,Nbs=baij->Nbs;
390   PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
391   PetscReal      tmp;
392   MatScalar      **HD = baij->hd,value;
393 #if defined(PETSC_USE_DEBUG)
394   PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
395 #endif
396 
397   PetscFunctionBegin;
398   if (v) PetscValidScalarPointer(v,6);
399   for (i=0; i<m; i++) {
400 #if defined(PETSC_USE_DEBUG)
401     if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
402     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
403 #endif
404     row = im[i];
405     if (row >= rstart_orig && row < rend_orig) {
406       for (j=0; j<n; j++) {
407         col = in[j];
408         if (roworiented) value = v[i*n+j];
409         else             value = v[i+j*m];
410         /* Look up into the hash table */
411         key = (row/bs)*Nbs+(col/bs)+1;
412         h1  = HASH(size,key,tmp);
413 
414 
415         idx = h1;
416 #if defined(PETSC_USE_DEBUG)
417         insert_ct++;
418         total_ct++;
419         if (HT[idx] != key) {
420           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
421           if (idx == size) {
422             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
423             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
424           }
425         }
426 #else
427         if (HT[idx] != key) {
428           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
429           if (idx == size) {
430             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
431             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
432           }
433         }
434 #endif
435         /* A HASH table entry is found, so insert the values at the correct address */
436         if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
437         else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
438       }
439     } else if (!baij->donotstash) {
440       if (roworiented) {
441         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
442       } else {
443         ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
444       }
445     }
446   }
447 #if defined(PETSC_USE_DEBUG)
448   baij->ht_total_ct  = total_ct;
449   baij->ht_insert_ct = insert_ct;
450 #endif
451   PetscFunctionReturn(0);
452 }
453 
454 #undef __FUNCT__
455 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
456 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
457 {
458   Mat_MPIBAIJ       *baij       = (Mat_MPIBAIJ*)mat->data;
459   PetscBool         roworiented = baij->roworiented;
460   PetscErrorCode    ierr;
461   PetscInt          i,j,ii,jj,row,col;
462   PetscInt          rstart=baij->rstartbs;
463   PetscInt          rend  =mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
464   PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
465   PetscReal         tmp;
466   MatScalar         **HD = baij->hd,*baij_a;
467   const PetscScalar *v_t,*value;
468 #if defined(PETSC_USE_DEBUG)
469   PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
470 #endif
471 
472   PetscFunctionBegin;
473   if (roworiented) stepval = (n-1)*bs;
474   else stepval = (m-1)*bs;
475 
476   for (i=0; i<m; i++) {
477 #if defined(PETSC_USE_DEBUG)
478     if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
479     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
480 #endif
481     row = im[i];
482     v_t = v + i*nbs2;
483     if (row >= rstart && row < rend) {
484       for (j=0; j<n; j++) {
485         col = in[j];
486 
487         /* Look up into the Hash Table */
488         key = row*Nbs+col+1;
489         h1  = HASH(size,key,tmp);
490 
491         idx = h1;
492 #if defined(PETSC_USE_DEBUG)
493         total_ct++;
494         insert_ct++;
495         if (HT[idx] != key) {
496           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
497           if (idx == size) {
498             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
499             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
500           }
501         }
502 #else
503         if (HT[idx] != key) {
504           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
505           if (idx == size) {
506             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
507             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
508           }
509         }
510 #endif
511         baij_a = HD[idx];
512         if (roworiented) {
513           /*value = v + i*(stepval+bs)*bs + j*bs;*/
514           /* value = v + (i*(stepval+bs)+j)*bs; */
515           value = v_t;
516           v_t  += bs;
517           if (addv == ADD_VALUES) {
518             for (ii=0; ii<bs; ii++,value+=stepval) {
519               for (jj=ii; jj<bs2; jj+=bs) {
520                 baij_a[jj] += *value++;
521               }
522             }
523           } else {
524             for (ii=0; ii<bs; ii++,value+=stepval) {
525               for (jj=ii; jj<bs2; jj+=bs) {
526                 baij_a[jj] = *value++;
527               }
528             }
529           }
530         } else {
531           value = v + j*(stepval+bs)*bs + i*bs;
532           if (addv == ADD_VALUES) {
533             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
534               for (jj=0; jj<bs; jj++) {
535                 baij_a[jj] += *value++;
536               }
537             }
538           } else {
539             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
540               for (jj=0; jj<bs; jj++) {
541                 baij_a[jj] = *value++;
542               }
543             }
544           }
545         }
546       }
547     } else {
548       if (!baij->donotstash) {
549         if (roworiented) {
550           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
551         } else {
552           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
553         }
554       }
555     }
556   }
557 #if defined(PETSC_USE_DEBUG)
558   baij->ht_total_ct  = total_ct;
559   baij->ht_insert_ct = insert_ct;
560 #endif
561   PetscFunctionReturn(0);
562 }
563 
564 #undef __FUNCT__
565 #define __FUNCT__ "MatGetValues_MPIBAIJ"
566 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
567 {
568   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
569   PetscErrorCode ierr;
570   PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
571   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
572 
573   PetscFunctionBegin;
574   for (i=0; i<m; i++) {
575     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
576     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
577     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
578       row = idxm[i] - bsrstart;
579       for (j=0; j<n; j++) {
580         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
581         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
582         if (idxn[j] >= bscstart && idxn[j] < bscend) {
583           col  = idxn[j] - bscstart;
584           ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
585         } else {
586           if (!baij->colmap) {
587             ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
588           }
589 #if defined(PETSC_USE_CTABLE)
590           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
591           data--;
592 #else
593           data = baij->colmap[idxn[j]/bs]-1;
594 #endif
595           if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
596           else {
597             col  = data + idxn[j]%bs;
598             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
599           }
600         }
601       }
602     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
603   }
604   PetscFunctionReturn(0);
605 }
606 
607 #undef __FUNCT__
608 #define __FUNCT__ "MatNorm_MPIBAIJ"
609 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
610 {
611   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
612   Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
613   PetscErrorCode ierr;
614   PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
615   PetscReal      sum = 0.0;
616   MatScalar      *v;
617 
618   PetscFunctionBegin;
619   if (baij->size == 1) {
620     ierr =  MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
621   } else {
622     if (type == NORM_FROBENIUS) {
623       v  = amat->a;
624       nz = amat->nz*bs2;
625       for (i=0; i<nz; i++) {
626         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
627       }
628       v  = bmat->a;
629       nz = bmat->nz*bs2;
630       for (i=0; i<nz; i++) {
631         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
632       }
633       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
634       *nrm = PetscSqrtReal(*nrm);
635     } else if (type == NORM_1) { /* max column sum */
636       PetscReal *tmp,*tmp2;
637       PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
638       ierr = PetscMalloc2(mat->cmap->N,PetscReal,&tmp,mat->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr);
639       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
640       v    = amat->a; jj = amat->j;
641       for (i=0; i<amat->nz; i++) {
642         for (j=0; j<bs; j++) {
643           col = bs*(cstart + *jj) + j; /* column index */
644           for (row=0; row<bs; row++) {
645             tmp[col] += PetscAbsScalar(*v);  v++;
646           }
647         }
648         jj++;
649       }
650       v = bmat->a; jj = bmat->j;
651       for (i=0; i<bmat->nz; i++) {
652         for (j=0; j<bs; j++) {
653           col = bs*garray[*jj] + j;
654           for (row=0; row<bs; row++) {
655             tmp[col] += PetscAbsScalar(*v); v++;
656           }
657         }
658         jj++;
659       }
660       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
661       *nrm = 0.0;
662       for (j=0; j<mat->cmap->N; j++) {
663         if (tmp2[j] > *nrm) *nrm = tmp2[j];
664       }
665       ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
666     } else if (type == NORM_INFINITY) { /* max row sum */
667       PetscReal *sums;
668       ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr);
669       sum  = 0.0;
670       for (j=0; j<amat->mbs; j++) {
671         for (row=0; row<bs; row++) sums[row] = 0.0;
672         v  = amat->a + bs2*amat->i[j];
673         nz = amat->i[j+1]-amat->i[j];
674         for (i=0; i<nz; i++) {
675           for (col=0; col<bs; col++) {
676             for (row=0; row<bs; row++) {
677               sums[row] += PetscAbsScalar(*v); v++;
678             }
679           }
680         }
681         v  = bmat->a + bs2*bmat->i[j];
682         nz = bmat->i[j+1]-bmat->i[j];
683         for (i=0; i<nz; i++) {
684           for (col=0; col<bs; col++) {
685             for (row=0; row<bs; row++) {
686               sums[row] += PetscAbsScalar(*v); v++;
687             }
688           }
689         }
690         for (row=0; row<bs; row++) {
691           if (sums[row] > sum) sum = sums[row];
692         }
693       }
694       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
695       ierr = PetscFree(sums);CHKERRQ(ierr);
696     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for this norm yet");
697   }
698   PetscFunctionReturn(0);
699 }
700 
701 /*
702   Creates and fills the hash table.
703   This table is created only once.
704   If new entries need to be added to the matrix,
705   then the hash table has to be destroyed and
706   recreated.
707 */
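/*
   A sketch of the insertion done below, with hypothetical numbers: a block at global block row 3
   and global block column 7 of a matrix with Nbs = 10 gets key = 3*10 + 7 + 1 = 38.  Starting from
   h1 = HASH(ht_size,key,tmp), the slots (h1+k)%ht_size, k = 0,1,2,..., are probed until an empty
   one (HT[] == 0) is found; HT[] then records the key and HD[] the address of the bs2 values of
   that block, so MatSetValues_MPIBAIJ_HT() can later write straight into the matrix storage.
*/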
708 #undef __FUNCT__
709 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
710 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
711 {
712   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
713   Mat            A     = baij->A,B=baij->B;
714   Mat_SeqBAIJ    *a    = (Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)B->data;
715   PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
716   PetscErrorCode ierr;
717   PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
718   PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
719   PetscInt       *HT,key;
720   MatScalar      **HD;
721   PetscReal      tmp;
722 #if defined(PETSC_USE_INFO)
723   PetscInt ct=0,max=0;
724 #endif
725 
726   PetscFunctionBegin;
727   if (baij->ht) PetscFunctionReturn(0);
728 
729   baij->ht_size = (PetscInt)(factor*nz);
730   ht_size       = baij->ht_size;
731 
732   /* Allocate Memory for Hash Table */
733   ierr = PetscMalloc2(ht_size,MatScalar*,&baij->hd,ht_size,PetscInt,&baij->ht);CHKERRQ(ierr);
734   ierr = PetscMemzero(baij->hd,ht_size*sizeof(MatScalar*));CHKERRQ(ierr);
735   ierr = PetscMemzero(baij->ht,ht_size*sizeof(PetscInt));CHKERRQ(ierr);
736   HD   = baij->hd;
737   HT   = baij->ht;
738 
739   /* Loop Over A */
740   for (i=0; i<a->mbs; i++) {
741     for (j=ai[i]; j<ai[i+1]; j++) {
742       row = i+rstart;
743       col = aj[j]+cstart;
744 
745       key = row*Nbs + col + 1;
746       h1  = HASH(ht_size,key,tmp);
747       for (k=0; k<ht_size; k++) {
748         if (!HT[(h1+k)%ht_size]) {
749           HT[(h1+k)%ht_size] = key;
750           HD[(h1+k)%ht_size] = a->a + j*bs2;
751           break;
752 #if defined(PETSC_USE_INFO)
753         } else {
754           ct++;
755 #endif
756         }
757       }
758 #if defined(PETSC_USE_INFO)
759       if (k> max) max = k;
760 #endif
761     }
762   }
763   /* Loop Over B */
764   for (i=0; i<b->mbs; i++) {
765     for (j=bi[i]; j<bi[i+1]; j++) {
766       row = i+rstart;
767       col = garray[bj[j]];
768       key = row*Nbs + col + 1;
769       h1  = HASH(ht_size,key,tmp);
770       for (k=0; k<ht_size; k++) {
771         if (!HT[(h1+k)%ht_size]) {
772           HT[(h1+k)%ht_size] = key;
773           HD[(h1+k)%ht_size] = b->a + j*bs2;
774           break;
775 #if defined(PETSC_USE_INFO)
776         } else {
777           ct++;
778 #endif
779         }
780       }
781 #if defined(PETSC_USE_INFO)
782       if (k> max) max = k;
783 #endif
784     }
785   }
786 
787   /* Print Summary */
788 #if defined(PETSC_USE_INFO)
789   for (i=0,j=0; i<ht_size; i++) {
790     if (HT[i]) j++;
791   }
792   ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
793 #endif
794   PetscFunctionReturn(0);
795 }
796 
797 #undef __FUNCT__
798 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
799 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
800 {
801   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
802   PetscErrorCode ierr;
803   PetscInt       nstash,reallocs;
804   InsertMode     addv;
805 
806   PetscFunctionBegin;
807   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
808 
809   /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
810   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
811   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
812   mat->insertmode = addv; /* in case this processor had no cache */
813 
814   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
815   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
816   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
817   ierr = PetscInfo2(mat,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
818   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
819   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
820   PetscFunctionReturn(0);
821 }
822 
823 #undef __FUNCT__
824 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
825 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
826 {
827   Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
828   Mat_SeqBAIJ    *a   =(Mat_SeqBAIJ*)baij->A->data;
829   PetscErrorCode ierr;
830   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
831   PetscInt       *row,*col;
832   PetscBool      r1,r2,r3,other_disassembled;
833   MatScalar      *val;
834   InsertMode     addv = mat->insertmode;
835   PetscMPIInt    n;
836 
837   PetscFunctionBegin;
838   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
839   if (!baij->donotstash && !mat->nooffprocentries) {
840     while (1) {
841       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
842       if (!flg) break;
843 
844       for (i=0; i<n;) {
845         /* Now identify the consecutive vals belonging to the same row */
846         for (j=i,rstart=row[j]; j<n; j++) {
847           if (row[j] != rstart) break;
848         }
849         if (j < n) ncols = j-i;
850         else       ncols = n-i;
851         /* Now assemble all these values with a single function call */
852         ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
853         i    = j;
854       }
855     }
856     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
857     /* Now process the block-stash. Since the values are stashed column-oriented,
858        temporarily clear the roworiented flags, and after MatSetValues()
859        restore the original flags */
860     r1 = baij->roworiented;
861     r2 = a->roworiented;
862     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
863 
864     baij->roworiented = PETSC_FALSE;
865     a->roworiented    = PETSC_FALSE;
866 
867     (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
868     while (1) {
869       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
870       if (!flg) break;
871 
872       for (i=0; i<n;) {
873         /* Now identify the consecutive vals belonging to the same row */
874         for (j=i,rstart=row[j]; j<n; j++) {
875           if (row[j] != rstart) break;
876         }
877         if (j < n) ncols = j-i;
878         else       ncols = n-i;
879         ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
880         i    = j;
881       }
882     }
883     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
884 
885     baij->roworiented = r1;
886     a->roworiented    = r2;
887 
888     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
889   }
890 
891   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
892   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
893 
894   /* determine if any processor has disassembled, if so we must
895      also disassemble ourselves, in order that we may reassemble. */
896   /*
897      if the nonzero structure of submatrix B cannot change then we know that
898      no processor disassembled, thus we can skip this stuff
899   */
899   */
900   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
901     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
902     if (mat->was_assembled && !other_disassembled) {
903       ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
904     }
905   }
906 
907   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
908     ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
909   }
910   ierr = MatSetOption(baij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
911   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
912   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
913 
914 #if defined(PETSC_USE_INFO)
915   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
916     ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);
917 
918     baij->ht_total_ct  = 0;
919     baij->ht_insert_ct = 0;
920   }
921 #endif
922   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
923     ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
924 
925     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
926     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
927   }
928 
929   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
930 
931   baij->rowvalues = 0;
932   PetscFunctionReturn(0);
933 }
934 
935 #include <petscdraw.h>
936 #undef __FUNCT__
937 #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
938 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
939 {
940   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
941   PetscErrorCode    ierr;
942   PetscMPIInt       size = baij->size,rank = baij->rank;
943   PetscInt          bs   = mat->rmap->bs;
944   PetscBool         iascii,isdraw;
945   PetscViewer       sviewer;
946   PetscViewerFormat format;
947 
948   PetscFunctionBegin;
949   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
950   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
951   if (iascii) {
952     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
953     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
954       MatInfo info;
955       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
956       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
957       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
958       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
959                                                 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
960       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
961       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
962       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
963       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
964       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
965       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
966       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
967       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
968       PetscFunctionReturn(0);
969     } else if (format == PETSC_VIEWER_ASCII_INFO) {
970       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
971       PetscFunctionReturn(0);
972     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
973       PetscFunctionReturn(0);
974     }
975   }
976 
977   if (isdraw) {
978     PetscDraw draw;
979     PetscBool isnull;
980     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
981     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
982   }
983 
984   if (size == 1) {
985     ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
986     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
987   } else {
988     /* assemble the entire matrix onto the first processor. */
989     Mat         A;
990     Mat_SeqBAIJ *Aloc;
991     PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
992     MatScalar   *a;
993 
994     /* Here we are creating a temporary matrix, so we will assume MPIBAIJ is acceptable */
995     /* Perhaps this should be the type of mat? */
996     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
997     if (!rank) {
998       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
999     } else {
1000       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1001     }
1002     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
1003     ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
1004     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1005     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1006 
1007     /* copy over the A part */
1008     Aloc = (Mat_SeqBAIJ*)baij->A->data;
1009     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1010     ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
1011 
1012     for (i=0; i<mbs; i++) {
1013       rvals[0] = bs*(baij->rstartbs + i);
1014       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1015       for (j=ai[i]; j<ai[i+1]; j++) {
1016         col = (baij->cstartbs+aj[j])*bs;
1017         for (k=0; k<bs; k++) {
1018           ierr      = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1019           col++; a += bs;
1020         }
1021       }
1022     }
1023     /* copy over the B part */
1024     Aloc = (Mat_SeqBAIJ*)baij->B->data;
1025     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1026     for (i=0; i<mbs; i++) {
1027       rvals[0] = bs*(baij->rstartbs + i);
1028       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1029       for (j=ai[i]; j<ai[i+1]; j++) {
1030         col = baij->garray[aj[j]]*bs;
1031         for (k=0; k<bs; k++) {
1032           ierr      = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1033           col++; a += bs;
1034         }
1035       }
1036     }
1037     ierr = PetscFree(rvals);CHKERRQ(ierr);
1038     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1039     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1040     /*
1041        Every process has to participate in the call that draws the matrix since the graphics waits are
1042        synchronized across all processors that share the PetscDraw object
1043     */
1044     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1045     if (!rank) {
1046       ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1047       /* Set the type name to MATMPIBAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqBAIJ_ASCII()*/
1048       PetscStrcpy(((PetscObject)((Mat_MPIBAIJ*)(A->data))->A)->type_name,MATMPIBAIJ);
1049       ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1050     }
1051     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1052     ierr = MatDestroy(&A);CHKERRQ(ierr);
1053   }
1054   PetscFunctionReturn(0);
1055 }
1056 
1057 #undef __FUNCT__
1058 #define __FUNCT__ "MatView_MPIBAIJ_Binary"
1059 static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
1060 {
1061   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)mat->data;
1062   Mat_SeqBAIJ    *A = (Mat_SeqBAIJ*)a->A->data;
1063   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)a->B->data;
1064   PetscErrorCode ierr;
1065   PetscInt       i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen;
1066   PetscInt       *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll;
1067   int            fd;
1068   PetscScalar    *column_values;
1069   FILE           *file;
1070   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1071   PetscInt       message_count,flowcontrolcount;
1072 
1073   PetscFunctionBegin;
1074   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1075   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1076   nz   = bs2*(A->nz + B->nz);
1077   rlen = mat->rmap->n;
1078   if (!rank) {
1079     header[0] = MAT_FILE_CLASSID;
1080     header[1] = mat->rmap->N;
1081     header[2] = mat->cmap->N;
1082 
1083     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1084     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1085     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1086     /* get largest number of rows any processor has */
1087     range = mat->rmap->range;
1088     for (i=1; i<size; i++) {
1089       rlen = PetscMax(rlen,range[i+1] - range[i]);
1090     }
1091   } else {
1092     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1093   }
1094 
1095   ierr = PetscMalloc((rlen/bs)*sizeof(PetscInt),&crow_lens);CHKERRQ(ierr);
1096   /* compute the length of each block row */
1097   for (i=0; i<a->mbs; i++) {
1098     crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1099   }
1100   /* store the row lengths to the file */
1101   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1102   if (!rank) {
1103     MPI_Status status;
1104     ierr = PetscMalloc(rlen*sizeof(PetscInt),&row_lens);CHKERRQ(ierr);
1105     rlen = (range[1] - range[0])/bs;
1106     for (i=0; i<rlen; i++) {
1107       for (j=0; j<bs; j++) {
1108         row_lens[i*bs+j] = bs*crow_lens[i];
1109       }
1110     }
1111     ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1112     for (i=1; i<size; i++) {
1113       rlen = (range[i+1] - range[i])/bs;
1114       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1115       ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1116       for (k=0; k<rlen; k++) {
1117         for (j=0; j<bs; j++) {
1118           row_lens[k*bs+j] = bs*crow_lens[k];
1119         }
1120       }
1121       ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1122     }
1123     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1124     ierr = PetscFree(row_lens);CHKERRQ(ierr);
1125   } else {
1126     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1127     ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1128     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1129   }
1130   ierr = PetscFree(crow_lens);CHKERRQ(ierr);
1131 
1132   /* load up the local column indices. Include them for every row, not just one per block row, since process 0 does not have the
1133      information needed to reconstruct them for each row from a block row. This does require more communication but still not more than
1134      the communication needed for the nonzero values  */
1135   nzmax = nz; /* space the largest processor needs */
1136   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1137   ierr  = PetscMalloc(nzmax*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
1138   cnt   = 0;
1139   for (i=0; i<a->mbs; i++) {
1140     pcnt = cnt;
1141     for (j=B->i[i]; j<B->i[i+1]; j++) {
1142       if ((col = garray[B->j[j]]) > cstart) break;
1143       for (l=0; l<bs; l++) {
1144         column_indices[cnt++] = bs*col+l;
1145       }
1146     }
1147     for (k=A->i[i]; k<A->i[i+1]; k++) {
1148       for (l=0; l<bs; l++) {
1149         column_indices[cnt++] = bs*(A->j[k] + cstart)+l;
1150       }
1151     }
1152     for (; j<B->i[i+1]; j++) {
1153       for (l=0; l<bs; l++) {
1154         column_indices[cnt++] = bs*garray[B->j[j]]+l;
1155       }
1156     }
1157     len = cnt - pcnt;
1158     for (k=1; k<bs; k++) {
1159       ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr);
1160       cnt += len;
1161     }
1162   }
1163   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1164 
1165   /* store the columns to the file */
1166   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1167   if (!rank) {
1168     MPI_Status status;
1169     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1170     for (i=1; i<size; i++) {
1171       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1172       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1173       ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1174       ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1175     }
1176     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1177   } else {
1178     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1179     ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1180     ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1181     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1182   }
1183   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1184 
1185   /* load up the numerical values */
1186   ierr = PetscMalloc(nzmax*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1187   cnt  = 0;
1188   for (i=0; i<a->mbs; i++) {
1189     rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]);
1190     for (j=B->i[i]; j<B->i[i+1]; j++) {
1191       if (garray[B->j[j]] > cstart) break;
1192       for (l=0; l<bs; l++) {
1193         for (ll=0; ll<bs; ll++) {
1194           column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1195         }
1196       }
1197       cnt += bs;
1198     }
1199     for (k=A->i[i]; k<A->i[i+1]; k++) {
1200       for (l=0; l<bs; l++) {
1201         for (ll=0; ll<bs; ll++) {
1202           column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll];
1203         }
1204       }
1205       cnt += bs;
1206     }
1207     for (; j<B->i[i+1]; j++) {
1208       for (l=0; l<bs; l++) {
1209         for (ll=0; ll<bs; ll++) {
1210           column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1211         }
1212       }
1213       cnt += bs;
1214     }
1215     cnt += (bs-1)*rlen;
1216   }
1217   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1218 
1219   /* store the column values to the file */
1220   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1221   if (!rank) {
1222     MPI_Status status;
1223     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1224     for (i=1; i<size; i++) {
1225       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1226       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1227       ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1228       ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1229     }
1230     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1231   } else {
1232     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1233     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1234     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1235     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1236   }
1237   ierr = PetscFree(column_values);CHKERRQ(ierr);
1238 
1239   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1240   if (file) {
1241     fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
1242   }
1243   PetscFunctionReturn(0);
1244 }
1245 
1246 #undef __FUNCT__
1247 #define __FUNCT__ "MatView_MPIBAIJ"
1248 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
1249 {
1250   PetscErrorCode ierr;
1251   PetscBool      iascii,isdraw,issocket,isbinary;
1252 
1253   PetscFunctionBegin;
1254   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1255   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1256   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1257   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1258   if (iascii || isdraw || issocket) {
1259     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1260   } else if (isbinary) {
1261     ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1262   }
1263   PetscFunctionReturn(0);
1264 }
1265 
1266 #undef __FUNCT__
1267 #define __FUNCT__ "MatDestroy_MPIBAIJ"
1268 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
1269 {
1270   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1271   PetscErrorCode ierr;
1272 
1273   PetscFunctionBegin;
1274 #if defined(PETSC_USE_LOG)
1275   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
1276 #endif
1277   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1278   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
1279   ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
1280   ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
1281 #if defined(PETSC_USE_CTABLE)
1282   ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
1283 #else
1284   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
1285 #endif
1286   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
1287   ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
1288   ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
1289   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
1290   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
1291   ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);
1292   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
1293   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1294 
1295   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1296   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",NULL);CHKERRQ(ierr);
1297   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",NULL);CHKERRQ(ierr);
1298   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",NULL);CHKERRQ(ierr);
1299   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",NULL);CHKERRQ(ierr);
1300   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",NULL);CHKERRQ(ierr);
1301   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",NULL);CHKERRQ(ierr);
1302   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",NULL);CHKERRQ(ierr);
1303   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C","",NULL);CHKERRQ(ierr);
1304   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C","",NULL);CHKERRQ(ierr);
1305   PetscFunctionReturn(0);
1306 }
1307 
1308 #undef __FUNCT__
1309 #define __FUNCT__ "MatMult_MPIBAIJ"
1310 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1311 {
1312   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1313   PetscErrorCode ierr;
1314   PetscInt       nt;
1315 
1316   PetscFunctionBegin;
1317   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1318   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1319   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1320   if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
1321   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1322   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1323   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1324   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1325   PetscFunctionReturn(0);
1326 }
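/*
   A minimal usage sketch of the MatMult() path that reaches the routine above,
   assuming A is an assembled MATMPIBAIJ matrix; MatGetVecs() is used only to
   obtain work vectors with a conforming layout.

      Vec x,y;
      ierr = MatGetVecs(A,&x,&y);CHKERRQ(ierr);
      ierr = VecSet(x,1.0);CHKERRQ(ierr);
      ierr = MatMult(A,x,y);CHKERRQ(ierr);
      ierr = VecDestroy(&x);CHKERRQ(ierr);
      ierr = VecDestroy(&y);CHKERRQ(ierr);

   The routine overlaps the scatter of ghost values with the multiply by the
   diagonal block a->A, then adds the contribution of the off-diagonal block a->B.
*/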
1327 
1328 #undef __FUNCT__
1329 #define __FUNCT__ "MatMultAdd_MPIBAIJ"
1330 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1331 {
1332   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1333   PetscErrorCode ierr;
1334 
1335   PetscFunctionBegin;
1336   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1337   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1338   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1339   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1340   PetscFunctionReturn(0);
1341 }
1342 
1343 #undef __FUNCT__
1344 #define __FUNCT__ "MatMultTranspose_MPIBAIJ"
1345 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1346 {
1347   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1348   PetscErrorCode ierr;
1349   PetscBool      merged;
1350 
1351   PetscFunctionBegin;
1352   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1353   /* do nondiagonal part */
1354   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1355   if (!merged) {
1356     /* send it on its way */
1357     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1358     /* do local part */
1359     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1360     /* receive remote parts: note this assumes the values are not actually */
1361     /* inserted in yy until the next line */
1362     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1363   } else {
1364     /* do local part */
1365     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1366     /* send it on its way */
1367     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1368     /* values actually were received in the Begin() but we still need to call this no-op End() */
1369     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1370   }
1371   PetscFunctionReturn(0);
1372 }
1373 
1374 #undef __FUNCT__
1375 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
1376 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1377 {
1378   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1379   PetscErrorCode ierr;
1380 
1381   PetscFunctionBegin;
1382   /* do nondiagonal part */
1383   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1384   /* send it on its way */
1385   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1386   /* do local part */
1387   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1388   /* receive remote parts: note this assumes the values are not actually */
1389   /* inserted in zz until the VecScatterEnd() below, which is true for this */
1390   /* implementation but may not hold in general. */
1391   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1392   PetscFunctionReturn(0);
1393 }
1394 
1395 /*
1396   This only works correctly for square matrices where the subblock A->A is the
1397    diagonal block
1398 */
1399 #undef __FUNCT__
1400 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
1401 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1402 {
1403   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1404   PetscErrorCode ierr;
1405 
1406   PetscFunctionBegin;
1407   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1408   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1409   PetscFunctionReturn(0);
1410 }
1411 
1412 #undef __FUNCT__
1413 #define __FUNCT__ "MatScale_MPIBAIJ"
1414 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
1415 {
1416   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1417   PetscErrorCode ierr;
1418 
1419   PetscFunctionBegin;
1420   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1421   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1422   PetscFunctionReturn(0);
1423 }
1424 
1425 #undef __FUNCT__
1426 #define __FUNCT__ "MatGetRow_MPIBAIJ"
1427 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1428 {
1429   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
1430   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1431   PetscErrorCode ierr;
1432   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1433   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
1434   PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;
1435 
1436   PetscFunctionBegin;
1437   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1438   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1439   mat->getrowactive = PETSC_TRUE;
1440 
1441   if (!mat->rowvalues && (idx || v)) {
1442     /*
1443         allocate enough space to hold information from the longest row.
1444     */
1445     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1446     PetscInt    max = 1,mbs = mat->mbs,tmp;
1447     for (i=0; i<mbs; i++) {
1448       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1449       if (max < tmp) max = tmp;
1450     }
1451     ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1452   }
1453   lrow = row - brstart;
1454 
1455   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1456   if (!v)   {pvA = 0; pvB = 0;}
1457   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1458   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1459   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1460   nztot = nzA + nzB;
1461 
1462   cmap = mat->garray;
1463   if (v  || idx) {
1464     if (nztot) {
1465       /* Sort by increasing column numbers, assuming A and B already sorted */
1466       PetscInt imark = -1;
1467       if (v) {
1468         *v = v_p = mat->rowvalues;
1469         for (i=0; i<nzB; i++) {
1470           if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
1471           else break;
1472         }
1473         imark = i;
1474         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1475         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1476       }
1477       if (idx) {
1478         *idx = idx_p = mat->rowindices;
1479         if (imark > -1) {
1480           for (i=0; i<imark; i++) {
1481             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1482           }
1483         } else {
1484           for (i=0; i<nzB; i++) {
1485             if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1486             else break;
1487           }
1488           imark = i;
1489         }
1490         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1491         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1492       }
1493     } else {
1494       if (idx) *idx = 0;
1495       if (v)   *v   = 0;
1496     }
1497   }
1498   *nz  = nztot;
1499   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1500   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1501   PetscFunctionReturn(0);
1502 }
1503 
1504 #undef __FUNCT__
1505 #define __FUNCT__ "MatRestoreRow_MPIBAIJ"
1506 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1507 {
1508   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1509 
1510   PetscFunctionBegin;
1511   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1512   baij->getrowactive = PETSC_FALSE;
1513   PetscFunctionReturn(0);
1514 }
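/*
   A hedged sketch of the MatGetRow()/MatRestoreRow() pairing served by the two
   routines above: only locally owned rows may be requested, and each MatGetRow()
   must be matched by a MatRestoreRow() before the next row is fetched.

      PetscInt          row,rstart,rend,ncols;
      const PetscInt    *cols;
      const PetscScalar *vals;
      ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
      for (row=rstart; row<rend; row++) {
        ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
        (inspect the ncols entries of this row here)
        ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
      }
*/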
1515 
1516 #undef __FUNCT__
1517 #define __FUNCT__ "MatZeroEntries_MPIBAIJ"
1518 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1519 {
1520   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1521   PetscErrorCode ierr;
1522 
1523   PetscFunctionBegin;
1524   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1525   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1526   PetscFunctionReturn(0);
1527 }
1528 
1529 #undef __FUNCT__
1530 #define __FUNCT__ "MatGetInfo_MPIBAIJ"
1531 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1532 {
1533   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
1534   Mat            A  = a->A,B = a->B;
1535   PetscErrorCode ierr;
1536   PetscReal      isend[5],irecv[5];
1537 
1538   PetscFunctionBegin;
1539   info->block_size = (PetscReal)matin->rmap->bs;
1540 
1541   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1542 
1543   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1544   isend[3] = info->memory;  isend[4] = info->mallocs;
1545 
1546   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1547 
1548   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1549   isend[3] += info->memory;  isend[4] += info->mallocs;
1550 
1551   if (flag == MAT_LOCAL) {
1552     info->nz_used      = isend[0];
1553     info->nz_allocated = isend[1];
1554     info->nz_unneeded  = isend[2];
1555     info->memory       = isend[3];
1556     info->mallocs      = isend[4];
1557   } else if (flag == MAT_GLOBAL_MAX) {
1558     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1559 
1560     info->nz_used      = irecv[0];
1561     info->nz_allocated = irecv[1];
1562     info->nz_unneeded  = irecv[2];
1563     info->memory       = irecv[3];
1564     info->mallocs      = irecv[4];
1565   } else if (flag == MAT_GLOBAL_SUM) {
1566     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1567 
1568     info->nz_used      = irecv[0];
1569     info->nz_allocated = irecv[1];
1570     info->nz_unneeded  = irecv[2];
1571     info->memory       = irecv[3];
1572     info->mallocs      = irecv[4];
1573   } else SETERRQ1(PetscObjectComm((PetscObject)matin),PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1574   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1575   info->fill_ratio_needed = 0;
1576   info->factor_mallocs    = 0;
1577   PetscFunctionReturn(0);
1578 }
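/*
   A usage sketch of MatGetInfo() exercising the MAT_GLOBAL_SUM branch above,
   assuming A is an assembled parallel matrix.

      MatInfo info;
      ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
      ierr = PetscPrintf(PETSC_COMM_WORLD,"nz used %g, nz allocated %g\n",
                         (double)info.nz_used,(double)info.nz_allocated);CHKERRQ(ierr);
*/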
1579 
1580 #undef __FUNCT__
1581 #define __FUNCT__ "MatSetOption_MPIBAIJ"
1582 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg)
1583 {
1584   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1585   PetscErrorCode ierr;
1586 
1587   PetscFunctionBegin;
1588   switch (op) {
1589   case MAT_NEW_NONZERO_LOCATIONS:
1590   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1591   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1592   case MAT_KEEP_NONZERO_PATTERN:
1593   case MAT_NEW_NONZERO_LOCATION_ERR:
1594     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1595     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1596     break;
1597   case MAT_ROW_ORIENTED:
1598     a->roworiented = flg;
1599 
1600     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1601     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1602     break;
1603   case MAT_NEW_DIAGONALS:
1604     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1605     break;
1606   case MAT_IGNORE_OFF_PROC_ENTRIES:
1607     a->donotstash = flg;
1608     break;
1609   case MAT_USE_HASH_TABLE:
1610     a->ht_flag = flg;
1611     break;
1612   case MAT_SYMMETRIC:
1613   case MAT_STRUCTURALLY_SYMMETRIC:
1614   case MAT_HERMITIAN:
1615   case MAT_SYMMETRY_ETERNAL:
1616     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1617     break;
1618   default:
1619     SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"unknown option %d",op);
1620   }
1621   PetscFunctionReturn(0);
1622 }
1623 
1624 #undef __FUNCT__
1625 #define __FUNCT__ "MatTranspose_MPIBAIJ"
1626 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
1627 {
1628   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
1629   Mat_SeqBAIJ    *Aloc;
1630   Mat            B;
1631   PetscErrorCode ierr;
1632   PetscInt       M =A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
1633   PetscInt       bs=A->rmap->bs,mbs=baij->mbs;
1634   MatScalar      *a;
1635 
1636   PetscFunctionBegin;
1637   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1638   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1639     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1640     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1641     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1642     /* Do not know preallocation information, but must set block size */
1643     ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,NULL,PETSC_DECIDE,NULL);CHKERRQ(ierr);
1644   } else {
1645     B = *matout;
1646   }
1647 
1648   /* copy over the A part */
1649   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1650   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1651   ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
1652 
1653   for (i=0; i<mbs; i++) {
1654     rvals[0] = bs*(baij->rstartbs + i);
1655     for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1656     for (j=ai[i]; j<ai[i+1]; j++) {
1657       col = (baij->cstartbs+aj[j])*bs;
1658       for (k=0; k<bs; k++) {
1659         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1660 
1661         col++; a += bs;
1662       }
1663     }
1664   }
1665   /* copy over the B part */
1666   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1667   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1668   for (i=0; i<mbs; i++) {
1669     rvals[0] = bs*(baij->rstartbs + i);
1670     for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1671     for (j=ai[i]; j<ai[i+1]; j++) {
1672       col = baij->garray[aj[j]]*bs;
1673       for (k=0; k<bs; k++) {
1674         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1675         col++;
1676         a += bs;
1677       }
1678     }
1679   }
1680   ierr = PetscFree(rvals);CHKERRQ(ierr);
1681   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1682   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1683 
1684   if (reuse == MAT_INITIAL_MATRIX || *matout != A) *matout = B;
1685   else {
1686     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
1687   }
1688   PetscFunctionReturn(0);
1689 }
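/*
   A usage sketch, assuming A is an assembled MATMPIBAIJ matrix: with
   MAT_INITIAL_MATRIX a new matrix is created, while the in-place form
   (MAT_REUSE_MATRIX with &A itself) is only supported for square matrices,
   as checked at the top of the routine above.

      Mat At;
      ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
      ierr = MatDestroy(&At);CHKERRQ(ierr);
*/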
1690 
1691 #undef __FUNCT__
1692 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
1693 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1694 {
1695   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1696   Mat            a     = baij->A,b = baij->B;
1697   PetscErrorCode ierr;
1698   PetscInt       s1,s2,s3;
1699 
1700   PetscFunctionBegin;
1701   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1702   if (rr) {
1703     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1704     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1705     /* Overlap communication with computation. */
1706     ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1707   }
1708   if (ll) {
1709     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1710     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1711     ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
1712   }
1713   /* scale the diagonal block */
1714   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1715 
1716   if (rr) {
1717     /* Do a scatter end and then right scale the off-diagonal block */
1718     ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1719     ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
1720   }
1721   PetscFunctionReturn(0);
1722 }
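/*
   A usage sketch of MatDiagonalScale(), which replaces A by L*A*R with the
   diagonal matrices L and R given as vectors; either argument may be NULL to
   skip that side.  The vectors here are placeholders obtained with MatGetVecs().

      Vec l,r;
      ierr = MatGetVecs(A,&r,&l);CHKERRQ(ierr);
      ierr = VecSet(l,2.0);CHKERRQ(ierr);
      ierr = VecSet(r,0.5);CHKERRQ(ierr);
      ierr = MatDiagonalScale(A,l,r);CHKERRQ(ierr);
      ierr = VecDestroy(&l);CHKERRQ(ierr);
      ierr = VecDestroy(&r);CHKERRQ(ierr);
*/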
1723 
1724 #undef __FUNCT__
1725 #define __FUNCT__ "MatZeroRows_MPIBAIJ"
1726 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
1727 {
1728   Mat_MPIBAIJ       *l = (Mat_MPIBAIJ*)A->data;
1729   PetscErrorCode    ierr;
1730   PetscMPIInt       imdex,size = l->size,n,rank = l->rank;
1731   PetscInt          i,*owners = A->rmap->range;
1732   PetscInt          *nprocs,j,idx,nsends,row;
1733   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
1734   PetscInt          *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source,lastidx = -1;
1735   PetscInt          *lens,*lrows,*values,rstart_bs=A->rmap->rstart;
1736   MPI_Comm          comm;
1737   MPI_Request       *send_waits,*recv_waits;
1738   MPI_Status        recv_status,*send_status;
1739   const PetscScalar *xx;
1740   PetscScalar       *bb;
1741 #if defined(PETSC_DEBUG)
1742   PetscBool         found = PETSC_FALSE;
1743 #endif
1744 
1745   PetscFunctionBegin;
1746   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1747   /*  first count number of contributors to each processor */
1748   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
1749   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
1750   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr);  /* see note*/
1751   j    = 0;
1752   for (i=0; i<N; i++) {
1753     if (lastidx > (idx = rows[i])) j = 0;
1754     lastidx = idx;
1755     for (; j<size; j++) {
1756       if (idx >= owners[j] && idx < owners[j+1]) {
1757         nprocs[2*j]++;
1758         nprocs[2*j+1] = 1;
1759         owner[i]      = j;
1760 #if defined(PETSC_DEBUG)
1761         found = PETSC_TRUE;
1762 #endif
1763         break;
1764       }
1765     }
1766 #if defined(PETSC_DEBUG)
1767     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
1768     found = PETSC_FALSE;
1769 #endif
1770   }
1771   nsends = 0;  for (i=0; i<size; i++) nsends += nprocs[2*i+1];
1772 
1773   if (A->nooffproczerorows) {
1774     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
1775     nrecvs = nsends;
1776     nmax   = N;
1777   } else {
1778     /* inform other processors of number of messages and max length*/
1779     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
1780   }
1781 
1782   /* post receives:   */
1783   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
1784   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
1785   for (i=0; i<nrecvs; i++) {
1786     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
1787   }
1788 
1789   /* do sends:
1790      1) starts[i] gives the starting index in svalues for stuff going to
1791      the ith processor
1792   */
1793   ierr      = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
1794   ierr      = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
1795   ierr      = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
1796   starts[0] = 0;
1797   for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
1798   for (i=0; i<N; i++) {
1799     svalues[starts[owner[i]]++] = rows[i];
1800   }
1801 
1802   starts[0] = 0;
1803   for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
1804   count = 0;
1805   for (i=0; i<size; i++) {
1806     if (nprocs[2*i+1]) {
1807       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
1808     }
1809   }
1810   ierr = PetscFree(starts);CHKERRQ(ierr);
1811 
1812   base = owners[rank];
1813 
1814   /*  wait on receives */
1815   ierr  = PetscMalloc2(nrecvs+1,PetscInt,&lens,nrecvs+1,PetscInt,&source);CHKERRQ(ierr);
1816   count = nrecvs;
1817   slen  = 0;
1818   while (count) {
1819     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
1820     /* unpack receives into our local space */
1821     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
1822 
1823     source[imdex] = recv_status.MPI_SOURCE;
1824     lens[imdex]   = n;
1825     slen         += n;
1826     count--;
1827   }
1828   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1829 
1830   /* move the data into the send scatter */
1831   ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
1832   count = 0;
1833   for (i=0; i<nrecvs; i++) {
1834     values = rvalues + i*nmax;
1835     for (j=0; j<lens[i]; j++) {
1836       lrows[count++] = values[j] - base;
1837     }
1838   }
1839   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1840   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
1841   ierr = PetscFree(owner);CHKERRQ(ierr);
1842   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1843 
1844   /* fix right hand side if needed */
1845   if (x && b) {
1846     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
1847     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1848     for (i=0; i<slen; i++) {
1849       bb[lrows[i]] = diag*xx[lrows[i]];
1850     }
1851     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
1852     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1853   }
1854 
1855   /* actually zap the local rows */
1856   /*
1857         Zero the required rows. If the "diagonal block" of the matrix
1858      is square and the user wishes to set the diagonal, we use separate
1859      code so that MatSetValues() is not called for each diagonal entry;
1860      that would allocate new memory with many small mallocs and slow things down.
1861 
1862   */
1863   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
1864   ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
1865   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
1866     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
1867   } else if (diag != 0.0) {
1868     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
1869     if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1870        MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1871     for (i=0; i<slen; i++) {
1872       row  = lrows[i] + rstart_bs;
1873       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
1874     }
1875     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1876     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1877   } else {
1878     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
1879   }
1880 
1881   ierr = PetscFree(lrows);CHKERRQ(ierr);
1882 
1883   /* wait on sends */
1884   if (nsends) {
1885     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1886     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1887     ierr = PetscFree(send_status);CHKERRQ(ierr);
1888   }
1889   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1890   ierr = PetscFree(svalues);CHKERRQ(ierr);
1891   PetscFunctionReturn(0);
1892 }
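/*
   A hedged usage sketch: MatZeroRows() may be given rows owned by any process,
   and the routine above ships each row index to its owner.  A nonzero diag puts
   that value on the zeroed diagonal; the optional x,b pair (assumed here to
   exist and conform to A) adjusts the right-hand side so the zeroed rows keep
   the values of x in the solution.

      PetscInt rows[2] = {0,1};   (global row numbers)
      ierr = MatZeroRows(A,2,rows,1.0,x,b);CHKERRQ(ierr);
*/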
1893 
1894 #undef __FUNCT__
1895 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
1896 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1897 {
1898   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1899   PetscErrorCode ierr;
1900 
1901   PetscFunctionBegin;
1902   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1903   PetscFunctionReturn(0);
1904 }
1905 
1906 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat*);
1907 
1908 #undef __FUNCT__
1909 #define __FUNCT__ "MatEqual_MPIBAIJ"
1910 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool  *flag)
1911 {
1912   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1913   Mat            a,b,c,d;
1914   PetscBool      flg;
1915   PetscErrorCode ierr;
1916 
1917   PetscFunctionBegin;
1918   a = matA->A; b = matA->B;
1919   c = matB->A; d = matB->B;
1920 
1921   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1922   if (flg) {
1923     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1924   }
1925   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1926   PetscFunctionReturn(0);
1927 }
1928 
1929 #undef __FUNCT__
1930 #define __FUNCT__ "MatCopy_MPIBAIJ"
1931 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
1932 {
1933   PetscErrorCode ierr;
1934   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1935   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
1936 
1937   PetscFunctionBegin;
1938   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1939   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1940     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1941   } else {
1942     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1943     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1944   }
1945   PetscFunctionReturn(0);
1946 }
1947 
1948 #undef __FUNCT__
1949 #define __FUNCT__ "MatSetUp_MPIBAIJ"
1950 PetscErrorCode MatSetUp_MPIBAIJ(Mat A)
1951 {
1952   PetscErrorCode ierr;
1953 
1954   PetscFunctionBegin;
1955   ierr =  MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1956   PetscFunctionReturn(0);
1957 }
1958 
1959 #undef __FUNCT__
1960 #define __FUNCT__ "MatAXPY_MPIBAIJ"
1961 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1962 {
1963   PetscErrorCode ierr;
1964   Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ*)X->data,*yy=(Mat_MPIBAIJ*)Y->data;
1965   PetscBLASInt   bnz,one=1;
1966   Mat_SeqBAIJ    *x,*y;
1967 
1968   PetscFunctionBegin;
1969   if (str == SAME_NONZERO_PATTERN) {
1970     PetscScalar alpha = a;
1971     x    = (Mat_SeqBAIJ*)xx->A->data;
1972     y    = (Mat_SeqBAIJ*)yy->A->data;
1973     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
1974     PetscStackCall("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
1975     x    = (Mat_SeqBAIJ*)xx->B->data;
1976     y    = (Mat_SeqBAIJ*)yy->B->data;
1977     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
1978     PetscStackCall("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
1979   } else {
1980     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1981   }
1982   PetscFunctionReturn(0);
1983 }
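/*
   A usage sketch: with SAME_NONZERO_PATTERN the routine above reduces to two
   BLAS axpy calls on the stored values of the diagonal and off-diagonal blocks;
   any other MatStructure falls back to MatAXPY_Basic().  Here Y and X are
   assumed to be conforming MATMPIBAIJ matrices.

      ierr = MatAXPY(Y,2.0,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);   (Y = Y + 2*X)
*/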
1984 
1985 #undef __FUNCT__
1986 #define __FUNCT__ "MatRealPart_MPIBAIJ"
1987 PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
1988 {
1989   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1990   PetscErrorCode ierr;
1991 
1992   PetscFunctionBegin;
1993   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1994   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1995   PetscFunctionReturn(0);
1996 }
1997 
1998 #undef __FUNCT__
1999 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ"
2000 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
2001 {
2002   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
2003   PetscErrorCode ierr;
2004 
2005   PetscFunctionBegin;
2006   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2007   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2008   PetscFunctionReturn(0);
2009 }
2010 
2011 #undef __FUNCT__
2012 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ"
2013 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
2014 {
2015   PetscErrorCode ierr;
2016   IS             iscol_local;
2017   PetscInt       csize;
2018 
2019   PetscFunctionBegin;
2020   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
2021   if (call == MAT_REUSE_MATRIX) {
2022     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
2023     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2024   } else {
2025     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
2026   }
2027   ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
2028   if (call == MAT_INITIAL_MATRIX) {
2029     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
2030     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
2031   }
2032   PetscFunctionReturn(0);
2033 }
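/*
   A hedged usage sketch of MatGetSubMatrix(): isrow lists the rows this process
   is to own in the submatrix and iscol the columns to keep; nloc and first are
   placeholders for illustration.  Calling again with MAT_REUSE_MATRIX and the
   same index sets refills the previously created submatrix.

      IS  isrow,iscol;
      Mat S;
      ierr = ISCreateStride(PetscObjectComm((PetscObject)A),nloc,first,1,&isrow);CHKERRQ(ierr);
      ierr = ISCreateStride(PetscObjectComm((PetscObject)A),nloc,first,1,&iscol);CHKERRQ(ierr);
      ierr = MatGetSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&S);CHKERRQ(ierr);
      ierr = ISDestroy(&isrow);CHKERRQ(ierr);
      ierr = ISDestroy(&iscol);CHKERRQ(ierr);
*/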
2034 extern PetscErrorCode MatGetSubMatrices_MPIBAIJ_local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,PetscBool*,Mat*);
2035 #undef __FUNCT__
2036 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ_Private"
2037 /*
2038   Not great since it makes two copies of the submatrix: first a SeqBAIJ
2039   on each process, then the final result obtained by concatenating those local matrices.
2040   Writing it directly would be much like MatGetSubMatrices_MPIBAIJ().
2041 */
2042 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2043 {
2044   PetscErrorCode ierr;
2045   PetscMPIInt    rank,size;
2046   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs;
2047   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol,nrow;
2048   Mat            M,Mreuse;
2049   MatScalar      *vwork,*aa;
2050   MPI_Comm       comm;
2051   IS             isrow_new, iscol_new;
2052   PetscBool      idflag,allrows, allcols;
2053   Mat_SeqBAIJ    *aij;
2054 
2055   PetscFunctionBegin;
2056   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2057   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2058   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2059   /* The compression and expansion should be avoided. It does not report
2060      errors and might change the indices, hence it is bug-prone */
2061   ierr = ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new);CHKERRQ(ierr);
2062   ierr = ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new);CHKERRQ(ierr);
2063 
2064   /* Check for special case: each processor gets entire matrix columns */
2065   ierr = ISIdentity(iscol,&idflag);CHKERRQ(ierr);
2066   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
2067   if (idflag && ncol == mat->cmap->N) allcols = PETSC_TRUE;
2068   else allcols = PETSC_FALSE;
2069 
2070   ierr = ISIdentity(isrow,&idflag);CHKERRQ(ierr);
2071   ierr = ISGetLocalSize(isrow,&nrow);CHKERRQ(ierr);
2072   if (idflag && nrow == mat->rmap->N) allrows = PETSC_TRUE;
2073   else allrows = PETSC_FALSE;
2074 
2075   if (call ==  MAT_REUSE_MATRIX) {
2076     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
2077     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2078     ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_REUSE_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr);
2079   } else {
2080     ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_INITIAL_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr);
2081   }
2082   ierr = ISDestroy(&isrow_new);CHKERRQ(ierr);
2083   ierr = ISDestroy(&iscol_new);CHKERRQ(ierr);
2084   /*
2085       m - number of local rows
2086       n - number of columns (same on all processors)
2087       rstart - first row in new global matrix generated
2088   */
2089   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2090   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2091   m    = m/bs;
2092   n    = n/bs;
2093 
2094   if (call == MAT_INITIAL_MATRIX) {
2095     aij = (Mat_SeqBAIJ*)(Mreuse)->data;
2096     ii  = aij->i;
2097     jj  = aij->j;
2098 
2099     /*
2100         Determine the number of non-zeros in the diagonal and off-diagonal
2101         portions of the matrix in order to do correct preallocation
2102     */
2103 
2104     /* first get start and end of "diagonal" columns */
2105     if (csize == PETSC_DECIDE) {
2106       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2107       if (mglobal == n*bs) { /* square matrix */
2108         nlocal = m;
2109       } else {
2110         nlocal = n/size + ((n % size) > rank);
2111       }
2112     } else {
2113       nlocal = csize/bs;
2114     }
2115     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2116     rstart = rend - nlocal;
2117     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2118 
2119     /* next, compute all the lengths */
2120     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2121     olens = dlens + m;
2122     for (i=0; i<m; i++) {
2123       jend = ii[i+1] - ii[i];
2124       olen = 0;
2125       dlen = 0;
2126       for (j=0; j<jend; j++) {
2127         if (*jj < rstart || *jj >= rend) olen++;
2128         else dlen++;
2129         jj++;
2130       }
2131       olens[i] = olen;
2132       dlens[i] = dlen;
2133     }
2134     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
2135     ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr);
2136     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
2137     ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
2138     ierr = PetscFree(dlens);CHKERRQ(ierr);
2139   } else {
2140     PetscInt ml,nl;
2141 
2142     M    = *newmat;
2143     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2144     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2145     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2146     /*
2147          The next two lines are needed so we may call MatSetValuesBlocked_MPIBAIJ() below directly,
2148        rather than the slower MatSetValues().
2149     */
2150     M->was_assembled = PETSC_TRUE;
2151     M->assembled     = PETSC_FALSE;
2152   }
2153   ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2154   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2155   aij  = (Mat_SeqBAIJ*)(Mreuse)->data;
2156   ii   = aij->i;
2157   jj   = aij->j;
2158   aa   = aij->a;
2159   for (i=0; i<m; i++) {
2160     row   = rstart/bs + i;
2161     nz    = ii[i+1] - ii[i];
2162     cwork = jj;     jj += nz;
2163     vwork = aa;     aa += nz*bs*bs;
2164     ierr  = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2165   }
2166 
2167   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2168   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2169   *newmat = M;
2170 
2171   /* save submatrix used in processor for next request */
2172   if (call ==  MAT_INITIAL_MATRIX) {
2173     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2174     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2175   }
2176   PetscFunctionReturn(0);
2177 }
2178 
2179 #undef __FUNCT__
2180 #define __FUNCT__ "MatPermute_MPIBAIJ"
2181 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B)
2182 {
2183   MPI_Comm       comm,pcomm;
2184   PetscInt       first,rlocal_size,clocal_size,nrows;
2185   const PetscInt *rows;
2186   PetscMPIInt    size;
2187   IS             crowp,growp,irowp,lrowp,lcolp;
2188   PetscErrorCode ierr;
2189 
2190   PetscFunctionBegin;
2191   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2192   /* make a collective version of 'rowp' */
2193   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
2194   if (pcomm==comm) {
2195     crowp = rowp;
2196   } else {
2197     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
2198     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
2199     ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr);
2200     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
2201   }
2202   /* collect the global row permutation and invert it */
2203   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
2204   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
2205   if (pcomm!=comm) {
2206     ierr = ISDestroy(&crowp);CHKERRQ(ierr);
2207   }
2208   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
2209   ierr = ISDestroy(&growp);CHKERRQ(ierr);
2210   /* get the local target indices */
2211   ierr = MatGetOwnershipRange(A,&first,NULL);CHKERRQ(ierr);
2212   ierr = MatGetLocalSize(A,&rlocal_size,&clocal_size);CHKERRQ(ierr);
2213   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
2214   ierr = ISCreateGeneral(MPI_COMM_SELF,rlocal_size,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr);
2215   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
2216   ierr = ISDestroy(&irowp);CHKERRQ(ierr);
2217   /* the column permutation is so much easier;
2218      make a local version of 'colp' and invert it */
2219   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
2220   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
2221   if (size==1) {
2222     lcolp = colp;
2223   } else {
2224     ierr = ISAllGather(colp,&lcolp);CHKERRQ(ierr);
2225   }
2226   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
2227   /* now we just get the submatrix */
2228   ierr = MatGetSubMatrix_MPIBAIJ_Private(A,lrowp,lcolp,clocal_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
2229   if (size>1) {
2230     ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
2231   }
2232   /* clean up */
2233   ierr = ISDestroy(&lrowp);CHKERRQ(ierr);
2234   PetscFunctionReturn(0);
2235 }
2236 
2237 #undef __FUNCT__
2238 #define __FUNCT__ "MatGetGhosts_MPIBAIJ"
2239 PetscErrorCode  MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
2240 {
2241   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data;
2242   Mat_SeqBAIJ *B    = (Mat_SeqBAIJ*)baij->B->data;
2243 
2244   PetscFunctionBegin;
2245   if (nghosts) *nghosts = B->nbs;
2246   if (ghosts) *ghosts = baij->garray;
2247   PetscFunctionReturn(0);
2248 }
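/*
   A usage sketch: the returned array is baij->garray, the global block column
   numbers needed from other processes; it is owned by the matrix and must not
   be freed or modified by the caller.

      PetscInt       nghost;
      const PetscInt *ghost;
      ierr = MatGetGhosts(A,&nghost,&ghost);CHKERRQ(ierr);
*/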
2249 
2250 extern PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat);
2251 
2252 #undef __FUNCT__
2253 #define __FUNCT__ "MatFDColoringCreate_MPIBAIJ"
2254 /*
2255     This routine is almost identical to MatFDColoringCreate_MPIAIJ()!
2256 */
2257 PetscErrorCode MatFDColoringCreate_MPIBAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c)
2258 {
2259   Mat_MPIBAIJ            *baij = (Mat_MPIBAIJ*)mat->data;
2260   PetscErrorCode         ierr;
2261   PetscMPIInt            size,*ncolsonproc,*disp,nn;
2262   PetscInt               bs,i,n,nrows,j,k,m,ncols,col;
2263   const PetscInt         *is,*rows = 0,*A_ci,*A_cj,*B_ci,*B_cj,*ltog;
2264   PetscInt               nis = iscoloring->n,nctot,*cols;
2265   PetscInt               *rowhit,M,cstart,cend,colb;
2266   PetscInt               *columnsforrow,l;
2267   IS                     *isa;
2268   PetscBool              done,flg;
2269   ISLocalToGlobalMapping map = mat->cmap->bmapping;
2270   PetscInt               ctype=c->ctype;
2271 
2272   PetscFunctionBegin;
2273   if (!mat->assembled) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled first; MatAssemblyBegin/End();");
2274   if (ctype == IS_COLORING_GHOSTED && !map) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_INCOMP,"When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMappingBlock");
2275 
2276   if (map) {ierr = ISLocalToGlobalMappingGetIndices(map,&ltog);CHKERRQ(ierr);}
2277   else     ltog = NULL;
2278   ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr);
2279   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2280 
2281   M         = mat->rmap->n/bs;
2282   cstart    = mat->cmap->rstart/bs;
2283   cend      = mat->cmap->rend/bs;
2284   c->M      = mat->rmap->N/bs;         /* set the global rows and columns and local rows */
2285   c->N      = mat->cmap->N/bs;
2286   c->m      = mat->rmap->n/bs;
2287   c->rstart = mat->rmap->rstart/bs;
2288 
2289   c->ncolors = nis;
2290   ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr);
2291   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr);
2292   ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr);
2293   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr);
2294   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr);
2295   ierr       = PetscLogObjectMemory(c,5*nis*sizeof(PetscInt));CHKERRQ(ierr);
2296 
2297   /* Allow access to data structures of local part of matrix */
2298   if (!baij->colmap) {
2299     ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
2300   }
2301   ierr = MatGetColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr);
2302   ierr = MatGetColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr);
2303 
2304   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr);
2305   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr);
2306 
2307   for (i=0; i<nis; i++) {
2308     ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr);
2309     ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr);
2310 
2311     c->ncolumns[i] = n;
2312     if (n) {
2313       ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr);
2314       ierr = PetscLogObjectMemory(c,n*sizeof(PetscInt));CHKERRQ(ierr);
2315       ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr);
2316     } else {
2317       c->columns[i] = 0;
2318     }
2319 
2320     if (ctype == IS_COLORING_GLOBAL) {
2321       /* Determine the total (parallel) number of columns of this color */
2322       ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
2323       ierr = PetscMalloc2(size,PetscMPIInt,&ncolsonproc,size,PetscMPIInt,&disp);CHKERRQ(ierr);
2324 
2325       ierr  = PetscMPIIntCast(n,&nn);CHKERRQ(ierr);
2326       ierr  = MPI_Allgather(&nn,1,MPI_INT,ncolsonproc,1,MPI_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
2327       nctot = 0; for (j=0; j<size; j++) nctot += ncolsonproc[j];
2328       if (!nctot) {
2329         ierr = PetscInfo(mat,"Coloring of matrix has some unneeded colors with no corresponding rows\n");CHKERRQ(ierr);
2330       }
2331 
2332       disp[0] = 0;
2333       for (j=1; j<size; j++) {
2334         disp[j] = disp[j-1] + ncolsonproc[j-1];
2335       }
2336 
2337       /* Get complete list of columns for color on each processor */
2338       ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2339       ierr = MPI_Allgatherv((void*)is,n,MPIU_INT,cols,ncolsonproc,disp,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
2340       ierr = PetscFree2(ncolsonproc,disp);CHKERRQ(ierr);
2341     } else if (ctype == IS_COLORING_GHOSTED) {
2342       /* Determine local number of columns of this color on this process, including ghost points */
2343       nctot = n;
2344       ierr  = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2345       ierr  = PetscMemcpy(cols,is,n*sizeof(PetscInt));CHKERRQ(ierr);
2346     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not provided for this MatFDColoring type");
2347 
2348     /*
2349        Mark all rows affected by these columns
2350     */
2351     /* Temporary option to allow for debugging/testing */
2352     flg  = PETSC_FALSE;
2353     ierr = PetscOptionsGetBool(NULL,"-matfdcoloring_slow",&flg,NULL);CHKERRQ(ierr);
2354     if (!flg) { /*-----------------------------------------------------------------------------*/
2355       /* crude, fast version */
2356       ierr = PetscMemzero(rowhit,M*sizeof(PetscInt));CHKERRQ(ierr);
2357       /* loop over columns*/
2358       for (j=0; j<nctot; j++) {
2359         if (ctype == IS_COLORING_GHOSTED) {
2360           col = ltog[cols[j]];
2361         } else {
2362           col = cols[j];
2363         }
2364         if (col >= cstart && col < cend) {
2365           /* column is in diagonal block of matrix */
2366           rows = A_cj + A_ci[col-cstart];
2367           m    = A_ci[col-cstart+1] - A_ci[col-cstart];
2368         } else {
2369 #if defined(PETSC_USE_CTABLE)
2370           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2371           colb--;
2372 #else
2373           colb = baij->colmap[col] - 1;
2374 #endif
2375           if (colb == -1) {
2376             m = 0;
2377           } else {
2378             colb = colb/bs;
2379             rows = B_cj + B_ci[colb];
2380             m    = B_ci[colb+1] - B_ci[colb];
2381           }
2382         }
2383         /* loop over the rows of this column, marking them in rowhit */
2384         for (k=0; k<m; k++) {
2385           rowhit[*rows++] = col + 1;
2386         }
2387       }
2388 
2389       /* count the number of hits */
2390       nrows = 0;
2391       for (j=0; j<M; j++) {
2392         if (rowhit[j]) nrows++;
2393       }
2394       c->nrows[i] = nrows;
2395       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
2396       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
2397       ierr        = PetscLogObjectMemory(c,2*(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr);
2398       nrows       = 0;
2399       for (j=0; j<M; j++) {
2400         if (rowhit[j]) {
2401           c->rows[i][nrows]          = j;
2402           c->columnsforrow[i][nrows] = rowhit[j] - 1;
2403           nrows++;
2404         }
2405       }
2406     } else { /*-------------------------------------------------------------------------------*/
2407       /* slow version, using rowhit as a linked list */
2408       PetscInt currentcol,fm,mfm;
2409       rowhit[M] = M;
2410       nrows     = 0;
2411       /* loop over columns*/
2412       for (j=0; j<nctot; j++) {
2413         if (ctype == IS_COLORING_GHOSTED) {
2414           col = ltog[cols[j]];
2415         } else {
2416           col = cols[j];
2417         }
2418         if (col >= cstart && col < cend) {
2419           /* column is in diagonal block of matrix */
2420           rows = A_cj + A_ci[col-cstart];
2421           m    = A_ci[col-cstart+1] - A_ci[col-cstart];
2422         } else {
2423 #if defined(PETSC_USE_CTABLE)
2424           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2425           colb--;
2426 #else
2427           colb = baij->colmap[col] - 1;
2428 #endif
2429           if (colb == -1) {
2430             m = 0;
2431           } else {
2432             colb = colb/bs;
2433             rows = B_cj + B_ci[colb];
2434             m    = B_ci[colb+1] - B_ci[colb];
2435           }
2436         }
2437 
2438         /* loop over the rows of this column, marking them in rowhit */
2439         fm = M;    /* fm points to first entry in linked list */
2440         for (k=0; k<m; k++) {
2441           currentcol = *rows++;
2442           /* is it already in the list? */
2443           do {
2444             mfm = fm;
2445             fm  = rowhit[fm];
2446           } while (fm < currentcol);
2447           /* not in list so add it */
2448           if (fm != currentcol) {
2449             nrows++;
2450             columnsforrow[currentcol] = col;
2451             /* next three lines insert new entry into linked list */
2452             rowhit[mfm]        = currentcol;
2453             rowhit[currentcol] = fm;
2454             fm                 = currentcol;
2455             /* fm points to present position in list since we know the columns are sorted */
2456           } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid coloring of matrix detected");
2457         }
2458       }
2459       c->nrows[i] = nrows;
2460       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
2461       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
2462       ierr        = PetscLogObjectMemory(c,(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr);
2463       /* now store the linked list of rows into c->rows[i] */
2464       nrows = 0;
2465       fm    = rowhit[M];
2466       do {
2467         c->rows[i][nrows]            = fm;
2468         c->columnsforrow[i][nrows++] = columnsforrow[fm];
2469         fm                           = rowhit[fm];
2470       } while (fm < M);
2471     } /* ---------------------------------------------------------------------------------------*/
2472     ierr = PetscFree(cols);CHKERRQ(ierr);
2473   }
2474 
2475   /* Optimize by adding the vscale, and scaleforrow[][] fields */
2476   /*
2477        vscale will contain the "diagonal" on-processor scalings followed by the off-processor ones
2478   */
2479   if (ctype == IS_COLORING_GLOBAL) {
2480     PetscInt *garray;
2481     ierr = PetscMalloc(baij->B->cmap->n*sizeof(PetscInt),&garray);CHKERRQ(ierr);
2482     for (i=0; i<baij->B->cmap->n/bs; i++) {
2483       for (j=0; j<bs; j++) {
2484         garray[i*bs+j] = bs*baij->garray[i]+j;
2485       }
2486     }
2487     ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),baij->A->rmap->n,PETSC_DETERMINE,baij->B->cmap->n,garray,&c->vscale);CHKERRQ(ierr);
2488     ierr = PetscFree(garray);CHKERRQ(ierr);
2489     CHKMEMQ;
2490     ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
2491     for (k=0; k<c->ncolors; k++) {
2492       ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
2493       for (l=0; l<c->nrows[k]; l++) {
2494         col = c->columnsforrow[k][l];
2495         if (col >= cstart && col < cend) {
2496           /* column is in diagonal block of matrix */
2497           colb = col - cstart;
2498         } else {
2499           /* column  is in "off-processor" part */
2500 #if defined(PETSC_USE_CTABLE)
2501           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2502           colb--;
2503 #else
2504           colb = baij->colmap[col] - 1;
2505 #endif
2506           colb  = colb/bs;
2507           colb += cend - cstart;
2508         }
2509         c->vscaleforrow[k][l] = colb;
2510       }
2511     }
2512   } else if (ctype == IS_COLORING_GHOSTED) {
2513     /* Get gtol mapping */
2514     PetscInt N = mat->cmap->N,nlocal,*gtol;
2515     ierr = PetscMalloc((N+1)*sizeof(PetscInt),&gtol);CHKERRQ(ierr);
2516     for (i=0; i<N; i++) gtol[i] = -1;
2517     ierr = ISLocalToGlobalMappingGetSize(map,&nlocal);CHKERRQ(ierr);
2518     for (i=0; i<nlocal; i++) gtol[ltog[i]] = i;
2519 
2520     c->vscale = 0; /* will be created in MatFDColoringApply() */
2521     ierr      = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
2522     for (k=0; k<c->ncolors; k++) {
2523       ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
2524       for (l=0; l<c->nrows[k]; l++) {
2525         col = c->columnsforrow[k][l];      /* global column index */
2526 
2527         c->vscaleforrow[k][l] = gtol[col]; /* local column index */
2528       }
2529     }
2530     ierr = PetscFree(gtol);CHKERRQ(ierr);
2531   }
2532   ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr);
2533 
2534   ierr = PetscFree(rowhit);CHKERRQ(ierr);
2535   ierr = PetscFree(columnsforrow);CHKERRQ(ierr);
2536   ierr = MatRestoreColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr);
2537   ierr = MatRestoreColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr);
2538   if (map) {ierr = ISLocalToGlobalMappingRestoreIndices(map,&ltog);CHKERRQ(ierr);}
2539   CHKMEMQ;
2540   PetscFunctionReturn(0);
2541 }
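/*
   A hedged sketch of how a coloring context that reaches the routine above is
   typically built by user code (the SNES/TS layers do the equivalent internally);
   J is assumed to be an assembled MATMPIBAIJ Jacobian.

      ISColoring    iscoloring;
      MatFDColoring fdcoloring;
      ierr = MatGetColoring(J,MATCOLORINGSL,&iscoloring);CHKERRQ(ierr);
      ierr = MatFDColoringCreate(J,iscoloring,&fdcoloring);CHKERRQ(ierr);
      ierr = ISColoringDestroy(&iscoloring);CHKERRQ(ierr);
      ierr = MatFDColoringDestroy(&fdcoloring);CHKERRQ(ierr);
*/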
2542 
2543 #undef __FUNCT__
2544 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIBAIJ"
2545 PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat)
2546 {
2547   Mat            B;
2548   Mat_MPIBAIJ    *a  = (Mat_MPIBAIJ*)A->data;
2549   Mat_SeqBAIJ    *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data;
2550   Mat_SeqAIJ     *b;
2551   PetscErrorCode ierr;
2552   PetscMPIInt    size,rank,*recvcounts = 0,*displs = 0;
2553   PetscInt       sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs;
2554   PetscInt       m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf;
2555 
2556   PetscFunctionBegin;
2557   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
2558   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
2559 
2560   /* ----------------------------------------------------------------
2561      Tell every processor the number of nonzeros per row
2562   */
2563   ierr = PetscMalloc((A->rmap->N/bs)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
2564   for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) {
2565     lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs];
2566   }
2567   sendcount = A->rmap->rend/bs - A->rmap->rstart/bs;
2568   ierr      = PetscMalloc(2*size*sizeof(PetscMPIInt),&recvcounts);CHKERRQ(ierr);
2569   displs    = recvcounts + size;
2570   for (i=0; i<size; i++) {
2571     recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs;
2572     displs[i]     = A->rmap->range[i]/bs;
2573   }
2574 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2575   ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2576 #else
2577   ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2578 #endif
2579   /* ---------------------------------------------------------------
2580      Create the sequential matrix of the same type as the local block diagonal
2581   */
2582   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
2583   ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2584   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2585   ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr);
2586   b    = (Mat_SeqAIJ*)B->data;
2587 
2588   /*--------------------------------------------------------------------
2589     Copy my part of matrix column indices over
2590   */
2591   sendcount  = ad->nz + bd->nz;
2592   jsendbuf   = b->j + b->i[rstarts[rank]/bs];
2593   a_jsendbuf = ad->j;
2594   b_jsendbuf = bd->j;
2595   n          = A->rmap->rend/bs - A->rmap->rstart/bs;
2596   cnt        = 0;
2597   for (i=0; i<n; i++) {
2598 
2599     /* put in lower diagonal portion */
2600     m = bd->i[i+1] - bd->i[i];
2601     while (m > 0) {
2602       /* is it above diagonal (in bd (compressed) numbering) */
2603       if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break;
2604       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2605       m--;
2606     }
2607 
2608     /* put in diagonal portion */
2609     for (j=ad->i[i]; j<ad->i[i+1]; j++) {
2610       jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++;
2611     }
2612 
2613     /* put in upper diagonal portion */
2614     while (m-- > 0) {
2615       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2616     }
2617   }
2618   if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt);
2619 
2620   /*--------------------------------------------------------------------
2621     Gather all column indices to all processors
2622   */
2623   for (i=0; i<size; i++) {
2624     recvcounts[i] = 0;
2625     for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) {
2626       recvcounts[i] += lens[j];
2627     }
2628   }
2629   displs[0] = 0;
2630   for (i=1; i<size; i++) {
2631     displs[i] = displs[i-1] + recvcounts[i-1];
2632   }
2633 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2634   ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2635 #else
2636   ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2637 #endif
2638   /*--------------------------------------------------------------------
2639     Assemble the matrix into usable form (note: numerical values are not yet set)
2640   */
2641   /* set the b->ilen (length of each row) values */
2642   ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr);
2643   /* set the b->i indices */
2644   b->i[0] = 0;
2645   for (i=1; i<=A->rmap->N/bs; i++) {
2646     b->i[i] = b->i[i-1] + lens[i-1];
2647   }
2648   ierr = PetscFree(lens);CHKERRQ(ierr);
2649   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2650   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2651   ierr = PetscFree(recvcounts);CHKERRQ(ierr);
2652 
2653   if (A->symmetric) {
2654     ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2655   } else if (A->hermitian) {
2656     ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr);
2657   } else if (A->structurally_symmetric) {
2658     ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2659   }
2660   *newmat = B;
2661   PetscFunctionReturn(0);
2662 }
2663 
2664 #undef __FUNCT__
2665 #define __FUNCT__ "MatSOR_MPIBAIJ"
2666 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2667 {
2668   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
2669   PetscErrorCode ierr;
2670   Vec            bb1 = 0;
2671 
2672   PetscFunctionBegin;
2673   if (flag == SOR_APPLY_UPPER) {
2674     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2675     PetscFunctionReturn(0);
2676   }
2677 
2678   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) { /* bb1 (the locally updated right-hand side) is needed unless this is a single sweep with a zero initial guess */
2679     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2680   }
2681 
2682   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2683     if (flag & SOR_ZERO_INITIAL_GUESS) {
2684       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2685       its--;
2686     }
2687 
2688     while (its--) {
2689       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2690       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2691 
2692       /* update rhs: bb1 = bb - B*x */
2693       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2694       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2695 
2696       /* local sweep */
2697       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2698     }
2699   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
2700     if (flag & SOR_ZERO_INITIAL_GUESS) {
2701       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2702       its--;
2703     }
2704     while (its--) {
2705       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2706       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2707 
2708       /* update rhs: bb1 = bb - B*x */
2709       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2710       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2711 
2712       /* local sweep */
2713       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2714     }
2715   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
2716     if (flag & SOR_ZERO_INITIAL_GUESS) {
2717       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2718       its--;
2719     }
2720     while (its--) {
2721       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2722       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2723 
2724       /* update rhs: bb1 = bb - B*x */
2725       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2726       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2727 
2728       /* local sweep */
2729       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2730     }
2731   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Requested parallel SOR not supported");
2732 
2733   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2734   PetscFunctionReturn(0);
2735 }
2736 
2737 extern PetscErrorCode  MatFDColoringApply_BAIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2738 
2739 #undef __FUNCT__
2740 #define __FUNCT__ "MatInvertBlockDiagonal_MPIBAIJ"
2741 PetscErrorCode  MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values)
2742 {
2743   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*) A->data;
2744   PetscErrorCode ierr;
2745 
2746   PetscFunctionBegin;
2747   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2748   PetscFunctionReturn(0);
2749 }
2750 
2751 
2752 /* -------------------------------------------------------------------*/
2753 static struct _MatOps MatOps_Values = {
2754        MatSetValues_MPIBAIJ,
2755        MatGetRow_MPIBAIJ,
2756        MatRestoreRow_MPIBAIJ,
2757        MatMult_MPIBAIJ,
2758 /* 4*/ MatMultAdd_MPIBAIJ,
2759        MatMultTranspose_MPIBAIJ,
2760        MatMultTransposeAdd_MPIBAIJ,
2761        0,
2762        0,
2763        0,
2764 /*10*/ 0,
2765        0,
2766        0,
2767        MatSOR_MPIBAIJ,
2768        MatTranspose_MPIBAIJ,
2769 /*15*/ MatGetInfo_MPIBAIJ,
2770        MatEqual_MPIBAIJ,
2771        MatGetDiagonal_MPIBAIJ,
2772        MatDiagonalScale_MPIBAIJ,
2773        MatNorm_MPIBAIJ,
2774 /*20*/ MatAssemblyBegin_MPIBAIJ,
2775        MatAssemblyEnd_MPIBAIJ,
2776        MatSetOption_MPIBAIJ,
2777        MatZeroEntries_MPIBAIJ,
2778 /*24*/ MatZeroRows_MPIBAIJ,
2779        0,
2780        0,
2781        0,
2782        0,
2783 /*29*/ MatSetUp_MPIBAIJ,
2784        0,
2785        0,
2786        0,
2787        0,
2788 /*34*/ MatDuplicate_MPIBAIJ,
2789        0,
2790        0,
2791        0,
2792        0,
2793 /*39*/ MatAXPY_MPIBAIJ,
2794        MatGetSubMatrices_MPIBAIJ,
2795        MatIncreaseOverlap_MPIBAIJ,
2796        MatGetValues_MPIBAIJ,
2797        MatCopy_MPIBAIJ,
2798 /*44*/ 0,
2799        MatScale_MPIBAIJ,
2800        0,
2801        0,
2802        0,
2803 /*49*/ 0,
2804        0,
2805        0,
2806        0,
2807        0,
2808 /*54*/ MatFDColoringCreate_MPIBAIJ,
2809        0,
2810        MatSetUnfactored_MPIBAIJ,
2811        MatPermute_MPIBAIJ,
2812        MatSetValuesBlocked_MPIBAIJ,
2813 /*59*/ MatGetSubMatrix_MPIBAIJ,
2814        MatDestroy_MPIBAIJ,
2815        MatView_MPIBAIJ,
2816        0,
2817        0,
2818 /*64*/ 0,
2819        0,
2820        0,
2821        0,
2822        0,
2823 /*69*/ MatGetRowMaxAbs_MPIBAIJ,
2824        0,
2825        0,
2826        0,
2827        0,
2828 /*74*/ 0,
2829        MatFDColoringApply_BAIJ,
2830        0,
2831        0,
2832        0,
2833 /*79*/ 0,
2834        0,
2835        0,
2836        0,
2837        MatLoad_MPIBAIJ,
2838 /*84*/ 0,
2839        0,
2840        0,
2841        0,
2842        0,
2843 /*89*/ 0,
2844        0,
2845        0,
2846        0,
2847        0,
2848 /*94*/ 0,
2849        0,
2850        0,
2851        0,
2852        0,
2853 /*99*/ 0,
2854        0,
2855        0,
2856        0,
2857        0,
2858 /*104*/0,
2859        MatRealPart_MPIBAIJ,
2860        MatImaginaryPart_MPIBAIJ,
2861        0,
2862        0,
2863 /*109*/0,
2864        0,
2865        0,
2866        0,
2867        0,
2868 /*114*/MatGetSeqNonzeroStructure_MPIBAIJ,
2869        0,
2870        MatGetGhosts_MPIBAIJ,
2871        0,
2872        0,
2873 /*119*/0,
2874        0,
2875        0,
2876        0,
2877        0,
2878 /*124*/0,
2879        0,
2880        MatInvertBlockDiagonal_MPIBAIJ
2881 };
2882 
2883 #undef __FUNCT__
2884 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
2885 PetscErrorCode  MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a)
2886 {
2887   PetscFunctionBegin;
2888   *a = ((Mat_MPIBAIJ*)A->data)->A;
2889   PetscFunctionReturn(0);
2890 }
2891 
2892 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*);
2893 
2894 #undef __FUNCT__
2895 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ"
2896 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2897 {
2898   PetscInt       m,rstart,cstart,cend;
2899   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
2900   const PetscInt *JJ    =0;
2901   PetscScalar    *values=0;
2902   PetscErrorCode ierr;
2903 
2904   PetscFunctionBegin;
2905   ierr   = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2906   ierr   = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2907   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2908   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2909   ierr   = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2910   m      = B->rmap->n/bs;
2911   rstart = B->rmap->rstart/bs;
2912   cstart = B->cmap->rstart/bs;
2913   cend   = B->cmap->rend/bs;
2914 
2915   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
2916   ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
2917   for (i=0; i<m; i++) {
2918     nz = ii[i+1] - ii[i];
2919     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
2920     nz_max = PetscMax(nz_max,nz);
2921     JJ     = jj + ii[i];
2922     for (j=0; j<nz; j++) {
2923       if (*JJ >= cstart) break;
2924       JJ++;
2925     }
2926     d = 0;
2927     for (; j<nz; j++) {
2928       if (*JJ++ >= cend) break;
2929       d++;
2930     }
2931     d_nnz[i] = d;
2932     o_nnz[i] = nz - d;
2933   }
2934   ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2935   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
2936 
2937   values = (PetscScalar*)V;
2938   if (!values) {
2939     ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2940     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2941   }
2942   for (i=0; i<m; i++) {
2943     PetscInt          row    = i + rstart;
2944     PetscInt          ncols  = ii[i+1] - ii[i];
2945     const PetscInt    *icols = jj + ii[i];
2946     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2947     ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2948   }
2949 
2950   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2951   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2952   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2953   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2954   PetscFunctionReturn(0);
2955 }
2956 
2957 #undef __FUNCT__
2958 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR"
2959 /*@C
2960    MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format
2961    (the default parallel PETSc format).
2962 
2963    Collective on MPI_Comm
2964 
2965    Input Parameters:
2966 +  A - the matrix
2967 .  bs - the block size
2968 .  i - the indices into j for the start of each local row (starts with zero)
2969 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2970 -  v - optional values in the matrix
2971 
2972    Level: developer
2973 
2974 .keywords: matrix, aij, compressed row, sparse, parallel
2975 
2976 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ
2977 @*/
2978 PetscErrorCode  MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2979 {
2980   PetscErrorCode ierr;
2981 
2982   PetscFunctionBegin;
2983   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2984   PetscValidType(B,1);
2985   PetscValidLogicalCollectiveInt(B,bs,2);
2986   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
2987   PetscFunctionReturn(0);
2988 }
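
/*
   A minimal usage sketch for MatMPIBAIJSetPreallocationCSR() (hypothetical layout:
   one process, bs = 2, two local block rows, three nonzero blocks; the i[]/j[]
   arrays are in block units and j[] must be sorted within each block row; v may be
   NULL, otherwise it supplies bs*bs values per block).  Note that the routine above
   also inserts the values and assembles the matrix.

     Mat         A;
     PetscInt    bs  = 2;
     PetscInt    i[] = {0,1,3};          block-row offsets into j[]
     PetscInt    j[] = {0,0,1};          global block-column indices
     PetscScalar v[3*4];                 bs*bs values per nonzero block, filled by the caller (not shown)

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,4,4,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
     ierr = MatMPIBAIJSetPreallocationCSR(A,bs,i,j,v);CHKERRQ(ierr);
*/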
2989 
2990 #undef __FUNCT__
2991 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
2992 PetscErrorCode  MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
2993 {
2994   Mat_MPIBAIJ    *b;
2995   PetscErrorCode ierr;
2996   PetscInt       i;
2997   PetscBool      d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE;
2998 
2999   PetscFunctionBegin;
3000   if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE;
3001   if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE;
3002 
3003   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
3004   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
3005   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
3006   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
3007 
3008   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
3009   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
3010   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3011   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3012   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
3013 
3014   if (d_nnz) {
3015     for (i=0; i<B->rmap->n/bs; i++) {
3016       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]);
3017     }
3018   }
3019   if (o_nnz) {
3020     for (i=0; i<B->rmap->n/bs; i++) {
3021       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]);
3022     }
3023   }
3024 
3025   b      = (Mat_MPIBAIJ*)B->data;
3026   b->bs2 = bs*bs;
3027   b->mbs = B->rmap->n/bs;
3028   b->nbs = B->cmap->n/bs;
3029   b->Mbs = B->rmap->N/bs;
3030   b->Nbs = B->cmap->N/bs;
3031 
3032   for (i=0; i<=b->size; i++) {
3033     b->rangebs[i] = B->rmap->range[i]/bs;
3034   }
3035   b->rstartbs = B->rmap->rstart/bs;
3036   b->rendbs   = B->rmap->rend/bs;
3037   b->cstartbs = B->cmap->rstart/bs;
3038   b->cendbs   = B->cmap->rend/bs;
3039 
3040   if (!B->preallocated) {
3041     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3042     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3043     ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
3044     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3045     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3046     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3047     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
3048     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3049     ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr);
3050   }
3051 
3052   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3053   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
3054   /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). */
3055   if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3056   if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3057   B->preallocated = PETSC_TRUE;
3058   PetscFunctionReturn(0);
3059 }
3060 
3061 extern PetscErrorCode  MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
3062 extern PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
3063 
3064 #undef __FUNCT__
3065 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj"
3066 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype,MatReuse reuse,Mat *adj)
3067 {
3068   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
3069   PetscErrorCode ierr;
3070   Mat_SeqBAIJ    *d  = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data;
3071   PetscInt       M   = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs;
3072   const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;
3073 
3074   PetscFunctionBegin;
3075   ierr  = PetscMalloc((M+1)*sizeof(PetscInt),&ii);CHKERRQ(ierr);
3076   ii[0] = 0;
3077   CHKMEMQ;
3078   for (i=0; i<M; i++) {
3079     if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]);
3080     if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]);
3081     ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i];
3082     /* remove one from the count if the row has a diagonal entry, since the diagonal is excluded from the adjacency structure */
3083     for (j=id[i]; j<id[i+1]; j++) {
3084       if (jd[j] == i) {ii[i+1]--;break;}
3085     }
3086     CHKMEMQ;
3087   }
3088   ierr = PetscMalloc(ii[M]*sizeof(PetscInt),&jj);CHKERRQ(ierr);
3089   cnt  = 0;
3090   for (i=0; i<M; i++) {
3091     for (j=io[i]; j<io[i+1]; j++) {
3092       if (garray[jo[j]] > rstart) break;
3093       jj[cnt++] = garray[jo[j]];
3094       CHKMEMQ;
3095     }
3096     for (k=id[i]; k<id[i+1]; k++) {
3097       if (jd[k] != i) {
3098         jj[cnt++] = rstart + jd[k];
3099         CHKMEMQ;
3100       }
3101     }
3102     for (; j<io[i+1]; j++) {
3103       jj[cnt++] = garray[jo[j]];
3104       CHKMEMQ;
3105     }
3106   }
3107   ierr = MatCreateMPIAdj(PetscObjectComm((PetscObject)B),M,B->cmap->N/B->rmap->bs,ii,jj,NULL,adj);CHKERRQ(ierr);
3108   PetscFunctionReturn(0);
3109 }
3110 
3111 #include <../src/mat/impls/aij/mpi/mpiaij.h>
3112 
3113 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,MatReuse,Mat*);
3114 
3115 #undef __FUNCT__
3116 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAIJ"
3117 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
3118 {
3119   PetscErrorCode ierr;
3120   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
3121   Mat            B;
3122   Mat_MPIAIJ     *b;
3123 
3124   PetscFunctionBegin;
3125   if (!A->assembled) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Matrix must be assembled");
3126 
3127   ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
3128   ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
3129   ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
3130   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
3131   ierr = MatMPIAIJSetPreallocation(B,0,NULL,0,NULL);CHKERRQ(ierr);
3132   b    = (Mat_MPIAIJ*) B->data;
3133 
3134   ierr = MatDestroy(&b->A);CHKERRQ(ierr);
3135   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
3136   ierr = MatDisAssemble_MPIBAIJ(A);CHKERRQ(ierr);
3137   ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr);
3138   ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr);
3139   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3140   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3141   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3142   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3143   if (reuse == MAT_REUSE_MATRIX) {
3144     ierr = MatHeaderReplace(A,B);CHKERRQ(ierr);
3145   } else {
3146    *newmat = B;
3147   }
3148   PetscFunctionReturn(0);
3149 }
3150 
3151 #if defined(PETSC_HAVE_MUMPS)
3152 PETSC_EXTERN PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*);
3153 #endif
3154 
3155 /*MC
3156    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
3157 
3158    Options Database Keys:
3159 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
3160 . -mat_block_size <bs> - set the blocksize used to store the matrix
3161 - -mat_use_hash_table <fact> - use a hash table when constructing the matrix, with <fact> as the hash table size factor
3162 
3163   Level: beginner
3164 
3165 .seealso: MatCreateMPIBAIJ
3166 M*/
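
/*
   Usage sketch (hypothetical sizes; the type can equivalently be selected at run
   time with -mat_type mpibaij together with MatSetFromOptions()):

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,4,4,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
     ierr = MatMPIBAIJSetPreallocation(A,2,5,NULL,2,NULL);CHKERRQ(ierr);
*/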
3167 
3168 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,MatType,MatReuse,Mat*);
3169 
3170 #undef __FUNCT__
3171 #define __FUNCT__ "MatCreate_MPIBAIJ"
3172 PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B)
3173 {
3174   Mat_MPIBAIJ    *b;
3175   PetscErrorCode ierr;
3176   PetscBool      flg;
3177 
3178   PetscFunctionBegin;
3179   ierr    = PetscNewLog(B,Mat_MPIBAIJ,&b);CHKERRQ(ierr);
3180   B->data = (void*)b;
3181 
3182   ierr         = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3183   B->assembled = PETSC_FALSE;
3184 
3185   B->insertmode = NOT_SET_VALUES;
3186   ierr          = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
3187   ierr          = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr);
3188 
3189   /* build local table of row and column ownerships */
3190   ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);
3191 
3192   /* build cache for off array entries formed */
3193   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
3194 
3195   b->donotstash  = PETSC_FALSE;
3196   b->colmap      = NULL;
3197   b->garray      = NULL;
3198   b->roworiented = PETSC_TRUE;
3199 
3200   /* stuff used in block assembly */
3201   b->barray = 0;
3202 
3203   /* stuff used for matrix vector multiply */
3204   b->lvec  = 0;
3205   b->Mvctx = 0;
3206 
3207   /* stuff for MatGetRow() */
3208   b->rowindices   = 0;
3209   b->rowvalues    = 0;
3210   b->getrowactive = PETSC_FALSE;
3211 
3212   /* hash table stuff */
3213   b->ht           = 0;
3214   b->hd           = 0;
3215   b->ht_size      = 0;
3216   b->ht_flag      = PETSC_FALSE;
3217   b->ht_fact      = 0;
3218   b->ht_total_ct  = 0;
3219   b->ht_insert_ct = 0;
3220 
3221   /* stuff for MatGetSubMatrices_MPIBAIJ_local() */
3222   b->ijonly = PETSC_FALSE;
3223 
3224   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr);
3225   ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,NULL);CHKERRQ(ierr);
3226   if (flg) {
3227     PetscReal fact = 1.39;
3228     ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
3229     ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
3230     if (fact <= 1.0) fact = 1.39;
3231     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
3232     ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
3233   }
3234   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3235 
3236 #if defined(PETSC_HAVE_MUMPS)
3237   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps",MatGetFactor_baij_mumps);CHKERRQ(ierr);
3238 #endif
3239   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiadj_C","MatConvert_MPIBAIJ_MPIAdj",MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr);
3240   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiaij_C","MatConvert_MPIBAIJ_MPIAIJ",MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr);
3241   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C","MatConvert_MPIBAIJ_MPISBAIJ",MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr);
3242   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C","MatStoreValues_MPIBAIJ",MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
3243   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C","MatRetrieveValues_MPIBAIJ",MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
3244   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C","MatGetDiagonalBlock_MPIBAIJ",MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
3245   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C","MatMPIBAIJSetPreallocation_MPIBAIJ",MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
3246   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C","MatMPIBAIJSetPreallocationCSR_MPIBAIJ",MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
3247   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C","MatDiagonalScaleLocal_MPIBAIJ",MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
3248   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSetHashTableFactor_C","MatSetHashTableFactor_MPIBAIJ",MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
3249   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpibstrm_C","MatConvert_MPIBAIJ_MPIBSTRM",MatConvert_MPIBAIJ_MPIBSTRM);CHKERRQ(ierr);
3250   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);
3251   PetscFunctionReturn(0);
3252 }
3253 
3254 /*MC
3255    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
3256 
3257    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
3258    and MATMPIBAIJ otherwise.
3259 
3260    Options Database Keys:
3261 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
3262 
3263   Level: beginner
3264 
3265 .seealso: MatCreateBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3266 M*/
3267 
3268 #undef __FUNCT__
3269 #define __FUNCT__ "MatMPIBAIJSetPreallocation"
3270 /*@C
3271    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
3272    (block compressed row).  For good matrix assembly performance
3273    the user should preallocate the matrix storage by setting the parameters
3274    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3275    performance can be increased by more than a factor of 50.
3276 
3277    Collective on Mat
3278 
3279    Input Parameters:
3280 +  A - the matrix
3281 .  bs   - size of block
3282 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
3283            submatrix  (same for all local rows)
3284 .  d_nnz - array containing the number of block nonzeros in the various block rows
3285            in the diagonal portion of the local submatrix (possibly different for each block
3286            row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry and
3287            set it even if it is zero.
3288 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
3289            submatrix (same for all local rows).
3290 -  o_nnz - array containing the number of block nonzeros in the various block rows of the
3291            off-diagonal portion of the local submatrix (possibly different for
3292            each block row) or NULL.
3293 
3294    If the *_nnz parameter is given then the *_nz parameter is ignored
3295 
3296    Options Database Keys:
3297 +   -mat_block_size - size of the blocks to use
3298 -   -mat_use_hash_table <fact> - use a hash table when constructing the matrix, with <fact> as the hash table size factor
3299 
3300    Notes:
3301    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
3302    then it must be used on all processors that share the object for that argument.
3303 
3304    Storage Information:
3305    For a square global matrix we define each processor's diagonal portion
3306    to be its local rows and the corresponding columns (a square submatrix);
3307    each processor's off-diagonal portion encompasses the remainder of the
3308    local matrix (a rectangular submatrix).
3309 
3310    The user can specify preallocated storage for the diagonal part of
3311    the local submatrix with either d_nz or d_nnz (not both).  Set
3312    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3313    memory allocation.  Likewise, specify preallocated storage for the
3314    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3315 
3316    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3317    the figure below we depict these three local rows and all columns (0-11).
3318 
3319 .vb
3320            0 1 2 3 4 5 6 7 8 9 10 11
3321           -------------------
3322    row 3  |  o o o d d d o o o o o o
3323    row 4  |  o o o d d d o o o o o o
3324    row 5  |  o o o d d d o o o o o o
3325           -------------------
3326 .ve
3327 
3328    Thus, any entries in the d locations are stored in the d (diagonal)
3329    submatrix, and any entries in the o locations are stored in the
3330    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3331    stored simply in the MATSEQBAIJ format for compressed row storage.
3332 
3333    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3334    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3335    In general, for PDE problems in which most nonzeros are near the diagonal,
3336    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3337    or you will get TERRIBLE performance; see the users' manual chapter on
3338    matrices.
3339 
3340    You can call MatGetInfo() to get information on how effective the preallocation was;
3341    for example, the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
3342    You can also run with the option -info and look for messages with the string
3343    malloc in them to see if additional memory allocation was needed.
3344 
3345    Level: intermediate
3346 
3347 .keywords: matrix, block, aij, compressed row, sparse, parallel
3348 
3349 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocationCSR(), PetscSplitOwnership()
3350 @*/
3351 PetscErrorCode  MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3352 {
3353   PetscErrorCode ierr;
3354 
3355   PetscFunctionBegin;
3356   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3357   PetscValidType(B,1);
3358   PetscValidLogicalCollectiveInt(B,bs,2);
3359   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3360   PetscFunctionReturn(0);
3361 }
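
/*
   A minimal preallocation sketch (hypothetical layout: block size 2, each process
   owning 3 block rows of a block-tridiagonal matrix; on an interior process the
   first and last block rows each have one block coupling to a neighboring process):

     Mat      A;
     PetscInt d_nnz[3] = {2,3,2},o_nnz[3] = {1,0,1};

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,3*2,3*2,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
     ierr = MatMPIBAIJSetPreallocation(A,2,0,d_nnz,0,o_nnz);CHKERRQ(ierr);

   followed by MatSetValuesBlocked()/MatSetValues() and MatAssemblyBegin()/MatAssemblyEnd().
*/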
3362 
3363 #undef __FUNCT__
3364 #define __FUNCT__ "MatCreateBAIJ"
3365 /*@C
3366    MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format
3367    (block compressed row).  For good matrix assembly performance
3368    the user should preallocate the matrix storage by setting the parameters
3369    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3370    performance can be increased by more than a factor of 50.
3371 
3372    Collective on MPI_Comm
3373 
3374    Input Parameters:
3375 +  comm - MPI communicator
3376 .  bs   - size of block
3377 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3378            This value should be the same as the local size used in creating the
3379            y vector for the matrix-vector product y = Ax.
3380 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
3381            This value should be the same as the local size used in creating the
3382            x vector for the matrix-vector product y = Ax.
3383 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3384 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3385 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
3386            submatrix  (same for all local rows)
3387 .  d_nnz - array containing the number of nonzero blocks in the various block rows
3388            in the diagonal portion of the local submatrix (possibly different for each block
3389            row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry
3390            and set it even if it is zero.
3391 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3392            submatrix (same for all local rows).
3393 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
3394            off-diagonal portion of the local submatrix (possibly different for
3395            each block row) or NULL.
3396 
3397    Output Parameter:
3398 .  A - the matrix
3399 
3400    Options Database Keys:
3401 +   -mat_block_size - size of the blocks to use
3402 -   -mat_use_hash_table <fact> - use a hash table when constructing the matrix, with <fact> as the hash table size factor
3403 
3404    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3405    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3406    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3407 
3408    Notes:
3409    If the *_nnz parameter is given then the *_nz parameter is ignored
3410 
3411    A nonzero block is any block that has 1 or more nonzeros in it
3412 
3413    The user MUST specify either the local or global matrix dimensions
3414    (possibly both).
3415 
3416    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
3417    then it must be used on all processors that share the object for that argument.
3418 
3419    Storage Information:
3420    For a square global matrix we define each processor's diagonal portion
3421    to be its local rows and the corresponding columns (a square submatrix);
3422    each processor's off-diagonal portion encompasses the remainder of the
3423    local matrix (a rectangular submatrix).
3424 
3425    The user can specify preallocated storage for the diagonal part of
3426    the local submatrix with either d_nz or d_nnz (not both).  Set
3427    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3428    memory allocation.  Likewise, specify preallocated storage for the
3429    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3430 
3431    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3432    the figure below we depict these three local rows and all columns (0-11).
3433 
3434 .vb
3435            0 1 2 3 4 5 6 7 8 9 10 11
3436           -------------------
3437    row 3  |  o o o d d d o o o o o o
3438    row 4  |  o o o d d d o o o o o o
3439    row 5  |  o o o d d d o o o o o o
3440           -------------------
3441 .ve
3442 
3443    Thus, any entries in the d locations are stored in the d (diagonal)
3444    submatrix, and any entries in the o locations are stored in the
3445    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3446    stored simply in the MATSEQBAIJ format for compressed row storage.
3447 
3448    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3449    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3450    In general, for PDE problems in which most nonzeros are near the diagonal,
3451    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3452    or you will get TERRIBLE performance; see the users' manual chapter on
3453    matrices.
3454 
3455    Level: intermediate
3456 
3457 .keywords: matrix, block, aij, compressed row, sparse, parallel
3458 
3459 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3460 @*/
3461 PetscErrorCode  MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3462 {
3463   PetscErrorCode ierr;
3464   PetscMPIInt    size;
3465 
3466   PetscFunctionBegin;
3467   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3468   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3469   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3470   if (size > 1) {
3471     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
3472     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3473   } else {
3474     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3475     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3476   }
3477   PetscFunctionReturn(0);
3478 }
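
/*
   Usage sketch for the convenience routine above (hypothetical sizes: block size 3,
   10 local block rows, at most 3 diagonal-part and 2 off-diagonal-part blocks per
   block row; constant d_nz/o_nz are used instead of the d_nnz/o_nnz arrays):

     Mat A;
     ierr = MatCreateBAIJ(PETSC_COMM_WORLD,3,10*3,10*3,PETSC_DETERMINE,PETSC_DETERMINE,3,NULL,2,NULL,&A);CHKERRQ(ierr);

   followed by MatSetValuesBlocked(), MatAssemblyBegin()/MatAssemblyEnd() and eventually MatDestroy(&A).
*/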
3479 
3480 #undef __FUNCT__
3481 #define __FUNCT__ "MatDuplicate_MPIBAIJ"
3482 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3483 {
3484   Mat            mat;
3485   Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
3486   PetscErrorCode ierr;
3487   PetscInt       len=0;
3488 
3489   PetscFunctionBegin;
3490   *newmat = 0;
3491   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
3492   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3493   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3494   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3495 
3496   mat->factortype   = matin->factortype;
3497   mat->preallocated = PETSC_TRUE;
3498   mat->assembled    = PETSC_TRUE;
3499   mat->insertmode   = NOT_SET_VALUES;
3500 
3501   a             = (Mat_MPIBAIJ*)mat->data;
3502   mat->rmap->bs = matin->rmap->bs;
3503   a->bs2        = oldmat->bs2;
3504   a->mbs        = oldmat->mbs;
3505   a->nbs        = oldmat->nbs;
3506   a->Mbs        = oldmat->Mbs;
3507   a->Nbs        = oldmat->Nbs;
3508 
3509   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3510   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3511 
3512   a->size         = oldmat->size;
3513   a->rank         = oldmat->rank;
3514   a->donotstash   = oldmat->donotstash;
3515   a->roworiented  = oldmat->roworiented;
3516   a->rowindices   = 0;
3517   a->rowvalues    = 0;
3518   a->getrowactive = PETSC_FALSE;
3519   a->barray       = 0;
3520   a->rstartbs     = oldmat->rstartbs;
3521   a->rendbs       = oldmat->rendbs;
3522   a->cstartbs     = oldmat->cstartbs;
3523   a->cendbs       = oldmat->cendbs;
3524 
3525   /* hash table stuff */
3526   a->ht           = 0;
3527   a->hd           = 0;
3528   a->ht_size      = 0;
3529   a->ht_flag      = oldmat->ht_flag;
3530   a->ht_fact      = oldmat->ht_fact;
3531   a->ht_total_ct  = 0;
3532   a->ht_insert_ct = 0;
3533 
3534   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
3535   if (oldmat->colmap) {
3536 #if defined(PETSC_USE_CTABLE)
3537     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3538 #else
3539     ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3540     ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3541     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3542 #endif
3543   } else a->colmap = 0;
3544 
3545   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
3546     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3547     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3548     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
3549   } else a->garray = 0;
3550 
3551   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
3552   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3553   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3554   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3555   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3556 
3557   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3558   ierr    = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3559   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3560   ierr    = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3561   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3562   *newmat = mat;
3563   PetscFunctionReturn(0);
3564 }
3565 
3566 #undef __FUNCT__
3567 #define __FUNCT__ "MatLoad_MPIBAIJ"
3568 PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer)
3569 {
3570   PetscErrorCode ierr;
3571   int            fd;
3572   PetscInt       i,nz,j,rstart,rend;
3573   PetscScalar    *vals,*buf;
3574   MPI_Comm       comm;
3575   MPI_Status     status;
3576   PetscMPIInt    rank,size,maxnz;
3577   PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
3578   PetscInt       *locrowlens = NULL,*procsnz = NULL,*browners = NULL;
3579   PetscInt       jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax;
3580   PetscMPIInt    tag    = ((PetscObject)viewer)->tag;
3581   PetscInt       *dlens = NULL,*odlens = NULL,*mask = NULL,*masked1 = NULL,*masked2 = NULL,rowcount,odcount;
3582   PetscInt       dcount,kmax,k,nzcount,tmp,mend,sizesset=1,grows,gcols;
3583 
3584   PetscFunctionBegin;
3585   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3586   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr);
3587   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3588   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3589 
3590   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3591   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3592   if (!rank) {
3593     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3594     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
3595     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3596   }
3597 
3598   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
3599 
3600   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3601   M    = header[1]; N = header[2];
3602 
3603   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3604   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
3605   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
3606 
3607   /* If global sizes are set, check if they are consistent with that given in the file */
3608   if (sizesset) {
3609     ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
3610   }
3611   if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: matrix in file has (%D) and input matrix has (%D)",M,grows);
3612   if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: matrix in file has (%D) and input matrix has (%D)",N,gcols);
3613 
3614   if (M != N) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Can only do square matrices");
3615 
3616   /*
3617      This code adds extra rows to make sure the number of rows is
3618      divisible by the blocksize
3619   */
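  /* For example (hypothetical values): M = 10, bs = 4 gives Mbs = 10/4 = 2 and
     extra_rows = 4 - 10 + 4*2 = 2; since extra_rows != bs, Mbs is bumped to 3
     and the loaded matrix is padded to 12 rows. */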
3620   Mbs        = M/bs;
3621   extra_rows = bs - M + bs*Mbs;
3622   if (extra_rows == bs) extra_rows = 0;
3623   else                  Mbs++;
3624   if (extra_rows && !rank) {
3625     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3626   }
3627 
3628   /* determine ownership of all rows */
3629   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
3630     mbs = Mbs/size + ((Mbs % size) > rank);
3631     m   = mbs*bs;
3632   } else { /* User set */
3633     m   = newmat->rmap->n;
3634     mbs = m/bs;
3635   }
3636   ierr = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr);
3637   ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3638 
3639   /* process 0 needs enough room for process with most rows */
3640   if (!rank) {
3641     mmax = rowners[1];
3642     for (i=2; i<=size; i++) {
3643       mmax = PetscMax(mmax,rowners[i]);
3644     }
3645     mmax*=bs;
3646   } else mmax = m;
3647 
3648   rowners[0] = 0;
3649   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
3650   for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
3651   rstart = rowners[rank];
3652   rend   = rowners[rank+1];
3653 
3654   /* distribute row lengths to all processors */
3655   ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr);
3656   if (!rank) {
3657     mend = m;
3658     if (size == 1) mend = mend - extra_rows;
3659     ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr);
3660     for (j=mend; j<m; j++) locrowlens[j] = 1;
3661     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3662     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3663     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3664     for (j=0; j<m; j++) {
3665       procsnz[0] += locrowlens[j];
3666     }
3667     for (i=1; i<size; i++) {
3668       mend = browners[i+1] - browners[i];
3669       if (i == size-1) mend = mend - extra_rows;
3670       ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr);
3671       for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
3672       /* calculate the number of nonzeros on each processor */
3673       for (j=0; j<browners[i+1]-browners[i]; j++) {
3674         procsnz[i] += rowlengths[j];
3675       }
3676       ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3677     }
3678     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3679   } else {
3680     ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3681   }
3682 
3683   if (!rank) {
3684     /* determine max buffer needed and allocate it */
3685     maxnz = procsnz[0];
3686     for (i=1; i<size; i++) {
3687       maxnz = PetscMax(maxnz,procsnz[i]);
3688     }
3689     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3690 
3691     /* read in my part of the matrix column indices  */
3692     nz     = procsnz[0];
3693     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
3694     mycols = ibuf;
3695     if (size == 1) nz -= extra_rows;
3696     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3697     if (size == 1) {
3698       for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i;
3699     }
3700 
3701     /* read in the parts for all other processes (except the last) and ship them off */
3702     for (i=1; i<size-1; i++) {
3703       nz   = procsnz[i];
3704       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3705       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3706     }
3707     /* read in the part for the last process */
3708     if (size != 1) {
3709       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
3710       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3711       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
3712       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
3713     }
3714     ierr = PetscFree(cols);CHKERRQ(ierr);
3715   } else {
3716     /* determine buffer space needed for message */
3717     nz = 0;
3718     for (i=0; i<m; i++) {
3719       nz += locrowlens[i];
3720     }
3721     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
3722     mycols = ibuf;
3723     /* receive message of column indices*/
3724     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3725     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
3726     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
3727   }
3728 
3729   /* loop over local rows, determining number of off diagonal entries */
3730   ierr     = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
3731   ierr     = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
3732   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3733   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3734   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3735   rowcount = 0; nzcount = 0;
3736   for (i=0; i<mbs; i++) {
3737     dcount  = 0;
3738     odcount = 0;
3739     for (j=0; j<bs; j++) {
3740       kmax = locrowlens[rowcount];
3741       for (k=0; k<kmax; k++) {
3742         tmp = mycols[nzcount++]/bs;
3743         if (!mask[tmp]) {
3744           mask[tmp] = 1;
3745           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
3746           else masked1[dcount++] = tmp;
3747         }
3748       }
3749       rowcount++;
3750     }
3751 
3752     dlens[i]  = dcount;
3753     odlens[i] = odcount;
3754 
3755     /* zero out the mask elements we set */
3756     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
3757     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
3758   }
3759 
3760 
3761   if (!sizesset) {
3762     ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3763   }
3764   ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
3765 
3766   if (!rank) {
3767     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
3768     /* read in my part of the matrix numerical values  */
3769     nz     = procsnz[0];
3770     vals   = buf;
3771     mycols = ibuf;
3772     if (size == 1) nz -= extra_rows;
3773     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3774     if (size == 1) {
3775       for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0;
3776     }
3777 
3778     /* insert into matrix */
3779     jj = rstart*bs;
3780     for (i=0; i<m; i++) {
3781       ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3782       mycols += locrowlens[i];
3783       vals   += locrowlens[i];
3784       jj++;
3785     }
3786     /* read in other processors (except the last one) and ship out */
3787     for (i=1; i<size-1; i++) {
3788       nz   = procsnz[i];
3789       vals = buf;
3790       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3791       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3792     }
3793     /* the last proc */
3794     if (size != 1) {
3795       nz   = procsnz[i] - extra_rows;
3796       vals = buf;
3797       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3798       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
3799       ierr = MPIULong_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3800     }
3801     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3802   } else {
3803     /* receive numeric values */
3804     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
3805 
3806     /* receive message of values*/
3807     vals   = buf;
3808     mycols = ibuf;
3809     ierr   = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3810 
3811     /* insert into matrix */
3812     jj = rstart*bs;
3813     for (i=0; i<m; i++) {
3814       ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3815       mycols += locrowlens[i];
3816       vals   += locrowlens[i];
3817       jj++;
3818     }
3819   }
3820   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
3821   ierr = PetscFree(buf);CHKERRQ(ierr);
3822   ierr = PetscFree(ibuf);CHKERRQ(ierr);
3823   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
3824   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
3825   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
3826   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3827   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3828   PetscFunctionReturn(0);
3829 }
3830 
3831 #undef __FUNCT__
3832 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
3833 /*@
3834    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
3835 
3836    Input Parameters:
3837 +  mat  - the matrix
3838 -  fact - factor used to compute the size of the hash table
3839 
3840    Not Collective, each process can use a different factor
3841 
3842    Level: advanced
3843 
3844   Notes:
3845    This can also be set by the command line option: -mat_use_hash_table <fact>
3846 
3847 .keywords: matrix, hashtable, factor, HT
3848 
3849 .seealso: MatSetOption()
3850 @*/
3851 PetscErrorCode  MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
3852 {
3853   PetscErrorCode ierr;
3854 
3855   PetscFunctionBegin;
3856   ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr);
3857   PetscFunctionReturn(0);
3858 }
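
/*
   Usage sketch (the factor 1.6 is hypothetical; the factor only has an effect when
   the hash table is enabled, e.g. with MatSetOption() as below or -mat_use_hash_table):

     ierr = MatSetOption(A,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
     ierr = MatMPIBAIJSetHashTableFactor(A,1.6);CHKERRQ(ierr);
*/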
3859 
3860 #undef __FUNCT__
3861 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
3862 PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
3863 {
3864   Mat_MPIBAIJ *baij;
3865 
3866   PetscFunctionBegin;
3867   baij          = (Mat_MPIBAIJ*)mat->data;
3868   baij->ht_fact = fact;
3869   PetscFunctionReturn(0);
3870 }
3871 
3872 #undef __FUNCT__
3873 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
3874 PetscErrorCode  MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
3875 {
3876   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
3877 
3878   PetscFunctionBegin;
3879   *Ad     = a->A;
3880   *Ao     = a->B;
3881   *colmap = a->garray;
3882   PetscFunctionReturn(0);
3883 }
3884 
3885 /*
3886     Special version for direct calls from Fortran (to eliminate two function call overheads
3887 */
3888 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3889 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3890 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3891 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3892 #endif
3893 
3894 #undef __FUNCT__
3895 #define __FUNCT__ "matmpibaijsetvaluesblocked"
3896 /*@C
3897   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked()
3898 
3899   Collective on Mat
3900 
3901   Input Parameters:
3902 + mat - the matrix
3903 . min - number of input rows
3904 . im - input rows
3905 . nin - number of input columns
3906 . in - input columns
3907 . v - numerical values input
3908 - addvin - INSERT_VALUES or ADD_VALUES
3909 
3910   Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ(), which is terrible code duplication.
3911 
3912   Level: advanced
3913 
3914 .seealso:   MatSetValuesBlocked()
3915 @*/
3916 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
3917 {
3918   /* convert input arguments to C version */
3919   Mat        mat  = *matin;
3920   PetscInt   m    = *min, n = *nin;
3921   InsertMode addv = *addvin;
3922 
3923   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
3924   const MatScalar *value;
3925   MatScalar       *barray     = baij->barray;
3926   PetscBool       roworiented = baij->roworiented;
3927   PetscErrorCode  ierr;
3928   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
3929   PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
3930   PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
3931 
3932   PetscFunctionBegin;
3933   /* tasks normally handled by MatSetValuesBlocked() */
3934   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
3935 #if defined(PETSC_USE_DEBUG)
3936   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
3937   if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
3938 #endif
3939   if (mat->assembled) {
3940     mat->was_assembled = PETSC_TRUE;
3941     mat->assembled     = PETSC_FALSE;
3942   }
3943   ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
3944 
3945 
3946   if (!barray) {
3947     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
3948     baij->barray = barray;
3949   }
3950 
3951   if (roworiented) stepval = (n-1)*bs;
3952   else stepval = (m-1)*bs;
3953 
3954   for (i=0; i<m; i++) {
3955     if (im[i] < 0) continue;
3956 #if defined(PETSC_USE_DEBUG)
3957     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
3958 #endif
3959     if (im[i] >= rstart && im[i] < rend) {
3960       row = im[i] - rstart;
3961       for (j=0; j<n; j++) {
3962         /* If NumCol = 1 then a copy is not required */
3963         if ((roworiented) && (n == 1)) {
3964           barray = (MatScalar*)v + i*bs2;
3965         } else if ((!roworiented) && (m == 1)) {
3966           barray = (MatScalar*)v + j*bs2;
3967         } else { /* Here a copy is required */
3968           if (roworiented) {
3969             value = v + i*(stepval+bs)*bs + j*bs;
3970           } else {
3971             value = v + j*(stepval+bs)*bs + i*bs;
3972           }
3973           for (ii=0; ii<bs; ii++,value+=stepval) {
3974             for (jj=0; jj<bs; jj++) {
3975               *barray++ = *value++;
3976             }
3977           }
3978           barray -=bs2;
3979         }
3980 
3981         if (in[j] >= cstart && in[j] < cend) {
3982           col  = in[j] - cstart;
3983           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
3984         } else if (in[j] < 0) continue;
3985 #if defined(PETSC_USE_DEBUG)
3986         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
3987 #endif
3988         else {
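          /* off-diagonal block column: after the matrix has been assembled once, colmap translates the global block column index into baij->B's local block numbering */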
3989           if (mat->was_assembled) {
3990             if (!baij->colmap) {
3991               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
3992             }
3993 
3994 #if defined(PETSC_USE_DEBUG)
3995 #if defined(PETSC_USE_CTABLE)
3996             { PetscInt data;
3997               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
3998               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
3999             }
4000 #else
4001             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
4002 #endif
4003 #endif
4004 #if defined(PETSC_USE_CTABLE)
4005             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
4006             col  = (col - 1)/bs;
4007 #else
4008             col = (baij->colmap[in[j]] - 1)/bs;
4009 #endif
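            /* a negative col means this global block column has no slot in baij->B; unless new nonzeros are forbidden, disassemble the matrix so B reverts to global column indices and the block can be added */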
4010             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
4011               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
4012               col  =  in[j];
4013             }
4014           } else col = in[j];
4015           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
4016         }
4017       }
4018     } else {
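      /* this block row is owned by another process: stash the values locally and communicate them during MatAssemblyBegin()/MatAssemblyEnd() */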
4019       if (!baij->donotstash) {
4020         if (roworiented) {
4021           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
4022         } else {
4023           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
4024         }
4025       }
4026     }
4027   }
4028 
4029   /* task normally handled by MatSetValuesBlocked() */
4030   ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
4031   PetscFunctionReturn(0);
4032 }
4033 
4034 #undef __FUNCT__
4035 #define __FUNCT__ "MatCreateMPIBAIJWithArrays"
4036 /*@
4037      MatCreateMPIBAIJWithArrays - creates an MPI BAIJ matrix using arrays that contain the local rows
4038          in standard CSR format.
4039 
4040    Collective on MPI_Comm
4041 
4042    Input Parameters:
4043 +  comm - MPI communicator
4044 .  bs - the block size, only a block size of 1 is supported
4045 .  m - number of local rows (Cannot be PETSC_DECIDE)
4046 .  n - number of local columns; this should be the same as the local size used in creating the
4047        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4048        calculated if N is given). For square matrices n is almost always m.
4049 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4050 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4051 .   i - row indices
4052 .   j - column indices
4053 -   a - matrix values
4054 
4055    Output Parameter:
4056 .   mat - the matrix
4057 
4058    Level: intermediate
4059 
4060    Notes:
4061        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4062      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4063      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4064 
4065        The i and j indices are 0 based; i is the row-pointer array, so i[row] gives the position in the local j (and a) arrays where that row's entries begin.
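
   Example (an illustrative sketch, not drawn from the PETSc documentation): with bs = 1, two processes,
   and rank 0 owning the first two rows of a 4 x 4 tridiagonal matrix, the arrays on rank 0 could be
.vb
      PetscInt    i[] = {0, 2, 5};                      /* row offsets into j and a (m+1 entries)   */
      PetscInt    j[] = {0, 1,  0, 1, 2};               /* global column indices, listed row by row */
      PetscScalar a[] = {2.0, -1.0,  -1.0, 2.0, -1.0};  /* values matching j entry for entry        */
      MatCreateMPIBAIJWithArrays(PETSC_COMM_WORLD,1,2,2,4,4,i,j,a,&A);
.ve
   (A is a Mat to be created; rank 1 would pass its own two rows analogously.)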
4066 
4067 .keywords: matrix, baij, block compressed row, sparse, parallel
4068 
4069 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR(),
4070           MATMPIBAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4071 @*/
4072 PetscErrorCode  MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4073 {
4074   PetscErrorCode ierr;
4075 
4076   PetscFunctionBegin;
4077   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4078   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4079   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4080   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4081   ierr = MatSetType(*mat,MATMPIBAIJ);CHKERRQ(ierr);
4082   ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
4083   PetscFunctionReturn(0);
4084 }
4085