xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision e366c154b69cf29c88be23f768f0f07dd2b3250c)
1 
2 #include <../src/mat/impls/baij/mpi/mpibaij.h>   /*I  "petscmat.h"  I*/
3 #include <petscblaslapack.h>
4 
5 extern PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
6 extern PetscErrorCode MatDisAssemble_MPIBAIJ(Mat);
7 extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []);
8 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
9 extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
10 extern PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
11 extern PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
12 extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar,Vec,Vec);
13 
14 #undef __FUNCT__
15 #define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ"
16 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
17 {
18   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
19   PetscErrorCode ierr;
20   PetscInt       i,*idxb = 0;
21   PetscScalar    *va,*vb;
22   Vec            vtmp;
23 
24   PetscFunctionBegin;
25   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
26   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
27   if (idx) {
28     for (i=0; i<A->rmap->n; i++) {
29       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
30     }
31   }
32 
33   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
34   if (idx) {ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);}
35   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
36   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
37 
38   for (i=0; i<A->rmap->n; i++) {
39     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
40       va[i] = vb[i];
41       if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);
42     }
43   }
44 
45   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
46   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
47   ierr = PetscFree(idxb);CHKERRQ(ierr);
48   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
49   PetscFunctionReturn(0);
50 }
51 
52 #undef __FUNCT__
53 #define __FUNCT__ "MatStoreValues_MPIBAIJ"
54 PetscErrorCode  MatStoreValues_MPIBAIJ(Mat mat)
55 {
56   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
57   PetscErrorCode ierr;
58 
59   PetscFunctionBegin;
60   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
61   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
62   PetscFunctionReturn(0);
63 }
64 
65 #undef __FUNCT__
66 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
67 PetscErrorCode  MatRetrieveValues_MPIBAIJ(Mat mat)
68 {
69   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
70   PetscErrorCode ierr;
71 
72   PetscFunctionBegin;
73   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
74   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
75   PetscFunctionReturn(0);
76 }
77 
78 /*
79      Local utility routine that creates a mapping from the global column
80    number to the local number in the off-diagonal part of the local
81    storage of the matrix.  This is done in a non scalable way since the
82    length of colmap equals the global matrix length.
83 */
84 #undef __FUNCT__
85 #define __FUNCT__ "MatCreateColmap_MPIBAIJ_Private"
86 PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
87 {
88   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
89   Mat_SeqBAIJ    *B    = (Mat_SeqBAIJ*)baij->B->data;
90   PetscErrorCode ierr;
91   PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;
92 
93   PetscFunctionBegin;
94 #if defined(PETSC_USE_CTABLE)
95   ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
96   for (i=0; i<nbs; i++) {
97     ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr);
98   }
99 #else
100   ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr);
101   ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
102   ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
103   for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
104 #endif
105   PetscFunctionReturn(0);
106 }
107 
108 #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
109   { \
110  \
111     brow = row/bs;  \
112     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
113     rmax = aimax[brow]; nrow = ailen[brow]; \
114     bcol = col/bs; \
115     ridx = row % bs; cidx = col % bs; \
116     low  = 0; high = nrow; \
117     while (high-low > 3) { \
118       t = (low+high)/2; \
119       if (rp[t] > bcol) high = t; \
120       else              low  = t; \
121     } \
122     for (_i=low; _i<high; _i++) { \
123       if (rp[_i] > bcol) break; \
124       if (rp[_i] == bcol) { \
125         bap = ap +  bs2*_i + bs*cidx + ridx; \
126         if (addv == ADD_VALUES) *bap += value;  \
127         else                    *bap  = value;  \
128         goto a_noinsert; \
129       } \
130     } \
131     if (a->nonew == 1) goto a_noinsert; \
132     if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
133     MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
134     N = nrow++ - 1;  \
135     /* shift up all the later entries in this row */ \
136     for (ii=N; ii>=_i; ii--) { \
137       rp[ii+1] = rp[ii]; \
138       ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
139     } \
140     if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
141     rp[_i]                      = bcol;  \
142     ap[bs2*_i + bs*cidx + ridx] = value;  \
143 a_noinsert:; \
144     ailen[brow] = nrow; \
145   }
146 
147 #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
148   { \
149     brow = row/bs;  \
150     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
151     rmax = bimax[brow]; nrow = bilen[brow]; \
152     bcol = col/bs; \
153     ridx = row % bs; cidx = col % bs; \
154     low  = 0; high = nrow; \
155     while (high-low > 3) { \
156       t = (low+high)/2; \
157       if (rp[t] > bcol) high = t; \
158       else              low  = t; \
159     } \
160     for (_i=low; _i<high; _i++) { \
161       if (rp[_i] > bcol) break; \
162       if (rp[_i] == bcol) { \
163         bap = ap +  bs2*_i + bs*cidx + ridx; \
164         if (addv == ADD_VALUES) *bap += value;  \
165         else                    *bap  = value;  \
166         goto b_noinsert; \
167       } \
168     } \
169     if (b->nonew == 1) goto b_noinsert; \
170     if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
171     MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
172     N = nrow++ - 1;  \
173     /* shift up all the later entries in this row */ \
174     for (ii=N; ii>=_i; ii--) { \
175       rp[ii+1] = rp[ii]; \
176       ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
177     } \
178     if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
179     rp[_i]                      = bcol;  \
180     ap[bs2*_i + bs*cidx + ridx] = value;  \
181 b_noinsert:; \
182     bilen[brow] = nrow; \
183   }
184 
185 #undef __FUNCT__
186 #define __FUNCT__ "MatSetValues_MPIBAIJ"
187 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
188 {
189   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
190   MatScalar      value;
191   PetscBool      roworiented = baij->roworiented;
192   PetscErrorCode ierr;
193   PetscInt       i,j,row,col;
194   PetscInt       rstart_orig=mat->rmap->rstart;
195   PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
196   PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;
197 
198   /* Some Variables required in the macro */
199   Mat         A     = baij->A;
200   Mat_SeqBAIJ *a    = (Mat_SeqBAIJ*)(A)->data;
201   PetscInt    *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
202   MatScalar   *aa   =a->a;
203 
204   Mat         B     = baij->B;
205   Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
206   PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
207   MatScalar   *ba   =b->a;
208 
209   PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
210   PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
211   MatScalar *ap,*bap;
212 
213   PetscFunctionBegin;
214   if (v) PetscValidScalarPointer(v,6);
215   for (i=0; i<m; i++) {
216     if (im[i] < 0) continue;
217 #if defined(PETSC_USE_DEBUG)
218     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
219 #endif
220     if (im[i] >= rstart_orig && im[i] < rend_orig) {
221       row = im[i] - rstart_orig;
222       for (j=0; j<n; j++) {
223         if (in[j] >= cstart_orig && in[j] < cend_orig) {
224           col = in[j] - cstart_orig;
225           if (roworiented) value = v[i*n+j];
226           else             value = v[i+j*m];
227           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
228           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
229         } else if (in[j] < 0) continue;
230 #if defined(PETSC_USE_DEBUG)
231         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
232 #endif
233         else {
234           if (mat->was_assembled) {
235             if (!baij->colmap) {
236               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
237             }
238 #if defined(PETSC_USE_CTABLE)
239             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
240             col  = col - 1;
241 #else
242             col = baij->colmap[in[j]/bs] - 1;
243 #endif
244             if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
245               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
246               col  =  in[j];
247               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
248               B    = baij->B;
249               b    = (Mat_SeqBAIJ*)(B)->data;
250               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
251               ba   =b->a;
252             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
253             else col += in[j]%bs;
254           } else col = in[j];
255           if (roworiented) value = v[i*n+j];
256           else             value = v[i+j*m];
257           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
258           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
259         }
260       }
261     } else {
262       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
263       if (!baij->donotstash) {
264         mat->assembled = PETSC_FALSE;
265         if (roworiented) {
266           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
267         } else {
268           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
269         }
270       }
271     }
272   }
273   PetscFunctionReturn(0);
274 }
275 
276 #undef __FUNCT__
277 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
278 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
279 {
280   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
281   const PetscScalar *value;
282   MatScalar         *barray     = baij->barray;
283   PetscBool         roworiented = baij->roworiented;
284   PetscErrorCode    ierr;
285   PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
286   PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
287   PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
288 
289   PetscFunctionBegin;
290   if (!barray) {
291     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
292     baij->barray = barray;
293   }
294 
295   if (roworiented) stepval = (n-1)*bs;
296   else stepval = (m-1)*bs;
297 
298   for (i=0; i<m; i++) {
299     if (im[i] < 0) continue;
300 #if defined(PETSC_USE_DEBUG)
301     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
302 #endif
303     if (im[i] >= rstart && im[i] < rend) {
304       row = im[i] - rstart;
305       for (j=0; j<n; j++) {
306         /* If NumCol = 1 then a copy is not required */
307         if ((roworiented) && (n == 1)) {
308           barray = (MatScalar*)v + i*bs2;
309         } else if ((!roworiented) && (m == 1)) {
310           barray = (MatScalar*)v + j*bs2;
311         } else { /* Here a copy is required */
312           if (roworiented) {
313             value = v + (i*(stepval+bs) + j)*bs;
314           } else {
315             value = v + (j*(stepval+bs) + i)*bs;
316           }
317           for (ii=0; ii<bs; ii++,value+=bs+stepval) {
318             for (jj=0; jj<bs; jj++) barray[jj] = value[jj];
319             barray += bs;
320           }
321           barray -= bs2;
322         }
323 
324         if (in[j] >= cstart && in[j] < cend) {
325           col  = in[j] - cstart;
326           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
327         } else if (in[j] < 0) continue;
328 #if defined(PETSC_USE_DEBUG)
329         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
330 #endif
331         else {
332           if (mat->was_assembled) {
333             if (!baij->colmap) {
334               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
335             }
336 
337 #if defined(PETSC_USE_DEBUG)
338 #if defined(PETSC_USE_CTABLE)
339             { PetscInt data;
340               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
341               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
342             }
343 #else
344             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
345 #endif
346 #endif
347 #if defined(PETSC_USE_CTABLE)
348             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
349             col  = (col - 1)/bs;
350 #else
351             col = (baij->colmap[in[j]] - 1)/bs;
352 #endif
353             if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
354               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
355               col  =  in[j];
356             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", bs*im[i], bs*in[j]);
357           } else col = in[j];
358           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
359         }
360       }
361     } else {
362       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
363       if (!baij->donotstash) {
364         if (roworiented) {
365           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
366         } else {
367           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
368         }
369       }
370     }
371   }
372   PetscFunctionReturn(0);
373 }
374 
375 #define HASH_KEY 0.6180339887
376 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
377 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
378 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
379 #undef __FUNCT__
380 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
381 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
382 {
383   Mat_MPIBAIJ    *baij       = (Mat_MPIBAIJ*)mat->data;
384   PetscBool      roworiented = baij->roworiented;
385   PetscErrorCode ierr;
386   PetscInt       i,j,row,col;
387   PetscInt       rstart_orig=mat->rmap->rstart;
388   PetscInt       rend_orig  =mat->rmap->rend,Nbs=baij->Nbs;
389   PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
390   PetscReal      tmp;
391   MatScalar      **HD = baij->hd,value;
392 #if defined(PETSC_USE_DEBUG)
393   PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
394 #endif
395 
396   PetscFunctionBegin;
397   if (v) PetscValidScalarPointer(v,6);
398   for (i=0; i<m; i++) {
399 #if defined(PETSC_USE_DEBUG)
400     if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
401     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
402 #endif
403     row = im[i];
404     if (row >= rstart_orig && row < rend_orig) {
405       for (j=0; j<n; j++) {
406         col = in[j];
407         if (roworiented) value = v[i*n+j];
408         else             value = v[i+j*m];
409         /* Look up PetscInto the Hash Table */
410         key = (row/bs)*Nbs+(col/bs)+1;
411         h1  = HASH(size,key,tmp);
412 
413 
414         idx = h1;
415 #if defined(PETSC_USE_DEBUG)
416         insert_ct++;
417         total_ct++;
418         if (HT[idx] != key) {
419           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
420           if (idx == size) {
421             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
422             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
423           }
424         }
425 #else
426         if (HT[idx] != key) {
427           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
428           if (idx == size) {
429             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
430             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
431           }
432         }
433 #endif
434         /* A HASH table entry is found, so insert the values at the correct address */
435         if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
436         else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
437       }
438     } else if (!baij->donotstash) {
439       if (roworiented) {
440         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
441       } else {
442         ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
443       }
444     }
445   }
446 #if defined(PETSC_USE_DEBUG)
447   baij->ht_total_ct  = total_ct;
448   baij->ht_insert_ct = insert_ct;
449 #endif
450   PetscFunctionReturn(0);
451 }
452 
453 #undef __FUNCT__
454 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
455 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
456 {
457   Mat_MPIBAIJ       *baij       = (Mat_MPIBAIJ*)mat->data;
458   PetscBool         roworiented = baij->roworiented;
459   PetscErrorCode    ierr;
460   PetscInt          i,j,ii,jj,row,col;
461   PetscInt          rstart=baij->rstartbs;
462   PetscInt          rend  =mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
463   PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
464   PetscReal         tmp;
465   MatScalar         **HD = baij->hd,*baij_a;
466   const PetscScalar *v_t,*value;
467 #if defined(PETSC_USE_DEBUG)
468   PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
469 #endif
470 
471   PetscFunctionBegin;
472   if (roworiented) stepval = (n-1)*bs;
473   else stepval = (m-1)*bs;
474 
475   for (i=0; i<m; i++) {
476 #if defined(PETSC_USE_DEBUG)
477     if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
478     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
479 #endif
480     row = im[i];
481     v_t = v + i*nbs2;
482     if (row >= rstart && row < rend) {
483       for (j=0; j<n; j++) {
484         col = in[j];
485 
486         /* Look up into the Hash Table */
487         key = row*Nbs+col+1;
488         h1  = HASH(size,key,tmp);
489 
490         idx = h1;
491 #if defined(PETSC_USE_DEBUG)
492         total_ct++;
493         insert_ct++;
494         if (HT[idx] != key) {
495           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
496           if (idx == size) {
497             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
498             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
499           }
500         }
501 #else
502         if (HT[idx] != key) {
503           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
504           if (idx == size) {
505             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
506             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
507           }
508         }
509 #endif
510         baij_a = HD[idx];
511         if (roworiented) {
512           /*value = v + i*(stepval+bs)*bs + j*bs;*/
513           /* value = v + (i*(stepval+bs)+j)*bs; */
514           value = v_t;
515           v_t  += bs;
516           if (addv == ADD_VALUES) {
517             for (ii=0; ii<bs; ii++,value+=stepval) {
518               for (jj=ii; jj<bs2; jj+=bs) {
519                 baij_a[jj] += *value++;
520               }
521             }
522           } else {
523             for (ii=0; ii<bs; ii++,value+=stepval) {
524               for (jj=ii; jj<bs2; jj+=bs) {
525                 baij_a[jj] = *value++;
526               }
527             }
528           }
529         } else {
530           value = v + j*(stepval+bs)*bs + i*bs;
531           if (addv == ADD_VALUES) {
532             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
533               for (jj=0; jj<bs; jj++) {
534                 baij_a[jj] += *value++;
535               }
536             }
537           } else {
538             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
539               for (jj=0; jj<bs; jj++) {
540                 baij_a[jj] = *value++;
541               }
542             }
543           }
544         }
545       }
546     } else {
547       if (!baij->donotstash) {
548         if (roworiented) {
549           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
550         } else {
551           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
552         }
553       }
554     }
555   }
556 #if defined(PETSC_USE_DEBUG)
557   baij->ht_total_ct  = total_ct;
558   baij->ht_insert_ct = insert_ct;
559 #endif
560   PetscFunctionReturn(0);
561 }
562 
563 #undef __FUNCT__
564 #define __FUNCT__ "MatGetValues_MPIBAIJ"
565 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
566 {
567   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
568   PetscErrorCode ierr;
569   PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
570   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
571 
572   PetscFunctionBegin;
573   for (i=0; i<m; i++) {
574     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
575     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
576     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
577       row = idxm[i] - bsrstart;
578       for (j=0; j<n; j++) {
579         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
580         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
581         if (idxn[j] >= bscstart && idxn[j] < bscend) {
582           col  = idxn[j] - bscstart;
583           ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
584         } else {
585           if (!baij->colmap) {
586             ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
587           }
588 #if defined(PETSC_USE_CTABLE)
589           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
590           data--;
591 #else
592           data = baij->colmap[idxn[j]/bs]-1;
593 #endif
594           if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
595           else {
596             col  = data + idxn[j]%bs;
597             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
598           }
599         }
600       }
601     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
602   }
603   PetscFunctionReturn(0);
604 }
605 
606 #undef __FUNCT__
607 #define __FUNCT__ "MatNorm_MPIBAIJ"
608 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
609 {
610   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
611   Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
612   PetscErrorCode ierr;
613   PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
614   PetscReal      sum = 0.0;
615   MatScalar      *v;
616 
617   PetscFunctionBegin;
618   if (baij->size == 1) {
619     ierr =  MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
620   } else {
621     if (type == NORM_FROBENIUS) {
622       v  = amat->a;
623       nz = amat->nz*bs2;
624       for (i=0; i<nz; i++) {
625         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
626       }
627       v  = bmat->a;
628       nz = bmat->nz*bs2;
629       for (i=0; i<nz; i++) {
630         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
631       }
632       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
633       *nrm = PetscSqrtReal(*nrm);
634     } else if (type == NORM_1) { /* max column sum */
635       PetscReal *tmp,*tmp2;
636       PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
637       ierr = PetscMalloc2(mat->cmap->N,PetscReal,&tmp,mat->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr);
638       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
639       v    = amat->a; jj = amat->j;
640       for (i=0; i<amat->nz; i++) {
641         for (j=0; j<bs; j++) {
642           col = bs*(cstart + *jj) + j; /* column index */
643           for (row=0; row<bs; row++) {
644             tmp[col] += PetscAbsScalar(*v);  v++;
645           }
646         }
647         jj++;
648       }
649       v = bmat->a; jj = bmat->j;
650       for (i=0; i<bmat->nz; i++) {
651         for (j=0; j<bs; j++) {
652           col = bs*garray[*jj] + j;
653           for (row=0; row<bs; row++) {
654             tmp[col] += PetscAbsScalar(*v); v++;
655           }
656         }
657         jj++;
658       }
659       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
660       *nrm = 0.0;
661       for (j=0; j<mat->cmap->N; j++) {
662         if (tmp2[j] > *nrm) *nrm = tmp2[j];
663       }
664       ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
665     } else if (type == NORM_INFINITY) { /* max row sum */
666       PetscReal *sums;
667       ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr);
668       sum  = 0.0;
669       for (j=0; j<amat->mbs; j++) {
670         for (row=0; row<bs; row++) sums[row] = 0.0;
671         v  = amat->a + bs2*amat->i[j];
672         nz = amat->i[j+1]-amat->i[j];
673         for (i=0; i<nz; i++) {
674           for (col=0; col<bs; col++) {
675             for (row=0; row<bs; row++) {
676               sums[row] += PetscAbsScalar(*v); v++;
677             }
678           }
679         }
680         v  = bmat->a + bs2*bmat->i[j];
681         nz = bmat->i[j+1]-bmat->i[j];
682         for (i=0; i<nz; i++) {
683           for (col=0; col<bs; col++) {
684             for (row=0; row<bs; row++) {
685               sums[row] += PetscAbsScalar(*v); v++;
686             }
687           }
688         }
689         for (row=0; row<bs; row++) {
690           if (sums[row] > sum) sum = sums[row];
691         }
692       }
693       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
694       ierr = PetscFree(sums);CHKERRQ(ierr);
695     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for this norm yet");
696   }
697   PetscFunctionReturn(0);
698 }
699 
700 /*
701   Creates the hash table, and sets the table
702   This table is created only once.
703   If new entried need to be added to the matrix
704   then the hash table has to be destroyed and
705   recreated.
706 */
707 #undef __FUNCT__
708 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
709 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
710 {
711   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
712   Mat            A     = baij->A,B=baij->B;
713   Mat_SeqBAIJ    *a    = (Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)B->data;
714   PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
715   PetscErrorCode ierr;
716   PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
717   PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
718   PetscInt       *HT,key;
719   MatScalar      **HD;
720   PetscReal      tmp;
721 #if defined(PETSC_USE_INFO)
722   PetscInt ct=0,max=0;
723 #endif
724 
725   PetscFunctionBegin;
726   if (baij->ht) PetscFunctionReturn(0);
727 
728   baij->ht_size = (PetscInt)(factor*nz);
729   ht_size       = baij->ht_size;
730 
731   /* Allocate Memory for Hash Table */
732   ierr = PetscMalloc2(ht_size,MatScalar*,&baij->hd,ht_size,PetscInt,&baij->ht);CHKERRQ(ierr);
733   ierr = PetscMemzero(baij->hd,ht_size*sizeof(MatScalar*));CHKERRQ(ierr);
734   ierr = PetscMemzero(baij->ht,ht_size*sizeof(PetscInt));CHKERRQ(ierr);
735   HD   = baij->hd;
736   HT   = baij->ht;
737 
738   /* Loop Over A */
739   for (i=0; i<a->mbs; i++) {
740     for (j=ai[i]; j<ai[i+1]; j++) {
741       row = i+rstart;
742       col = aj[j]+cstart;
743 
744       key = row*Nbs + col + 1;
745       h1  = HASH(ht_size,key,tmp);
746       for (k=0; k<ht_size; k++) {
747         if (!HT[(h1+k)%ht_size]) {
748           HT[(h1+k)%ht_size] = key;
749           HD[(h1+k)%ht_size] = a->a + j*bs2;
750           break;
751 #if defined(PETSC_USE_INFO)
752         } else {
753           ct++;
754 #endif
755         }
756       }
757 #if defined(PETSC_USE_INFO)
758       if (k> max) max = k;
759 #endif
760     }
761   }
762   /* Loop Over B */
763   for (i=0; i<b->mbs; i++) {
764     for (j=bi[i]; j<bi[i+1]; j++) {
765       row = i+rstart;
766       col = garray[bj[j]];
767       key = row*Nbs + col + 1;
768       h1  = HASH(ht_size,key,tmp);
769       for (k=0; k<ht_size; k++) {
770         if (!HT[(h1+k)%ht_size]) {
771           HT[(h1+k)%ht_size] = key;
772           HD[(h1+k)%ht_size] = b->a + j*bs2;
773           break;
774 #if defined(PETSC_USE_INFO)
775         } else {
776           ct++;
777 #endif
778         }
779       }
780 #if defined(PETSC_USE_INFO)
781       if (k> max) max = k;
782 #endif
783     }
784   }
785 
786   /* Print Summary */
787 #if defined(PETSC_USE_INFO)
788   for (i=0,j=0; i<ht_size; i++) {
789     if (HT[i]) j++;
790   }
791   ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
792 #endif
793   PetscFunctionReturn(0);
794 }
795 
796 #undef __FUNCT__
797 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
798 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
799 {
800   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
801   PetscErrorCode ierr;
802   PetscInt       nstash,reallocs;
803   InsertMode     addv;
804 
805   PetscFunctionBegin;
806   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
807 
808   /* make sure all processors are either in INSERTMODE or ADDMODE */
809   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
810   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
811   mat->insertmode = addv; /* in case this processor had no cache */
812 
813   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
814   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
815   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
816   ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
817   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
818   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
819   PetscFunctionReturn(0);
820 }
821 
822 #undef __FUNCT__
823 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
824 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
825 {
826   Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
827   Mat_SeqBAIJ    *a   =(Mat_SeqBAIJ*)baij->A->data;
828   PetscErrorCode ierr;
829   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
830   PetscInt       *row,*col;
831   PetscBool      r1,r2,r3,other_disassembled;
832   MatScalar      *val;
833   InsertMode     addv = mat->insertmode;
834   PetscMPIInt    n;
835 
836   PetscFunctionBegin;
837   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
838   if (!baij->donotstash && !mat->nooffprocentries) {
839     while (1) {
840       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
841       if (!flg) break;
842 
843       for (i=0; i<n;) {
844         /* Now identify the consecutive vals belonging to the same row */
845         for (j=i,rstart=row[j]; j<n; j++) {
846           if (row[j] != rstart) break;
847         }
848         if (j < n) ncols = j-i;
849         else       ncols = n-i;
850         /* Now assemble all these values with a single function call */
851         ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
852         i    = j;
853       }
854     }
855     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
856     /* Now process the block-stash. Since the values are stashed column-oriented,
857        set the roworiented flag to column oriented, and after MatSetValues()
858        restore the original flags */
859     r1 = baij->roworiented;
860     r2 = a->roworiented;
861     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
862 
863     baij->roworiented = PETSC_FALSE;
864     a->roworiented    = PETSC_FALSE;
865 
866     (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
867     while (1) {
868       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
869       if (!flg) break;
870 
871       for (i=0; i<n;) {
872         /* Now identify the consecutive vals belonging to the same row */
873         for (j=i,rstart=row[j]; j<n; j++) {
874           if (row[j] != rstart) break;
875         }
876         if (j < n) ncols = j-i;
877         else       ncols = n-i;
878         ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
879         i    = j;
880       }
881     }
882     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
883 
884     baij->roworiented = r1;
885     a->roworiented    = r2;
886 
887     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
888   }
889 
890   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
891   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
892 
893   /* determine if any processor has disassembled, if so we must
894      also disassemble ourselfs, in order that we may reassemble. */
895   /*
896      if nonzero structure of submatrix B cannot change then we know that
897      no processor disassembled thus we can skip this stuff
898   */
899   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
900     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
901     if (mat->was_assembled && !other_disassembled) {
902       ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
903     }
904   }
905 
906   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
907     ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
908   }
909   ierr = MatSetOption(baij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
910   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
911   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
912 
913 #if defined(PETSC_USE_INFO)
914   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
915     ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);
916 
917     baij->ht_total_ct  = 0;
918     baij->ht_insert_ct = 0;
919   }
920 #endif
921   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
922     ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
923 
924     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
925     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
926   }
927 
928   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
929 
930   baij->rowvalues = 0;
931   PetscFunctionReturn(0);
932 }
933 
934 #include <petscdraw.h>
935 #undef __FUNCT__
936 #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
937 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
938 {
939   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
940   PetscErrorCode    ierr;
941   PetscMPIInt       size = baij->size,rank = baij->rank;
942   PetscInt          bs   = mat->rmap->bs;
943   PetscBool         iascii,isdraw;
944   PetscViewer       sviewer;
945   PetscViewerFormat format;
946 
947   PetscFunctionBegin;
948   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
949   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
950   if (iascii) {
951     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
952     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
953       MatInfo info;
954       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
955       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
956       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
957       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
958                                                 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
959       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
960       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
961       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
962       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
963       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
964       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
965       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
966       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
967       PetscFunctionReturn(0);
968     } else if (format == PETSC_VIEWER_ASCII_INFO) {
969       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
970       PetscFunctionReturn(0);
971     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
972       PetscFunctionReturn(0);
973     }
974   }
975 
976   if (isdraw) {
977     PetscDraw draw;
978     PetscBool isnull;
979     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
980     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
981   }
982 
983   if (size == 1) {
984     ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
985     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
986   } else {
987     /* assemble the entire matrix onto first processor. */
988     Mat         A;
989     Mat_SeqBAIJ *Aloc;
990     PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
991     MatScalar   *a;
992 
993     /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
994     /* Perhaps this should be the type of mat? */
995     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
996     if (!rank) {
997       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
998     } else {
999       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1000     }
1001     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
1002     ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
1003     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1004     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1005 
1006     /* copy over the A part */
1007     Aloc = (Mat_SeqBAIJ*)baij->A->data;
1008     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1009     ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
1010 
1011     for (i=0; i<mbs; i++) {
1012       rvals[0] = bs*(baij->rstartbs + i);
1013       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1014       for (j=ai[i]; j<ai[i+1]; j++) {
1015         col = (baij->cstartbs+aj[j])*bs;
1016         for (k=0; k<bs; k++) {
1017           ierr      = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1018           col++; a += bs;
1019         }
1020       }
1021     }
1022     /* copy over the B part */
1023     Aloc = (Mat_SeqBAIJ*)baij->B->data;
1024     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1025     for (i=0; i<mbs; i++) {
1026       rvals[0] = bs*(baij->rstartbs + i);
1027       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1028       for (j=ai[i]; j<ai[i+1]; j++) {
1029         col = baij->garray[aj[j]]*bs;
1030         for (k=0; k<bs; k++) {
1031           ierr      = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1032           col++; a += bs;
1033         }
1034       }
1035     }
1036     ierr = PetscFree(rvals);CHKERRQ(ierr);
1037     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1038     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1039     /*
1040        Everyone has to call to draw the matrix since the graphics waits are
1041        synchronized across all processors that share the PetscDraw object
1042     */
1043     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1044     if (!rank) {
1045       ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1046       /* Set the type name to MATMPIBAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqBAIJ_ASCII()*/
1047       PetscStrcpy(((PetscObject)((Mat_MPIBAIJ*)(A->data))->A)->type_name,MATMPIBAIJ);
1048       ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1049     }
1050     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1051     ierr = MatDestroy(&A);CHKERRQ(ierr);
1052   }
1053   PetscFunctionReturn(0);
1054 }
1055 
1056 #undef __FUNCT__
1057 #define __FUNCT__ "MatView_MPIBAIJ_Binary"
1058 static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
1059 {
1060   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)mat->data;
1061   Mat_SeqBAIJ    *A = (Mat_SeqBAIJ*)a->A->data;
1062   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)a->B->data;
1063   PetscErrorCode ierr;
1064   PetscInt       i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen;
1065   PetscInt       *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll;
1066   int            fd;
1067   PetscScalar    *column_values;
1068   FILE           *file;
1069   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1070   PetscInt       message_count,flowcontrolcount;
1071 
1072   PetscFunctionBegin;
1073   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1074   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1075   nz   = bs2*(A->nz + B->nz);
1076   rlen = mat->rmap->n;
1077   if (!rank) {
1078     header[0] = MAT_FILE_CLASSID;
1079     header[1] = mat->rmap->N;
1080     header[2] = mat->cmap->N;
1081 
1082     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1083     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1084     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1085     /* get largest number of rows any processor has */
1086     range = mat->rmap->range;
1087     for (i=1; i<size; i++) {
1088       rlen = PetscMax(rlen,range[i+1] - range[i]);
1089     }
1090   } else {
1091     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1092   }
1093 
1094   ierr = PetscMalloc((rlen/bs)*sizeof(PetscInt),&crow_lens);CHKERRQ(ierr);
1095   /* compute lengths of each row  */
1096   for (i=0; i<a->mbs; i++) {
1097     crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1098   }
1099   /* store the row lengths to the file */
1100   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1101   if (!rank) {
1102     MPI_Status status;
1103     ierr = PetscMalloc(rlen*sizeof(PetscInt),&row_lens);CHKERRQ(ierr);
1104     rlen = (range[1] - range[0])/bs;
1105     for (i=0; i<rlen; i++) {
1106       for (j=0; j<bs; j++) {
1107         row_lens[i*bs+j] = bs*crow_lens[i];
1108       }
1109     }
1110     ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1111     for (i=1; i<size; i++) {
1112       rlen = (range[i+1] - range[i])/bs;
1113       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1114       ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1115       for (k=0; k<rlen; k++) {
1116         for (j=0; j<bs; j++) {
1117           row_lens[k*bs+j] = bs*crow_lens[k];
1118         }
1119       }
1120       ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1121     }
1122     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1123     ierr = PetscFree(row_lens);CHKERRQ(ierr);
1124   } else {
1125     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1126     ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1127     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1128   }
1129   ierr = PetscFree(crow_lens);CHKERRQ(ierr);
1130 
1131   /* load up the local column indices. Include for all rows not just one for each block row since process 0 does not have the
1132      information needed to make it for each row from a block row. This does require more communication but still not more than
1133      the communication needed for the nonzero values  */
1134   nzmax = nz; /*  space a largest processor needs */
1135   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1136   ierr  = PetscMalloc(nzmax*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
1137   cnt   = 0;
1138   for (i=0; i<a->mbs; i++) {
1139     pcnt = cnt;
1140     for (j=B->i[i]; j<B->i[i+1]; j++) {
1141       if ((col = garray[B->j[j]]) > cstart) break;
1142       for (l=0; l<bs; l++) {
1143         column_indices[cnt++] = bs*col+l;
1144       }
1145     }
1146     for (k=A->i[i]; k<A->i[i+1]; k++) {
1147       for (l=0; l<bs; l++) {
1148         column_indices[cnt++] = bs*(A->j[k] + cstart)+l;
1149       }
1150     }
1151     for (; j<B->i[i+1]; j++) {
1152       for (l=0; l<bs; l++) {
1153         column_indices[cnt++] = bs*garray[B->j[j]]+l;
1154       }
1155     }
1156     len = cnt - pcnt;
1157     for (k=1; k<bs; k++) {
1158       ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr);
1159       cnt += len;
1160     }
1161   }
1162   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1163 
1164   /* store the columns to the file */
1165   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1166   if (!rank) {
1167     MPI_Status status;
1168     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1169     for (i=1; i<size; i++) {
1170       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1171       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1172       ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1173       ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1174     }
1175     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1176   } else {
1177     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1178     ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1179     ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1180     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1181   }
1182   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1183 
1184   /* load up the numerical values */
1185   ierr = PetscMalloc(nzmax*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1186   cnt  = 0;
1187   for (i=0; i<a->mbs; i++) {
1188     rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]);
1189     for (j=B->i[i]; j<B->i[i+1]; j++) {
1190       if (garray[B->j[j]] > cstart) break;
1191       for (l=0; l<bs; l++) {
1192         for (ll=0; ll<bs; ll++) {
1193           column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1194         }
1195       }
1196       cnt += bs;
1197     }
1198     for (k=A->i[i]; k<A->i[i+1]; k++) {
1199       for (l=0; l<bs; l++) {
1200         for (ll=0; ll<bs; ll++) {
1201           column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll];
1202         }
1203       }
1204       cnt += bs;
1205     }
1206     for (; j<B->i[i+1]; j++) {
1207       for (l=0; l<bs; l++) {
1208         for (ll=0; ll<bs; ll++) {
1209           column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1210         }
1211       }
1212       cnt += bs;
1213     }
1214     cnt += (bs-1)*rlen;
1215   }
1216   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1217 
1218   /* store the column values to the file */
1219   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1220   if (!rank) {
1221     MPI_Status status;
1222     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1223     for (i=1; i<size; i++) {
1224       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1225       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1226       ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1227       ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1228     }
1229     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1230   } else {
1231     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1232     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1233     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1234     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1235   }
1236   ierr = PetscFree(column_values);CHKERRQ(ierr);
1237 
1238   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1239   if (file) {
1240     fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
1241   }
1242   PetscFunctionReturn(0);
1243 }
1244 
1245 #undef __FUNCT__
1246 #define __FUNCT__ "MatView_MPIBAIJ"
1247 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
1248 {
1249   PetscErrorCode ierr;
1250   PetscBool      iascii,isdraw,issocket,isbinary;
1251 
1252   PetscFunctionBegin;
1253   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1254   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1255   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1256   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1257   if (iascii || isdraw || issocket) {
1258     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1259   } else if (isbinary) {
1260     ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1261   }
1262   PetscFunctionReturn(0);
1263 }
1264 
1265 #undef __FUNCT__
1266 #define __FUNCT__ "MatDestroy_MPIBAIJ"
1267 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
1268 {
1269   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1270   PetscErrorCode ierr;
1271 
1272   PetscFunctionBegin;
1273 #if defined(PETSC_USE_LOG)
1274   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
1275 #endif
1276   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1277   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
1278   ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
1279   ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
1280 #if defined(PETSC_USE_CTABLE)
1281   ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
1282 #else
1283   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
1284 #endif
1285   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
1286   ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
1287   ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
1288   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
1289   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
1290   ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);
1291   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
1292   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1293 
1294   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1295   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1296   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1297   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
1298   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1299   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1300   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1301   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C",NULL);CHKERRQ(ierr);
1302   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1303   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C",NULL);CHKERRQ(ierr);
1304   PetscFunctionReturn(0);
1305 }
1306 
1307 #undef __FUNCT__
1308 #define __FUNCT__ "MatMult_MPIBAIJ"
1309 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1310 {
1311   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1312   PetscErrorCode ierr;
1313   PetscInt       nt;
1314 
1315   PetscFunctionBegin;
1316   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1317   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1318   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1319   if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
1320   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1321   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1322   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1323   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1324   PetscFunctionReturn(0);
1325 }
1326 
1327 #undef __FUNCT__
1328 #define __FUNCT__ "MatMultAdd_MPIBAIJ"
1329 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1330 {
1331   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1332   PetscErrorCode ierr;
1333 
1334   PetscFunctionBegin;
1335   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1336   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1337   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1338   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1339   PetscFunctionReturn(0);
1340 }
1341 
1342 #undef __FUNCT__
1343 #define __FUNCT__ "MatMultTranspose_MPIBAIJ"
1344 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1345 {
1346   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1347   PetscErrorCode ierr;
1348   PetscBool      merged;
1349 
1350   PetscFunctionBegin;
1351   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1352   /* do nondiagonal part */
1353   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1354   if (!merged) {
1355     /* send it on its way */
1356     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1357     /* do local part */
1358     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1359     /* receive remote parts: note this assumes the values are not actually */
1360     /* inserted in yy until the next line */
1361     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1362   } else {
1363     /* do local part */
1364     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1365     /* send it on its way */
1366     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1367     /* values actually were received in the Begin() but we need to call this nop */
1368     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1369   }
1370   PetscFunctionReturn(0);
1371 }
1372 
1373 #undef __FUNCT__
1374 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
1375 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1376 {
1377   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1378   PetscErrorCode ierr;
1379 
1380   PetscFunctionBegin;
1381   /* do nondiagonal part */
1382   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1383   /* send it on its way */
1384   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1385   /* do local part */
1386   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1387   /* receive remote parts: note this assumes the values are not actually */
1388   /* inserted in yy until the next line, which is true for my implementation*/
1389   /* but is not perhaps always true. */
1390   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1391   PetscFunctionReturn(0);
1392 }
1393 
1394 /*
1395   This only works correctly for square matrices where the subblock A->A is the
1396    diagonal block
1397 */
1398 #undef __FUNCT__
1399 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
1400 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1401 {
1402   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1403   PetscErrorCode ierr;
1404 
1405   PetscFunctionBegin;
1406   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1407   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1408   PetscFunctionReturn(0);
1409 }
1410 
1411 #undef __FUNCT__
1412 #define __FUNCT__ "MatScale_MPIBAIJ"
1413 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
1414 {
1415   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1416   PetscErrorCode ierr;
1417 
1418   PetscFunctionBegin;
1419   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1420   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1421   PetscFunctionReturn(0);
1422 }
1423 
1424 #undef __FUNCT__
1425 #define __FUNCT__ "MatGetRow_MPIBAIJ"
1426 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1427 {
1428   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
1429   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1430   PetscErrorCode ierr;
1431   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1432   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
1433   PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;
1434 
1435   PetscFunctionBegin;
1436   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1437   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1438   mat->getrowactive = PETSC_TRUE;
1439 
1440   if (!mat->rowvalues && (idx || v)) {
1441     /*
1442         allocate enough space to hold information from the longest row.
1443     */
1444     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1445     PetscInt    max = 1,mbs = mat->mbs,tmp;
1446     for (i=0; i<mbs; i++) {
1447       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1448       if (max < tmp) max = tmp;
1449     }
1450     ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1451   }
1452   lrow = row - brstart;
1453 
1454   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1455   if (!v)   {pvA = 0; pvB = 0;}
1456   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1457   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1458   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1459   nztot = nzA + nzB;
1460 
1461   cmap = mat->garray;
1462   if (v  || idx) {
1463     if (nztot) {
1464       /* Sort by increasing column numbers, assuming A and B already sorted */
1465       PetscInt imark = -1;
1466       if (v) {
1467         *v = v_p = mat->rowvalues;
1468         for (i=0; i<nzB; i++) {
1469           if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
1470           else break;
1471         }
1472         imark = i;
1473         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1474         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1475       }
1476       if (idx) {
1477         *idx = idx_p = mat->rowindices;
1478         if (imark > -1) {
1479           for (i=0; i<imark; i++) {
1480             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1481           }
1482         } else {
1483           for (i=0; i<nzB; i++) {
1484             if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1485             else break;
1486           }
1487           imark = i;
1488         }
1489         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1490         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1491       }
1492     } else {
1493       if (idx) *idx = 0;
1494       if (v)   *v   = 0;
1495     }
1496   }
1497   *nz  = nztot;
1498   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1499   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1500   PetscFunctionReturn(0);
1501 }
1502 
1503 #undef __FUNCT__
1504 #define __FUNCT__ "MatRestoreRow_MPIBAIJ"
1505 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1506 {
1507   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1508 
1509   PetscFunctionBegin;
1510   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1511   baij->getrowactive = PETSC_FALSE;
1512   PetscFunctionReturn(0);
1513 }
1514 
1515 #undef __FUNCT__
1516 #define __FUNCT__ "MatZeroEntries_MPIBAIJ"
1517 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1518 {
1519   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1520   PetscErrorCode ierr;
1521 
1522   PetscFunctionBegin;
1523   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1524   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1525   PetscFunctionReturn(0);
1526 }
1527 
1528 #undef __FUNCT__
1529 #define __FUNCT__ "MatGetInfo_MPIBAIJ"
1530 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1531 {
1532   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
1533   Mat            A  = a->A,B = a->B;
1534   PetscErrorCode ierr;
1535   PetscReal      isend[5],irecv[5];
1536 
1537   PetscFunctionBegin;
1538   info->block_size = (PetscReal)matin->rmap->bs;
1539 
1540   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1541 
1542   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1543   isend[3] = info->memory;  isend[4] = info->mallocs;
1544 
1545   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1546 
1547   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1548   isend[3] += info->memory;  isend[4] += info->mallocs;
1549 
1550   if (flag == MAT_LOCAL) {
1551     info->nz_used      = isend[0];
1552     info->nz_allocated = isend[1];
1553     info->nz_unneeded  = isend[2];
1554     info->memory       = isend[3];
1555     info->mallocs      = isend[4];
1556   } else if (flag == MAT_GLOBAL_MAX) {
1557     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1558 
1559     info->nz_used      = irecv[0];
1560     info->nz_allocated = irecv[1];
1561     info->nz_unneeded  = irecv[2];
1562     info->memory       = irecv[3];
1563     info->mallocs      = irecv[4];
1564   } else if (flag == MAT_GLOBAL_SUM) {
1565     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1566 
1567     info->nz_used      = irecv[0];
1568     info->nz_allocated = irecv[1];
1569     info->nz_unneeded  = irecv[2];
1570     info->memory       = irecv[3];
1571     info->mallocs      = irecv[4];
1572   } else SETERRQ1(PetscObjectComm((PetscObject)matin),PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1573   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1574   info->fill_ratio_needed = 0;
1575   info->factor_mallocs    = 0;
1576   PetscFunctionReturn(0);
1577 }
1578 
1579 #undef __FUNCT__
1580 #define __FUNCT__ "MatSetOption_MPIBAIJ"
1581 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg)
1582 {
1583   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1584   PetscErrorCode ierr;
1585 
1586   PetscFunctionBegin;
1587   switch (op) {
1588   case MAT_NEW_NONZERO_LOCATIONS:
1589   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1590   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1591   case MAT_KEEP_NONZERO_PATTERN:
1592   case MAT_NEW_NONZERO_LOCATION_ERR:
1593     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1594     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1595     break;
1596   case MAT_ROW_ORIENTED:
1597     a->roworiented = flg;
1598 
1599     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1600     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1601     break;
1602   case MAT_NEW_DIAGONALS:
1603     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1604     break;
1605   case MAT_IGNORE_OFF_PROC_ENTRIES:
1606     a->donotstash = flg;
1607     break;
1608   case MAT_USE_HASH_TABLE:
1609     a->ht_flag = flg;
1610     break;
1611   case MAT_SYMMETRIC:
1612   case MAT_STRUCTURALLY_SYMMETRIC:
1613   case MAT_HERMITIAN:
1614   case MAT_SYMMETRY_ETERNAL:
1615     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1616     break;
1617   default:
1618     SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"unknown option %d",op);
1619   }
1620   PetscFunctionReturn(0);
1621 }
1622 
1623 #undef __FUNCT__
1624 #define __FUNCT__ "MatTranspose_MPIBAIJ"
1625 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
1626 {
1627   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
1628   Mat_SeqBAIJ    *Aloc;
1629   Mat            B;
1630   PetscErrorCode ierr;
1631   PetscInt       M =A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
1632   PetscInt       bs=A->rmap->bs,mbs=baij->mbs;
1633   MatScalar      *a;
1634 
1635   PetscFunctionBegin;
1636   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1637   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1638     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1639     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1640     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1641     /* Do not know preallocation information, but must set block size */
1642     ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,NULL,PETSC_DECIDE,NULL);CHKERRQ(ierr);
1643   } else {
1644     B = *matout;
1645   }
1646 
1647   /* copy over the A part */
1648   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1649   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1650   ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
1651 
1652   for (i=0; i<mbs; i++) {
1653     rvals[0] = bs*(baij->rstartbs + i);
1654     for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1655     for (j=ai[i]; j<ai[i+1]; j++) {
1656       col = (baij->cstartbs+aj[j])*bs;
1657       for (k=0; k<bs; k++) {
1658         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1659 
1660         col++; a += bs;
1661       }
1662     }
1663   }
1664   /* copy over the B part */
1665   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1666   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1667   for (i=0; i<mbs; i++) {
1668     rvals[0] = bs*(baij->rstartbs + i);
1669     for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1670     for (j=ai[i]; j<ai[i+1]; j++) {
1671       col = baij->garray[aj[j]]*bs;
1672       for (k=0; k<bs; k++) {
1673         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1674         col++;
1675         a += bs;
1676       }
1677     }
1678   }
1679   ierr = PetscFree(rvals);CHKERRQ(ierr);
1680   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1681   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1682 
1683   if (reuse == MAT_INITIAL_MATRIX || *matout != A) *matout = B;
1684   else {
1685     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
1686   }
1687   PetscFunctionReturn(0);
1688 }
1689 
1690 #undef __FUNCT__
1691 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
1692 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1693 {
1694   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1695   Mat            a     = baij->A,b = baij->B;
1696   PetscErrorCode ierr;
1697   PetscInt       s1,s2,s3;
1698 
1699   PetscFunctionBegin;
1700   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1701   if (rr) {
1702     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1703     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1704     /* Overlap communication with computation. */
1705     ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1706   }
1707   if (ll) {
1708     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1709     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1710     ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
1711   }
1712   /* scale  the diagonal block */
1713   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1714 
1715   if (rr) {
1716     /* Do a scatter end and then right scale the off-diagonal block */
1717     ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1718     ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
1719   }
1720   PetscFunctionReturn(0);
1721 }
1722 
1723 #undef __FUNCT__
1724 #define __FUNCT__ "MatZeroRows_MPIBAIJ"
1725 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
1726 {
1727   Mat_MPIBAIJ       *l = (Mat_MPIBAIJ*)A->data;
1728   PetscErrorCode    ierr;
1729   PetscMPIInt       imdex,size = l->size,n,rank = l->rank;
1730   PetscInt          i,*owners = A->rmap->range;
1731   PetscInt          *nprocs,j,idx,nsends,row;
1732   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
1733   PetscInt          *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source,lastidx = -1;
1734   PetscInt          *lens,*lrows,*values,rstart_bs=A->rmap->rstart;
1735   MPI_Comm          comm;
1736   MPI_Request       *send_waits,*recv_waits;
1737   MPI_Status        recv_status,*send_status;
1738   const PetscScalar *xx;
1739   PetscScalar       *bb;
1740 #if defined(PETSC_DEBUG)
1741   PetscBool         found = PETSC_FALSE;
1742 #endif
1743 
1744   PetscFunctionBegin;
1745   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1746   /*  first count number of contributors to each processor */
1747   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
1748   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
1749   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr);  /* see note*/
1750   j    = 0;
1751   for (i=0; i<N; i++) {
1752     if (lastidx > (idx = rows[i])) j = 0;
1753     lastidx = idx;
1754     for (; j<size; j++) {
1755       if (idx >= owners[j] && idx < owners[j+1]) {
1756         nprocs[2*j]++;
1757         nprocs[2*j+1] = 1;
1758         owner[i]      = j;
1759 #if defined(PETSC_DEBUG)
1760         found = PETSC_TRUE;
1761 #endif
1762         break;
1763       }
1764     }
1765 #if defined(PETSC_DEBUG)
1766     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
1767     found = PETSC_FALSE;
1768 #endif
1769   }
1770   nsends = 0;  for (i=0; i<size; i++) nsends += nprocs[2*i+1];
1771 
1772   if (A->nooffproczerorows) {
1773     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
1774     nrecvs = nsends;
1775     nmax   = N;
1776   } else {
1777     /* inform other processors of number of messages and max length*/
1778     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
1779   }
1780 
1781   /* post receives:   */
1782   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
1783   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
1784   for (i=0; i<nrecvs; i++) {
1785     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
1786   }
1787 
1788   /* do sends:
1789      1) starts[i] gives the starting index in svalues for stuff going to
1790      the ith processor
1791   */
1792   ierr      = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
1793   ierr      = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
1794   ierr      = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
1795   starts[0] = 0;
1796   for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
1797   for (i=0; i<N; i++) {
1798     svalues[starts[owner[i]]++] = rows[i];
1799   }
1800 
1801   starts[0] = 0;
1802   for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
1803   count = 0;
1804   for (i=0; i<size; i++) {
1805     if (nprocs[2*i+1]) {
1806       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
1807     }
1808   }
1809   ierr = PetscFree(starts);CHKERRQ(ierr);
1810 
1811   base = owners[rank];
1812 
1813   /*  wait on receives */
1814   ierr  = PetscMalloc2(nrecvs+1,PetscInt,&lens,nrecvs+1,PetscInt,&source);CHKERRQ(ierr);
1815   count = nrecvs;
1816   slen  = 0;
1817   while (count) {
1818     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
1819     /* unpack receives into our local space */
1820     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
1821 
1822     source[imdex] = recv_status.MPI_SOURCE;
1823     lens[imdex]   = n;
1824     slen         += n;
1825     count--;
1826   }
1827   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1828 
1829   /* move the data into the send scatter */
1830   ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
1831   count = 0;
1832   for (i=0; i<nrecvs; i++) {
1833     values = rvalues + i*nmax;
1834     for (j=0; j<lens[i]; j++) {
1835       lrows[count++] = values[j] - base;
1836     }
1837   }
1838   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1839   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
1840   ierr = PetscFree(owner);CHKERRQ(ierr);
1841   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1842 
1843   /* fix right hand side if needed */
1844   if (x && b) {
1845     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
1846     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1847     for (i=0; i<slen; i++) {
1848       bb[lrows[i]] = diag*xx[lrows[i]];
1849     }
1850     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
1851     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1852   }
1853 
1854   /* actually zap the local rows */
1855   /*
1856         Zero the required rows. If the "diagonal block" of the matrix
1857      is square and the user wishes to set the diagonal we use separate
1858      code so that MatSetValues() is not called for each diagonal allocating
1859      new memory, thus calling lots of mallocs and slowing things down.
1860 
1861   */
1862   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1863   ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
1864   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
1865     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
1866   } else if (diag != 0.0) {
1867     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
1868     if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1869        MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1870     for (i=0; i<slen; i++) {
1871       row  = lrows[i] + rstart_bs;
1872       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
1873     }
1874     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1875     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1876   } else {
1877     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
1878   }
1879 
1880   ierr = PetscFree(lrows);CHKERRQ(ierr);
1881 
1882   /* wait on sends */
1883   if (nsends) {
1884     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1885     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1886     ierr = PetscFree(send_status);CHKERRQ(ierr);
1887   }
1888   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1889   ierr = PetscFree(svalues);CHKERRQ(ierr);
1890   PetscFunctionReturn(0);
1891 }
1892 
1893 #undef __FUNCT__
1894 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
1895 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1896 {
1897   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1898   PetscErrorCode ierr;
1899 
1900   PetscFunctionBegin;
1901   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1902   PetscFunctionReturn(0);
1903 }
1904 
1905 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat*);
1906 
1907 #undef __FUNCT__
1908 #define __FUNCT__ "MatEqual_MPIBAIJ"
1909 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool  *flag)
1910 {
1911   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1912   Mat            a,b,c,d;
1913   PetscBool      flg;
1914   PetscErrorCode ierr;
1915 
1916   PetscFunctionBegin;
1917   a = matA->A; b = matA->B;
1918   c = matB->A; d = matB->B;
1919 
1920   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1921   if (flg) {
1922     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1923   }
1924   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1925   PetscFunctionReturn(0);
1926 }
1927 
1928 #undef __FUNCT__
1929 #define __FUNCT__ "MatCopy_MPIBAIJ"
1930 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
1931 {
1932   PetscErrorCode ierr;
1933   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1934   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
1935 
1936   PetscFunctionBegin;
1937   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1938   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1939     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1940   } else {
1941     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1942     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1943   }
1944   PetscFunctionReturn(0);
1945 }
1946 
1947 #undef __FUNCT__
1948 #define __FUNCT__ "MatSetUp_MPIBAIJ"
1949 PetscErrorCode MatSetUp_MPIBAIJ(Mat A)
1950 {
1951   PetscErrorCode ierr;
1952 
1953   PetscFunctionBegin;
1954   ierr =  MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1955   PetscFunctionReturn(0);
1956 }
1957 
1958 #undef __FUNCT__
1959 #define __FUNCT__ "MatAXPY_MPIBAIJ"
1960 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1961 {
1962   PetscErrorCode ierr;
1963   Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ*)X->data,*yy=(Mat_MPIBAIJ*)Y->data;
1964   PetscBLASInt   bnz,one=1;
1965   Mat_SeqBAIJ    *x,*y;
1966 
1967   PetscFunctionBegin;
1968   if (str == SAME_NONZERO_PATTERN) {
1969     PetscScalar alpha = a;
1970     x    = (Mat_SeqBAIJ*)xx->A->data;
1971     y    = (Mat_SeqBAIJ*)yy->A->data;
1972     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
1973     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
1974     x    = (Mat_SeqBAIJ*)xx->B->data;
1975     y    = (Mat_SeqBAIJ*)yy->B->data;
1976     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
1977     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
1978   } else {
1979     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1980   }
1981   PetscFunctionReturn(0);
1982 }
1983 
1984 #undef __FUNCT__
1985 #define __FUNCT__ "MatRealPart_MPIBAIJ"
1986 PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
1987 {
1988   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1989   PetscErrorCode ierr;
1990 
1991   PetscFunctionBegin;
1992   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1993   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1994   PetscFunctionReturn(0);
1995 }
1996 
1997 #undef __FUNCT__
1998 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ"
1999 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
2000 {
2001   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
2002   PetscErrorCode ierr;
2003 
2004   PetscFunctionBegin;
2005   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2006   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2007   PetscFunctionReturn(0);
2008 }
2009 
2010 #undef __FUNCT__
2011 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ"
2012 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
2013 {
2014   PetscErrorCode ierr;
2015   IS             iscol_local;
2016   PetscInt       csize;
2017 
2018   PetscFunctionBegin;
2019   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
2020   if (call == MAT_REUSE_MATRIX) {
2021     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
2022     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2023   } else {
2024     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
2025   }
2026   ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
2027   if (call == MAT_INITIAL_MATRIX) {
2028     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
2029     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
2030   }
2031   PetscFunctionReturn(0);
2032 }
2033 extern PetscErrorCode MatGetSubMatrices_MPIBAIJ_local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,PetscBool*,Mat*);
2034 #undef __FUNCT__
2035 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ_Private"
2036 /*
2037   Not great since it makes two copies of the submatrix, first an SeqBAIJ
2038   in local and then by concatenating the local matrices the end result.
2039   Writing it directly would be much like MatGetSubMatrices_MPIBAIJ()
2040 */
2041 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2042 {
2043   PetscErrorCode ierr;
2044   PetscMPIInt    rank,size;
2045   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs;
2046   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol,nrow;
2047   Mat            M,Mreuse;
2048   MatScalar      *vwork,*aa;
2049   MPI_Comm       comm;
2050   IS             isrow_new, iscol_new;
2051   PetscBool      idflag,allrows, allcols;
2052   Mat_SeqBAIJ    *aij;
2053 
2054   PetscFunctionBegin;
2055   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2056   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2057   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2058   /* The compression and expansion should be avoided. Doesn't point
2059      out errors, might change the indices, hence buggey */
2060   ierr = ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new);CHKERRQ(ierr);
2061   ierr = ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new);CHKERRQ(ierr);
2062 
2063   /* Check for special case: each processor gets entire matrix columns */
2064   ierr = ISIdentity(iscol,&idflag);CHKERRQ(ierr);
2065   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
2066   if (idflag && ncol == mat->cmap->N) allcols = PETSC_TRUE;
2067   else allcols = PETSC_FALSE;
2068 
2069   ierr = ISIdentity(isrow,&idflag);CHKERRQ(ierr);
2070   ierr = ISGetLocalSize(isrow,&nrow);CHKERRQ(ierr);
2071   if (idflag && nrow == mat->rmap->N) allrows = PETSC_TRUE;
2072   else allrows = PETSC_FALSE;
2073 
2074   if (call ==  MAT_REUSE_MATRIX) {
2075     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
2076     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2077     ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_REUSE_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr);
2078   } else {
2079     ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_INITIAL_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr);
2080   }
2081   ierr = ISDestroy(&isrow_new);CHKERRQ(ierr);
2082   ierr = ISDestroy(&iscol_new);CHKERRQ(ierr);
2083   /*
2084       m - number of local rows
2085       n - number of columns (same on all processors)
2086       rstart - first row in new global matrix generated
2087   */
2088   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2089   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2090   m    = m/bs;
2091   n    = n/bs;
2092 
2093   if (call == MAT_INITIAL_MATRIX) {
2094     aij = (Mat_SeqBAIJ*)(Mreuse)->data;
2095     ii  = aij->i;
2096     jj  = aij->j;
2097 
2098     /*
2099         Determine the number of non-zeros in the diagonal and off-diagonal
2100         portions of the matrix in order to do correct preallocation
2101     */
2102 
2103     /* first get start and end of "diagonal" columns */
2104     if (csize == PETSC_DECIDE) {
2105       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2106       if (mglobal == n*bs) { /* square matrix */
2107         nlocal = m;
2108       } else {
2109         nlocal = n/size + ((n % size) > rank);
2110       }
2111     } else {
2112       nlocal = csize/bs;
2113     }
2114     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2115     rstart = rend - nlocal;
2116     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2117 
2118     /* next, compute all the lengths */
2119     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2120     olens = dlens + m;
2121     for (i=0; i<m; i++) {
2122       jend = ii[i+1] - ii[i];
2123       olen = 0;
2124       dlen = 0;
2125       for (j=0; j<jend; j++) {
2126         if (*jj < rstart || *jj >= rend) olen++;
2127         else dlen++;
2128         jj++;
2129       }
2130       olens[i] = olen;
2131       dlens[i] = dlen;
2132     }
2133     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
2134     ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr);
2135     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
2136     ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
2137     ierr = PetscFree(dlens);CHKERRQ(ierr);
2138   } else {
2139     PetscInt ml,nl;
2140 
2141     M    = *newmat;
2142     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2143     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2144     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2145     /*
2146          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2147        rather than the slower MatSetValues().
2148     */
2149     M->was_assembled = PETSC_TRUE;
2150     M->assembled     = PETSC_FALSE;
2151   }
2152   ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2153   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2154   aij  = (Mat_SeqBAIJ*)(Mreuse)->data;
2155   ii   = aij->i;
2156   jj   = aij->j;
2157   aa   = aij->a;
2158   for (i=0; i<m; i++) {
2159     row   = rstart/bs + i;
2160     nz    = ii[i+1] - ii[i];
2161     cwork = jj;     jj += nz;
2162     vwork = aa;     aa += nz*bs*bs;
2163     ierr  = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2164   }
2165 
2166   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2167   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2168   *newmat = M;
2169 
2170   /* save submatrix used in processor for next request */
2171   if (call ==  MAT_INITIAL_MATRIX) {
2172     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2173     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2174   }
2175   PetscFunctionReturn(0);
2176 }
2177 
2178 #undef __FUNCT__
2179 #define __FUNCT__ "MatPermute_MPIBAIJ"
2180 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B)
2181 {
2182   MPI_Comm       comm,pcomm;
2183   PetscInt       first,rlocal_size,clocal_size,nrows;
2184   const PetscInt *rows;
2185   PetscMPIInt    size;
2186   IS             crowp,growp,irowp,lrowp,lcolp;
2187   PetscErrorCode ierr;
2188 
2189   PetscFunctionBegin;
2190   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2191   /* make a collective version of 'rowp' */
2192   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
2193   if (pcomm==comm) {
2194     crowp = rowp;
2195   } else {
2196     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
2197     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
2198     ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr);
2199     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
2200   }
2201   /* collect the global row permutation and invert it */
2202   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
2203   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
2204   if (pcomm!=comm) {
2205     ierr = ISDestroy(&crowp);CHKERRQ(ierr);
2206   }
2207   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
2208   ierr = ISDestroy(&growp);CHKERRQ(ierr);
2209   /* get the local target indices */
2210   ierr = MatGetOwnershipRange(A,&first,NULL);CHKERRQ(ierr);
2211   ierr = MatGetLocalSize(A,&rlocal_size,&clocal_size);CHKERRQ(ierr);
2212   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
2213   ierr = ISCreateGeneral(MPI_COMM_SELF,rlocal_size,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr);
2214   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
2215   ierr = ISDestroy(&irowp);CHKERRQ(ierr);
2216   /* the column permutation is so much easier;
2217      make a local version of 'colp' and invert it */
2218   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
2219   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
2220   if (size==1) {
2221     lcolp = colp;
2222   } else {
2223     ierr = ISAllGather(colp,&lcolp);CHKERRQ(ierr);
2224   }
2225   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
2226   /* now we just get the submatrix */
2227   ierr = MatGetSubMatrix_MPIBAIJ_Private(A,lrowp,lcolp,clocal_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
2228   if (size>1) {
2229     ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
2230   }
2231   /* clean up */
2232   ierr = ISDestroy(&lrowp);CHKERRQ(ierr);
2233   PetscFunctionReturn(0);
2234 }
2235 
2236 #undef __FUNCT__
2237 #define __FUNCT__ "MatGetGhosts_MPIBAIJ"
2238 PetscErrorCode  MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
2239 {
2240   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data;
2241   Mat_SeqBAIJ *B    = (Mat_SeqBAIJ*)baij->B->data;
2242 
2243   PetscFunctionBegin;
2244   if (nghosts) *nghosts = B->nbs;
2245   if (ghosts) *ghosts = baij->garray;
2246   PetscFunctionReturn(0);
2247 }
2248 
2249 extern PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat);
2250 
2251 #undef __FUNCT__
2252 #define __FUNCT__ "MatFDColoringCreate_MPIBAIJ"
2253 /*
2254     This routine is almost identical to MatFDColoringCreate_MPIBAIJ()!
2255 */
2256 PetscErrorCode MatFDColoringCreate_MPIBAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c)
2257 {
2258   Mat_MPIBAIJ            *baij = (Mat_MPIBAIJ*)mat->data;
2259   PetscErrorCode         ierr;
2260   PetscMPIInt            size,*ncolsonproc,*disp,nn;
2261   PetscInt               bs,i,n,nrows,j,k,m,ncols,col;
2262   const PetscInt         *is,*rows = 0,*A_ci,*A_cj,*B_ci,*B_cj,*ltog;
2263   PetscInt               nis = iscoloring->n,nctot,*cols;
2264   PetscInt               *rowhit,M,cstart,cend,colb;
2265   PetscInt               *columnsforrow,l;
2266   IS                     *isa;
2267   PetscBool              done,flg;
2268   ISLocalToGlobalMapping map = mat->cmap->bmapping;
2269   PetscInt               ctype=c->ctype;
2270 
2271   PetscFunctionBegin;
2272   if (!mat->assembled) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled first; MatAssemblyBegin/End();");
2273   if (ctype == IS_COLORING_GHOSTED && !map) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_INCOMP,"When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMappingBlock");
2274 
2275   if (map) {ierr = ISLocalToGlobalMappingGetIndices(map,&ltog);CHKERRQ(ierr);}
2276   else     ltog = NULL;
2277   ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr);
2278   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2279 
2280   M         = mat->rmap->n/bs;
2281   cstart    = mat->cmap->rstart/bs;
2282   cend      = mat->cmap->rend/bs;
2283   c->M      = mat->rmap->N/bs;         /* set the global rows and columns and local rows */
2284   c->N      = mat->cmap->N/bs;
2285   c->m      = mat->rmap->n/bs;
2286   c->rstart = mat->rmap->rstart/bs;
2287 
2288   c->ncolors = nis;
2289   ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr);
2290   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr);
2291   ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr);
2292   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr);
2293   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr);
2294   ierr       = PetscLogObjectMemory(c,5*nis*sizeof(PetscInt));CHKERRQ(ierr);
2295 
2296   /* Allow access to data structures of local part of matrix */
2297   if (!baij->colmap) {
2298     ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
2299   }
2300   ierr = MatGetColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr);
2301   ierr = MatGetColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr);
2302 
2303   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr);
2304   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr);
2305 
2306   for (i=0; i<nis; i++) {
2307     ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr);
2308     ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr);
2309 
2310     c->ncolumns[i] = n;
2311     if (n) {
2312       ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr);
2313       ierr = PetscLogObjectMemory(c,n*sizeof(PetscInt));CHKERRQ(ierr);
2314       ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr);
2315     } else {
2316       c->columns[i] = 0;
2317     }
2318 
2319     if (ctype == IS_COLORING_GLOBAL) {
2320       /* Determine the total (parallel) number of columns of this color */
2321       ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
2322       ierr = PetscMalloc2(size,PetscMPIInt,&ncolsonproc,size,PetscMPIInt,&disp);CHKERRQ(ierr);
2323 
2324       ierr  = PetscMPIIntCast(n,&nn);CHKERRQ(ierr);
2325       ierr  = MPI_Allgather(&nn,1,MPI_INT,ncolsonproc,1,MPI_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
2326       nctot = 0; for (j=0; j<size; j++) nctot += ncolsonproc[j];
2327       if (!nctot) {
2328         ierr = PetscInfo(mat,"Coloring of matrix has some unneeded colors with no corresponding rows\n");CHKERRQ(ierr);
2329       }
2330 
2331       disp[0] = 0;
2332       for (j=1; j<size; j++) {
2333         disp[j] = disp[j-1] + ncolsonproc[j-1];
2334       }
2335 
2336       /* Get complete list of columns for color on each processor */
2337       ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2338       ierr = MPI_Allgatherv((void*)is,n,MPIU_INT,cols,ncolsonproc,disp,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
2339       ierr = PetscFree2(ncolsonproc,disp);CHKERRQ(ierr);
2340     } else if (ctype == IS_COLORING_GHOSTED) {
2341       /* Determine local number of columns of this color on this process, including ghost points */
2342       nctot = n;
2343       ierr  = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2344       ierr  = PetscMemcpy(cols,is,n*sizeof(PetscInt));CHKERRQ(ierr);
2345     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not provided for this MatFDColoring type");
2346 
2347     /*
2348        Mark all rows affect by these columns
2349     */
2350     /* Temporary option to allow for debugging/testing */
2351     flg  = PETSC_FALSE;
2352     ierr = PetscOptionsGetBool(NULL,"-matfdcoloring_slow",&flg,NULL);CHKERRQ(ierr);
2353     if (!flg) { /*-----------------------------------------------------------------------------*/
2354       /* crude, fast version */
2355       ierr = PetscMemzero(rowhit,M*sizeof(PetscInt));CHKERRQ(ierr);
2356       /* loop over columns*/
2357       for (j=0; j<nctot; j++) {
2358         if (ctype == IS_COLORING_GHOSTED) {
2359           col = ltog[cols[j]];
2360         } else {
2361           col = cols[j];
2362         }
2363         if (col >= cstart && col < cend) {
2364           /* column is in diagonal block of matrix */
2365           rows = A_cj + A_ci[col-cstart];
2366           m    = A_ci[col-cstart+1] - A_ci[col-cstart];
2367         } else {
2368 #if defined(PETSC_USE_CTABLE)
2369           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2370           colb--;
2371 #else
2372           colb = baij->colmap[col] - 1;
2373 #endif
2374           if (colb == -1) {
2375             m = 0;
2376           } else {
2377             colb = colb/bs;
2378             rows = B_cj + B_ci[colb];
2379             m    = B_ci[colb+1] - B_ci[colb];
2380           }
2381         }
2382         /* loop over columns marking them in rowhit */
2383         for (k=0; k<m; k++) {
2384           rowhit[*rows++] = col + 1;
2385         }
2386       }
2387 
2388       /* count the number of hits */
2389       nrows = 0;
2390       for (j=0; j<M; j++) {
2391         if (rowhit[j]) nrows++;
2392       }
2393       c->nrows[i] = nrows;
2394       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
2395       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
2396       ierr        = PetscLogObjectMemory(c,2*(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr);
2397       nrows       = 0;
2398       for (j=0; j<M; j++) {
2399         if (rowhit[j]) {
2400           c->rows[i][nrows]          = j;
2401           c->columnsforrow[i][nrows] = rowhit[j] - 1;
2402           nrows++;
2403         }
2404       }
2405     } else { /*-------------------------------------------------------------------------------*/
2406       /* slow version, using rowhit as a linked list */
2407       PetscInt currentcol,fm,mfm;
2408       rowhit[M] = M;
2409       nrows     = 0;
2410       /* loop over columns*/
2411       for (j=0; j<nctot; j++) {
2412         if (ctype == IS_COLORING_GHOSTED) {
2413           col = ltog[cols[j]];
2414         } else {
2415           col = cols[j];
2416         }
2417         if (col >= cstart && col < cend) {
2418           /* column is in diagonal block of matrix */
2419           rows = A_cj + A_ci[col-cstart];
2420           m    = A_ci[col-cstart+1] - A_ci[col-cstart];
2421         } else {
2422 #if defined(PETSC_USE_CTABLE)
2423           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2424           colb--;
2425 #else
2426           colb = baij->colmap[col] - 1;
2427 #endif
2428           if (colb == -1) {
2429             m = 0;
2430           } else {
2431             colb = colb/bs;
2432             rows = B_cj + B_ci[colb];
2433             m    = B_ci[colb+1] - B_ci[colb];
2434           }
2435         }
2436 
2437         /* loop over columns marking them in rowhit */
2438         fm = M;    /* fm points to first entry in linked list */
2439         for (k=0; k<m; k++) {
2440           currentcol = *rows++;
2441           /* is it already in the list? */
2442           do {
2443             mfm = fm;
2444             fm  = rowhit[fm];
2445           } while (fm < currentcol);
2446           /* not in list so add it */
2447           if (fm != currentcol) {
2448             nrows++;
2449             columnsforrow[currentcol] = col;
2450             /* next three lines insert new entry into linked list */
2451             rowhit[mfm]        = currentcol;
2452             rowhit[currentcol] = fm;
2453             fm                 = currentcol;
2454             /* fm points to present position in list since we know the columns are sorted */
2455           } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid coloring of matrix detected");
2456         }
2457       }
2458       c->nrows[i] = nrows;
2459       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
2460       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
2461       ierr        = PetscLogObjectMemory(c,(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr);
2462       /* now store the linked list of rows into c->rows[i] */
2463       nrows = 0;
2464       fm    = rowhit[M];
2465       do {
2466         c->rows[i][nrows]            = fm;
2467         c->columnsforrow[i][nrows++] = columnsforrow[fm];
2468         fm                           = rowhit[fm];
2469       } while (fm < M);
2470     } /* ---------------------------------------------------------------------------------------*/
2471     ierr = PetscFree(cols);CHKERRQ(ierr);
2472   }
2473 
2474   /* Optimize by adding the vscale, and scaleforrow[][] fields */
2475   /*
2476        vscale will contain the "diagonal" on processor scalings followed by the off processor
2477   */
2478   if (ctype == IS_COLORING_GLOBAL) {
2479     PetscInt *garray;
2480     ierr = PetscMalloc(baij->B->cmap->n*sizeof(PetscInt),&garray);CHKERRQ(ierr);
2481     for (i=0; i<baij->B->cmap->n/bs; i++) {
2482       for (j=0; j<bs; j++) {
2483         garray[i*bs+j] = bs*baij->garray[i]+j;
2484       }
2485     }
2486     ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),baij->A->rmap->n,PETSC_DETERMINE,baij->B->cmap->n,garray,&c->vscale);CHKERRQ(ierr);
2487     ierr = PetscFree(garray);CHKERRQ(ierr);
2488     ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
2489     for (k=0; k<c->ncolors; k++) {
2490       ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
2491       for (l=0; l<c->nrows[k]; l++) {
2492         col = c->columnsforrow[k][l];
2493         if (col >= cstart && col < cend) {
2494           /* column is in diagonal block of matrix */
2495           colb = col - cstart;
2496         } else {
2497           /* column  is in "off-processor" part */
2498 #if defined(PETSC_USE_CTABLE)
2499           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2500           colb--;
2501 #else
2502           colb = baij->colmap[col] - 1;
2503 #endif
2504           colb  = colb/bs;
2505           colb += cend - cstart;
2506         }
2507         c->vscaleforrow[k][l] = colb;
2508       }
2509     }
2510   } else if (ctype == IS_COLORING_GHOSTED) {
2511     /* Get gtol mapping */
2512     PetscInt N = mat->cmap->N,nlocal,*gtol;
2513     ierr = PetscMalloc((N+1)*sizeof(PetscInt),&gtol);CHKERRQ(ierr);
2514     for (i=0; i<N; i++) gtol[i] = -1;
2515     ierr = ISLocalToGlobalMappingGetSize(map,&nlocal);CHKERRQ(ierr);
2516     for (i=0; i<nlocal; i++) gtol[ltog[i]] = i;
2517 
2518     c->vscale = 0; /* will be created in MatFDColoringApply() */
2519     ierr      = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
2520     for (k=0; k<c->ncolors; k++) {
2521       ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
2522       for (l=0; l<c->nrows[k]; l++) {
2523         col = c->columnsforrow[k][l];      /* global column index */
2524 
2525         c->vscaleforrow[k][l] = gtol[col]; /* local column index */
2526       }
2527     }
2528     ierr = PetscFree(gtol);CHKERRQ(ierr);
2529   }
2530   ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr);
2531 
2532   ierr = PetscFree(rowhit);CHKERRQ(ierr);
2533   ierr = PetscFree(columnsforrow);CHKERRQ(ierr);
2534   ierr = MatRestoreColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr);
2535   ierr = MatRestoreColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr);
2536   if (map) {ierr = ISLocalToGlobalMappingRestoreIndices(map,&ltog);CHKERRQ(ierr);}
2537   PetscFunctionReturn(0);
2538 }
2539 
2540 #undef __FUNCT__
2541 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIBAIJ"
2542 PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat)
2543 {
2544   Mat            B;
2545   Mat_MPIBAIJ    *a  = (Mat_MPIBAIJ*)A->data;
2546   Mat_SeqBAIJ    *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data;
2547   Mat_SeqAIJ     *b;
2548   PetscErrorCode ierr;
2549   PetscMPIInt    size,rank,*recvcounts = 0,*displs = 0;
2550   PetscInt       sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs;
2551   PetscInt       m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf;
2552 
2553   PetscFunctionBegin;
2554   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
2555   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
2556 
2557   /* ----------------------------------------------------------------
2558      Tell every processor the number of nonzeros per row
2559   */
2560   ierr = PetscMalloc((A->rmap->N/bs)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
2561   for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) {
2562     lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs];
2563   }
2564   sendcount = A->rmap->rend/bs - A->rmap->rstart/bs;
2565   ierr      = PetscMalloc(2*size*sizeof(PetscMPIInt),&recvcounts);CHKERRQ(ierr);
2566   displs    = recvcounts + size;
2567   for (i=0; i<size; i++) {
2568     recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs;
2569     displs[i]     = A->rmap->range[i]/bs;
2570   }
2571 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2572   ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2573 #else
2574   ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2575 #endif
2576   /* ---------------------------------------------------------------
2577      Create the sequential matrix of the same type as the local block diagonal
2578   */
2579   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
2580   ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2581   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2582   ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr);
2583   b    = (Mat_SeqAIJ*)B->data;
2584 
2585   /*--------------------------------------------------------------------
2586     Copy my part of matrix column indices over
2587   */
2588   sendcount  = ad->nz + bd->nz;
2589   jsendbuf   = b->j + b->i[rstarts[rank]/bs];
2590   a_jsendbuf = ad->j;
2591   b_jsendbuf = bd->j;
2592   n          = A->rmap->rend/bs - A->rmap->rstart/bs;
2593   cnt        = 0;
2594   for (i=0; i<n; i++) {
2595 
2596     /* put in lower diagonal portion */
2597     m = bd->i[i+1] - bd->i[i];
2598     while (m > 0) {
2599       /* is it above diagonal (in bd (compressed) numbering) */
2600       if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break;
2601       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2602       m--;
2603     }
2604 
2605     /* put in diagonal portion */
2606     for (j=ad->i[i]; j<ad->i[i+1]; j++) {
2607       jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++;
2608     }
2609 
2610     /* put in upper diagonal portion */
2611     while (m-- > 0) {
2612       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2613     }
2614   }
2615   if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt);
2616 
2617   /*--------------------------------------------------------------------
2618     Gather all column indices to all processors
2619   */
2620   for (i=0; i<size; i++) {
2621     recvcounts[i] = 0;
2622     for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) {
2623       recvcounts[i] += lens[j];
2624     }
2625   }
2626   displs[0] = 0;
2627   for (i=1; i<size; i++) {
2628     displs[i] = displs[i-1] + recvcounts[i-1];
2629   }
2630 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2631   ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2632 #else
2633   ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2634 #endif
2635   /*--------------------------------------------------------------------
2636     Assemble the matrix into useable form (note numerical values not yet set)
2637   */
2638   /* set the b->ilen (length of each row) values */
2639   ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr);
2640   /* set the b->i indices */
2641   b->i[0] = 0;
2642   for (i=1; i<=A->rmap->N/bs; i++) {
2643     b->i[i] = b->i[i-1] + lens[i-1];
2644   }
2645   ierr = PetscFree(lens);CHKERRQ(ierr);
2646   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2647   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2648   ierr = PetscFree(recvcounts);CHKERRQ(ierr);
2649 
2650   if (A->symmetric) {
2651     ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2652   } else if (A->hermitian) {
2653     ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr);
2654   } else if (A->structurally_symmetric) {
2655     ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2656   }
2657   *newmat = B;
2658   PetscFunctionReturn(0);
2659 }
2660 
2661 #undef __FUNCT__
2662 #define __FUNCT__ "MatSOR_MPIBAIJ"
2663 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2664 {
2665   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
2666   PetscErrorCode ierr;
2667   Vec            bb1 = 0;
2668 
2669   PetscFunctionBegin;
2670   if (flag == SOR_APPLY_UPPER) {
2671     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2672     PetscFunctionReturn(0);
2673   }
2674 
2675   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) {
2676     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2677   }
2678 
2679   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2680     if (flag & SOR_ZERO_INITIAL_GUESS) {
2681       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2682       its--;
2683     }
2684 
2685     while (its--) {
2686       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2687       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2688 
2689       /* update rhs: bb1 = bb - B*x */
2690       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2691       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2692 
2693       /* local sweep */
2694       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2695     }
2696   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
2697     if (flag & SOR_ZERO_INITIAL_GUESS) {
2698       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2699       its--;
2700     }
2701     while (its--) {
2702       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2703       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2704 
2705       /* update rhs: bb1 = bb - B*x */
2706       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2707       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2708 
2709       /* local sweep */
2710       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2711     }
2712   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
2713     if (flag & SOR_ZERO_INITIAL_GUESS) {
2714       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2715       its--;
2716     }
2717     while (its--) {
2718       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2719       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2720 
2721       /* update rhs: bb1 = bb - B*x */
2722       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2723       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2724 
2725       /* local sweep */
2726       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2727     }
2728   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel version of SOR requested not supported");
2729 
2730   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2731   PetscFunctionReturn(0);
2732 }
2733 
2734 extern PetscErrorCode  MatFDColoringApply_BAIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2735 
2736 #undef __FUNCT__
2737 #define __FUNCT__ "MatInvertBlockDiagonal_MPIBAIJ"
2738 PetscErrorCode  MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values)
2739 {
2740   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*) A->data;
2741   PetscErrorCode ierr;
2742 
2743   PetscFunctionBegin;
2744   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2745   PetscFunctionReturn(0);
2746 }
2747 
2748 
2749 /* -------------------------------------------------------------------*/
2750 static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ,
2751                                        MatGetRow_MPIBAIJ,
2752                                        MatRestoreRow_MPIBAIJ,
2753                                        MatMult_MPIBAIJ,
2754                                 /* 4*/ MatMultAdd_MPIBAIJ,
2755                                        MatMultTranspose_MPIBAIJ,
2756                                        MatMultTransposeAdd_MPIBAIJ,
2757                                        0,
2758                                        0,
2759                                        0,
2760                                 /*10*/ 0,
2761                                        0,
2762                                        0,
2763                                        MatSOR_MPIBAIJ,
2764                                        MatTranspose_MPIBAIJ,
2765                                 /*15*/ MatGetInfo_MPIBAIJ,
2766                                        MatEqual_MPIBAIJ,
2767                                        MatGetDiagonal_MPIBAIJ,
2768                                        MatDiagonalScale_MPIBAIJ,
2769                                        MatNorm_MPIBAIJ,
2770                                 /*20*/ MatAssemblyBegin_MPIBAIJ,
2771                                        MatAssemblyEnd_MPIBAIJ,
2772                                        MatSetOption_MPIBAIJ,
2773                                        MatZeroEntries_MPIBAIJ,
2774                                 /*24*/ MatZeroRows_MPIBAIJ,
2775                                        0,
2776                                        0,
2777                                        0,
2778                                        0,
2779                                 /*29*/ MatSetUp_MPIBAIJ,
2780                                        0,
2781                                        0,
2782                                        0,
2783                                        0,
2784                                 /*34*/ MatDuplicate_MPIBAIJ,
2785                                        0,
2786                                        0,
2787                                        0,
2788                                        0,
2789                                 /*39*/ MatAXPY_MPIBAIJ,
2790                                        MatGetSubMatrices_MPIBAIJ,
2791                                        MatIncreaseOverlap_MPIBAIJ,
2792                                        MatGetValues_MPIBAIJ,
2793                                        MatCopy_MPIBAIJ,
2794                                 /*44*/ 0,
2795                                        MatScale_MPIBAIJ,
2796                                        0,
2797                                        0,
2798                                        0,
2799                                 /*49*/ 0,
2800                                        0,
2801                                        0,
2802                                        0,
2803                                        0,
2804                                 /*54*/ MatFDColoringCreate_MPIBAIJ,
2805                                        0,
2806                                        MatSetUnfactored_MPIBAIJ,
2807                                        MatPermute_MPIBAIJ,
2808                                        MatSetValuesBlocked_MPIBAIJ,
2809                                 /*59*/ MatGetSubMatrix_MPIBAIJ,
2810                                        MatDestroy_MPIBAIJ,
2811                                        MatView_MPIBAIJ,
2812                                        0,
2813                                        0,
2814                                 /*64*/ 0,
2815                                        0,
2816                                        0,
2817                                        0,
2818                                        0,
2819                                 /*69*/ MatGetRowMaxAbs_MPIBAIJ,
2820                                        0,
2821                                        0,
2822                                        0,
2823                                        0,
2824                                 /*74*/ 0,
2825                                        MatFDColoringApply_BAIJ,
2826                                        0,
2827                                        0,
2828                                        0,
2829                                 /*79*/ 0,
2830                                        0,
2831                                        0,
2832                                        0,
2833                                        MatLoad_MPIBAIJ,
2834                                 /*84*/ 0,
2835                                        0,
2836                                        0,
2837                                        0,
2838                                        0,
2839                                 /*89*/ 0,
2840                                        0,
2841                                        0,
2842                                        0,
2843                                        0,
2844                                 /*94*/ 0,
2845                                        0,
2846                                        0,
2847                                        0,
2848                                        0,
2849                                 /*99*/ 0,
2850                                        0,
2851                                        0,
2852                                        0,
2853                                        0,
2854                                 /*104*/0,
2855                                        MatRealPart_MPIBAIJ,
2856                                        MatImaginaryPart_MPIBAIJ,
2857                                        0,
2858                                        0,
2859                                 /*109*/0,
2860                                        0,
2861                                        0,
2862                                        0,
2863                                        0,
2864                                 /*114*/MatGetSeqNonzeroStructure_MPIBAIJ,
2865                                        0,
2866                                        MatGetGhosts_MPIBAIJ,
2867                                        0,
2868                                        0,
2869                                 /*119*/0,
2870                                        0,
2871                                        0,
2872                                        0,
2873                                        0,
2874                                 /*124*/0,
2875                                        0,
2876                                        MatInvertBlockDiagonal_MPIBAIJ,
2877                                        0,
2878                                        0,
2879                                /*129*/ 0,
2880                                        0,
2881                                        0,
2882                                        0,
2883                                        0,
2884                                /*134*/ 0,
2885                                        0,
2886                                        0,
2887                                        0,
2888                                        0,
2889                                /*139*/ 0,
2890                                        0
2891 };
2892 
2893 #undef __FUNCT__
2894 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
2895 PetscErrorCode  MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a)
2896 {
2897   PetscFunctionBegin;
2898   *a = ((Mat_MPIBAIJ*)A->data)->A;
2899   PetscFunctionReturn(0);
2900 }
2901 
2902 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*);
2903 
2904 #undef __FUNCT__
2905 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ"
2906 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2907 {
2908   PetscInt       m,rstart,cstart,cend;
2909   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
2910   const PetscInt *JJ    =0;
2911   PetscScalar    *values=0;
2912   PetscBool      roworiented = ((Mat_MPIBAIJ*)B->data)->roworiented;
2913   PetscErrorCode ierr;
2914 
2915   PetscFunctionBegin;
2916   ierr   = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2917   ierr   = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2918   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2919   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2920   ierr   = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2921   m      = B->rmap->n/bs;
2922   rstart = B->rmap->rstart/bs;
2923   cstart = B->cmap->rstart/bs;
2924   cend   = B->cmap->rend/bs;
2925 
2926   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
2927   ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
2928   for (i=0; i<m; i++) {
2929     nz = ii[i+1] - ii[i];
2930     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
2931     nz_max = PetscMax(nz_max,nz);
2932     JJ     = jj + ii[i];
2933     for (j=0; j<nz; j++) {
2934       if (*JJ >= cstart) break;
2935       JJ++;
2936     }
2937     d = 0;
2938     for (; j<nz; j++) {
2939       if (*JJ++ >= cend) break;
2940       d++;
2941     }
2942     d_nnz[i] = d;
2943     o_nnz[i] = nz - d;
2944   }
2945   ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2946   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
2947 
2948   values = (PetscScalar*)V;
2949   if (!values) {
2950     ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2951     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2952   }
2953   for (i=0; i<m; i++) {
2954     PetscInt          row    = i + rstart;
2955     PetscInt          ncols  = ii[i+1] - ii[i];
2956     const PetscInt    *icols = jj + ii[i];
2957     if (!roworiented) {         /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
2958       const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2959       ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2960     } else {                    /* block ordering does not match so we can only insert one block at a time. */
2961       PetscInt j;
2962       for (j=0; j<ncols; j++) {
2963         const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
2964         ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
2965       }
2966     }
2967   }
2968 
2969   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2970   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2971   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2972   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2973   PetscFunctionReturn(0);
2974 }
2975 
2976 #undef __FUNCT__
2977 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR"
2978 /*@C
2979    MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format
2980    (the default parallel PETSc format).
2981 
2982    Collective on MPI_Comm
2983 
2984    Input Parameters:
2985 +  A - the matrix
2986 .  bs - the block size
2987 .  i - the indices into j for the start of each local row (starts with zero)
2988 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2989 -  v - optional values in the matrix
2990 
2991    Level: developer
2992 
2993    Notes: The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED.  For example, C programs
2994    may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
2995    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
2996    MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
2997    block column and the second index is over columns within a block.
2998 
2999 .keywords: matrix, aij, compressed row, sparse, parallel
3000 
3001 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, MatCreateMPIBAIJWithArrays(), MPIBAIJ
3002 @*/
3003 PetscErrorCode  MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3004 {
3005   PetscErrorCode ierr;
3006 
3007   PetscFunctionBegin;
3008   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3009   PetscValidType(B,1);
3010   PetscValidLogicalCollectiveInt(B,bs,2);
3011   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
3012   PetscFunctionReturn(0);
3013 }
3014 
3015 #undef __FUNCT__
3016 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
3017 PetscErrorCode  MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
3018 {
3019   Mat_MPIBAIJ    *b;
3020   PetscErrorCode ierr;
3021   PetscInt       i;
3022 
3023   PetscFunctionBegin;
3024   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
3025   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
3026   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3027   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3028   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
3029 
3030   if (d_nnz) {
3031     for (i=0; i<B->rmap->n/bs; i++) {
3032       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]);
3033     }
3034   }
3035   if (o_nnz) {
3036     for (i=0; i<B->rmap->n/bs; i++) {
3037       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]);
3038     }
3039   }
3040 
3041   b      = (Mat_MPIBAIJ*)B->data;
3042   b->bs2 = bs*bs;
3043   b->mbs = B->rmap->n/bs;
3044   b->nbs = B->cmap->n/bs;
3045   b->Mbs = B->rmap->N/bs;
3046   b->Nbs = B->cmap->N/bs;
3047 
3048   for (i=0; i<=b->size; i++) {
3049     b->rangebs[i] = B->rmap->range[i]/bs;
3050   }
3051   b->rstartbs = B->rmap->rstart/bs;
3052   b->rendbs   = B->rmap->rend/bs;
3053   b->cstartbs = B->cmap->rstart/bs;
3054   b->cendbs   = B->cmap->rend/bs;
3055 
3056   if (!B->preallocated) {
3057     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3058     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3059     ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
3060     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3061     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3062     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3063     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
3064     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3065     ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr);
3066   }
3067 
3068   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3069   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
3070   B->preallocated = PETSC_TRUE;
3071   PetscFunctionReturn(0);
3072 }
3073 
3074 extern PetscErrorCode  MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
3075 extern PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
3076 
3077 #undef __FUNCT__
3078 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj"
3079 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype,MatReuse reuse,Mat *adj)
3080 {
3081   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
3082   PetscErrorCode ierr;
3083   Mat_SeqBAIJ    *d  = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data;
3084   PetscInt       M   = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs;
3085   const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;
3086 
3087   PetscFunctionBegin;
3088   ierr  = PetscMalloc((M+1)*sizeof(PetscInt),&ii);CHKERRQ(ierr);
3089   ii[0] = 0;
3090   for (i=0; i<M; i++) {
3091     if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]);
3092     if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]);
3093     ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i];
3094     /* remove one from count of matrix has diagonal */
3095     for (j=id[i]; j<id[i+1]; j++) {
3096       if (jd[j] == i) {ii[i+1]--;break;}
3097     }
3098   }
3099   ierr = PetscMalloc(ii[M]*sizeof(PetscInt),&jj);CHKERRQ(ierr);
3100   cnt  = 0;
3101   for (i=0; i<M; i++) {
3102     for (j=io[i]; j<io[i+1]; j++) {
3103       if (garray[jo[j]] > rstart) break;
3104       jj[cnt++] = garray[jo[j]];
3105     }
3106     for (k=id[i]; k<id[i+1]; k++) {
3107       if (jd[k] != i) {
3108         jj[cnt++] = rstart + jd[k];
3109       }
3110     }
3111     for (; j<io[i+1]; j++) {
3112       jj[cnt++] = garray[jo[j]];
3113     }
3114   }
3115   ierr = MatCreateMPIAdj(PetscObjectComm((PetscObject)B),M,B->cmap->N/B->rmap->bs,ii,jj,NULL,adj);CHKERRQ(ierr);
3116   PetscFunctionReturn(0);
3117 }
3118 
3119 #include <../src/mat/impls/aij/mpi/mpiaij.h>
3120 
3121 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,MatReuse,Mat*);
3122 
3123 #undef __FUNCT__
3124 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAIJ"
3125 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
3126 {
3127   PetscErrorCode ierr;
3128   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
3129   Mat            B;
3130   Mat_MPIAIJ     *b;
3131 
3132   PetscFunctionBegin;
3133   if (!A->assembled) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Matrix must be assembled");
3134 
3135   ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
3136   ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
3137   ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
3138   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
3139   ierr = MatMPIAIJSetPreallocation(B,0,NULL,0,NULL);CHKERRQ(ierr);
3140   b    = (Mat_MPIAIJ*) B->data;
3141 
3142   ierr = MatDestroy(&b->A);CHKERRQ(ierr);
3143   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
3144   ierr = MatDisAssemble_MPIBAIJ(A);CHKERRQ(ierr);
3145   ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr);
3146   ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr);
3147   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3148   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3149   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3150   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3151   if (reuse == MAT_REUSE_MATRIX) {
3152     ierr = MatHeaderReplace(A,B);CHKERRQ(ierr);
3153   } else {
3154    *newmat = B;
3155   }
3156   PetscFunctionReturn(0);
3157 }
3158 
3159 #if defined(PETSC_HAVE_MUMPS)
3160 PETSC_EXTERN PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*);
3161 #endif
3162 
3163 /*MC
3164    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
3165 
3166    Options Database Keys:
3167 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
3168 . -mat_block_size <bs> - set the blocksize used to store the matrix
3169 - -mat_use_hash_table <fact>
3170 
3171   Level: beginner
3172 
3173 .seealso: MatCreateMPIBAIJ
3174 M*/
3175 
3176 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,MatType,MatReuse,Mat*);
3177 
3178 #undef __FUNCT__
3179 #define __FUNCT__ "MatCreate_MPIBAIJ"
3180 PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B)
3181 {
3182   Mat_MPIBAIJ    *b;
3183   PetscErrorCode ierr;
3184   PetscBool      flg;
3185 
3186   PetscFunctionBegin;
3187   ierr    = PetscNewLog(B,Mat_MPIBAIJ,&b);CHKERRQ(ierr);
3188   B->data = (void*)b;
3189 
3190   ierr         = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3191   B->assembled = PETSC_FALSE;
3192 
3193   B->insertmode = NOT_SET_VALUES;
3194   ierr          = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
3195   ierr          = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr);
3196 
3197   /* build local table of row and column ownerships */
3198   ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);
3199 
3200   /* build cache for off array entries formed */
3201   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
3202 
3203   b->donotstash  = PETSC_FALSE;
3204   b->colmap      = NULL;
3205   b->garray      = NULL;
3206   b->roworiented = PETSC_TRUE;
3207 
3208   /* stuff used in block assembly */
3209   b->barray = 0;
3210 
3211   /* stuff used for matrix vector multiply */
3212   b->lvec  = 0;
3213   b->Mvctx = 0;
3214 
3215   /* stuff for MatGetRow() */
3216   b->rowindices   = 0;
3217   b->rowvalues    = 0;
3218   b->getrowactive = PETSC_FALSE;
3219 
3220   /* hash table stuff */
3221   b->ht           = 0;
3222   b->hd           = 0;
3223   b->ht_size      = 0;
3224   b->ht_flag      = PETSC_FALSE;
3225   b->ht_fact      = 0;
3226   b->ht_total_ct  = 0;
3227   b->ht_insert_ct = 0;
3228 
3229   /* stuff for MatGetSubMatrices_MPIBAIJ_local() */
3230   b->ijonly = PETSC_FALSE;
3231 
3232   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr);
3233   ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,NULL);CHKERRQ(ierr);
3234   if (flg) {
3235     PetscReal fact = 1.39;
3236     ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
3237     ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
3238     if (fact <= 1.0) fact = 1.39;
3239     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
3240     ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
3241   }
3242   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3243 
3244 #if defined(PETSC_HAVE_MUMPS)
3245   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_baij_mumps);CHKERRQ(ierr);
3246 #endif
3247   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr);
3248   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr);
3249   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C",MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr);
3250   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
3251   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
3252   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
3253   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
3254   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
3255   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
3256   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSetHashTableFactor_C",MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
3257   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpibstrm_C",MatConvert_MPIBAIJ_MPIBSTRM);CHKERRQ(ierr);
3258   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);
3259   PetscFunctionReturn(0);
3260 }
3261 
3262 /*MC
3263    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
3264 
3265    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
3266    and MATMPIBAIJ otherwise.
3267 
3268    Options Database Keys:
3269 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
3270 
3271   Level: beginner
3272 
3273 .seealso: MatCreateBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3274 M*/
3275 
3276 #undef __FUNCT__
3277 #define __FUNCT__ "MatMPIBAIJSetPreallocation"
3278 /*@C
3279    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
3280    (block compressed row).  For good matrix assembly performance
3281    the user should preallocate the matrix storage by setting the parameters
3282    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3283    performance can be increased by more than a factor of 50.
3284 
3285    Collective on Mat
3286 
3287    Input Parameters:
3288 +  A - the matrix
3289 .  bs   - size of blockk
3290 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
3291            submatrix  (same for all local rows)
3292 .  d_nnz - array containing the number of block nonzeros in the various block rows
3293            of the in diagonal portion of the local (possibly different for each block
3294            row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry and
3295            set it even if it is zero.
3296 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
3297            submatrix (same for all local rows).
3298 -  o_nnz - array containing the number of nonzeros in the various block rows of the
3299            off-diagonal portion of the local submatrix (possibly different for
3300            each block row) or NULL.
3301 
3302    If the *_nnz parameter is given then the *_nz parameter is ignored
3303 
3304    Options Database Keys:
3305 +   -mat_block_size - size of the blocks to use
3306 -   -mat_use_hash_table <fact>
3307 
3308    Notes:
3309    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
3310    than it must be used on all processors that share the object for that argument.
3311 
3312    Storage Information:
3313    For a square global matrix we define each processor's diagonal portion
3314    to be its local rows and the corresponding columns (a square submatrix);
3315    each processor's off-diagonal portion encompasses the remainder of the
3316    local matrix (a rectangular submatrix).
3317 
3318    The user can specify preallocated storage for the diagonal part of
3319    the local submatrix with either d_nz or d_nnz (not both).  Set
3320    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3321    memory allocation.  Likewise, specify preallocated storage for the
3322    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3323 
3324    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3325    the figure below we depict these three local rows and all columns (0-11).
3326 
3327 .vb
3328            0 1 2 3 4 5 6 7 8 9 10 11
3329           --------------------------
3330    row 3  |o o o d d d o o o o  o  o
3331    row 4  |o o o d d d o o o o  o  o
3332    row 5  |o o o d d d o o o o  o  o
3333           --------------------------
3334 .ve
3335 
3336    Thus, any entries in the d locations are stored in the d (diagonal)
3337    submatrix, and any entries in the o locations are stored in the
3338    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3339    stored simply in the MATSEQBAIJ format for compressed row storage.
3340 
3341    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3342    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3343    In general, for PDE problems in which most nonzeros are near the diagonal,
3344    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3345    or you will get TERRIBLE performance; see the users' manual chapter on
3346    matrices.
3347 
3348    You can call MatGetInfo() to get information on how effective the preallocation was;
3349    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3350    You can also run with the option -info and look for messages with the string
3351    malloc in them to see if additional memory allocation was needed.
3352 
3353    Level: intermediate
3354 
3355 .keywords: matrix, block, aij, compressed row, sparse, parallel
3356 
3357 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocationCSR(), PetscSplitOwnership()
3358 @*/
3359 PetscErrorCode  MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3360 {
3361   PetscErrorCode ierr;
3362 
3363   PetscFunctionBegin;
3364   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3365   PetscValidType(B,1);
3366   PetscValidLogicalCollectiveInt(B,bs,2);
3367   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3368   PetscFunctionReturn(0);
3369 }
3370 
3371 #undef __FUNCT__
3372 #define __FUNCT__ "MatCreateBAIJ"
3373 /*@C
3374    MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format
3375    (block compressed row).  For good matrix assembly performance
3376    the user should preallocate the matrix storage by setting the parameters
3377    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3378    performance can be increased by more than a factor of 50.
3379 
3380    Collective on MPI_Comm
3381 
3382    Input Parameters:
3383 +  comm - MPI communicator
3384 .  bs   - size of blockk
3385 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3386            This value should be the same as the local size used in creating the
3387            y vector for the matrix-vector product y = Ax.
3388 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
3389            This value should be the same as the local size used in creating the
3390            x vector for the matrix-vector product y = Ax.
3391 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3392 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3393 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
3394            submatrix  (same for all local rows)
3395 .  d_nnz - array containing the number of nonzero blocks in the various block rows
3396            of the in diagonal portion of the local (possibly different for each block
3397            row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry
3398            and set it even if it is zero.
3399 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3400            submatrix (same for all local rows).
3401 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
3402            off-diagonal portion of the local submatrix (possibly different for
3403            each block row) or NULL.
3404 
3405    Output Parameter:
3406 .  A - the matrix
3407 
3408    Options Database Keys:
3409 +   -mat_block_size - size of the blocks to use
3410 -   -mat_use_hash_table <fact>
3411 
3412    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3413    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3414    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3415 
3416    Notes:
3417    If the *_nnz parameter is given then the *_nz parameter is ignored
3418 
3419    A nonzero block is any block that as 1 or more nonzeros in it
3420 
3421    The user MUST specify either the local or global matrix dimensions
3422    (possibly both).
3423 
3424    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
3425    than it must be used on all processors that share the object for that argument.
3426 
3427    Storage Information:
3428    For a square global matrix we define each processor's diagonal portion
3429    to be its local rows and the corresponding columns (a square submatrix);
3430    each processor's off-diagonal portion encompasses the remainder of the
3431    local matrix (a rectangular submatrix).
3432 
3433    The user can specify preallocated storage for the diagonal part of
3434    the local submatrix with either d_nz or d_nnz (not both).  Set
3435    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3436    memory allocation.  Likewise, specify preallocated storage for the
3437    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3438 
3439    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3440    the figure below we depict these three local rows and all columns (0-11).
3441 
3442 .vb
3443            0 1 2 3 4 5 6 7 8 9 10 11
3444           --------------------------
3445    row 3  |o o o d d d o o o o  o  o
3446    row 4  |o o o d d d o o o o  o  o
3447    row 5  |o o o d d d o o o o  o  o
3448           --------------------------
3449 .ve
3450 
3451    Thus, any entries in the d locations are stored in the d (diagonal)
3452    submatrix, and any entries in the o locations are stored in the
3453    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3454    stored simply in the MATSEQBAIJ format for compressed row storage.
3455 
3456    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3457    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3458    In general, for PDE problems in which most nonzeros are near the diagonal,
3459    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3460    or you will get TERRIBLE performance; see the users' manual chapter on
3461    matrices.
3462 
3463    Level: intermediate
3464 
3465 .keywords: matrix, block, aij, compressed row, sparse, parallel
3466 
3467 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3468 @*/
3469 PetscErrorCode  MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3470 {
3471   PetscErrorCode ierr;
3472   PetscMPIInt    size;
3473 
3474   PetscFunctionBegin;
3475   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3476   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3477   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3478   if (size > 1) {
3479     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
3480     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3481   } else {
3482     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3483     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3484   }
3485   PetscFunctionReturn(0);
3486 }
3487 
3488 #undef __FUNCT__
3489 #define __FUNCT__ "MatDuplicate_MPIBAIJ"
3490 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3491 {
3492   Mat            mat;
3493   Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
3494   PetscErrorCode ierr;
3495   PetscInt       len=0;
3496 
3497   PetscFunctionBegin;
3498   *newmat = 0;
3499   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
3500   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3501   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3502   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3503 
3504   mat->factortype   = matin->factortype;
3505   mat->preallocated = PETSC_TRUE;
3506   mat->assembled    = PETSC_TRUE;
3507   mat->insertmode   = NOT_SET_VALUES;
3508 
3509   a             = (Mat_MPIBAIJ*)mat->data;
3510   mat->rmap->bs = matin->rmap->bs;
3511   a->bs2        = oldmat->bs2;
3512   a->mbs        = oldmat->mbs;
3513   a->nbs        = oldmat->nbs;
3514   a->Mbs        = oldmat->Mbs;
3515   a->Nbs        = oldmat->Nbs;
3516 
3517   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3518   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3519 
3520   a->size         = oldmat->size;
3521   a->rank         = oldmat->rank;
3522   a->donotstash   = oldmat->donotstash;
3523   a->roworiented  = oldmat->roworiented;
3524   a->rowindices   = 0;
3525   a->rowvalues    = 0;
3526   a->getrowactive = PETSC_FALSE;
3527   a->barray       = 0;
3528   a->rstartbs     = oldmat->rstartbs;
3529   a->rendbs       = oldmat->rendbs;
3530   a->cstartbs     = oldmat->cstartbs;
3531   a->cendbs       = oldmat->cendbs;
3532 
3533   /* hash table stuff */
3534   a->ht           = 0;
3535   a->hd           = 0;
3536   a->ht_size      = 0;
3537   a->ht_flag      = oldmat->ht_flag;
3538   a->ht_fact      = oldmat->ht_fact;
3539   a->ht_total_ct  = 0;
3540   a->ht_insert_ct = 0;
3541 
3542   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
3543   if (oldmat->colmap) {
3544 #if defined(PETSC_USE_CTABLE)
3545     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3546 #else
3547     ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3548     ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3549     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3550 #endif
3551   } else a->colmap = 0;
3552 
3553   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
3554     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3555     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3556     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
3557   } else a->garray = 0;
3558 
3559   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
3560   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3561   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3562   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3563   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3564 
3565   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3566   ierr    = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3567   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3568   ierr    = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3569   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3570   *newmat = mat;
3571   PetscFunctionReturn(0);
3572 }
3573 
3574 #undef __FUNCT__
3575 #define __FUNCT__ "MatLoad_MPIBAIJ"
3576 PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer)
3577 {
3578   PetscErrorCode ierr;
3579   int            fd;
3580   PetscInt       i,nz,j,rstart,rend;
3581   PetscScalar    *vals,*buf;
3582   MPI_Comm       comm;
3583   MPI_Status     status;
3584   PetscMPIInt    rank,size,maxnz;
3585   PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
3586   PetscInt       *locrowlens = NULL,*procsnz = NULL,*browners = NULL;
3587   PetscInt       jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax;
3588   PetscMPIInt    tag    = ((PetscObject)viewer)->tag;
3589   PetscInt       *dlens = NULL,*odlens = NULL,*mask = NULL,*masked1 = NULL,*masked2 = NULL,rowcount,odcount;
3590   PetscInt       dcount,kmax,k,nzcount,tmp,mend,sizesset=1,grows,gcols;
3591 
3592   PetscFunctionBegin;
3593   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3594   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr);
3595   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3596   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3597 
3598   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3599   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3600   if (!rank) {
3601     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3602     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
3603     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3604   }
3605 
3606   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
3607 
3608   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3609   M    = header[1]; N = header[2];
3610 
3611   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3612   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
3613   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
3614 
3615   /* If global sizes are set, check if they are consistent with that given in the file */
3616   if (sizesset) {
3617     ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
3618   }
3619   if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3620   if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3621 
3622   if (M != N) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Can only do square matrices");
3623 
3624   /*
3625      This code adds extra rows to make sure the number of rows is
3626      divisible by the blocksize
3627   */
3628   Mbs        = M/bs;
3629   extra_rows = bs - M + bs*Mbs;
3630   if (extra_rows == bs) extra_rows = 0;
3631   else                  Mbs++;
3632   if (extra_rows && !rank) {
3633     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3634   }
3635 
3636   /* determine ownership of all rows */
3637   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
3638     mbs = Mbs/size + ((Mbs % size) > rank);
3639     m   = mbs*bs;
3640   } else { /* User set */
3641     m   = newmat->rmap->n;
3642     mbs = m/bs;
3643   }
3644   ierr = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr);
3645   ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3646 
3647   /* process 0 needs enough room for process with most rows */
3648   if (!rank) {
3649     mmax = rowners[1];
3650     for (i=2; i<=size; i++) {
3651       mmax = PetscMax(mmax,rowners[i]);
3652     }
3653     mmax*=bs;
3654   } else mmax = -1;             /* unused, but compiler warns anyway */
3655 
3656   rowners[0] = 0;
3657   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
3658   for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
3659   rstart = rowners[rank];
3660   rend   = rowners[rank+1];
3661 
3662   /* distribute row lengths to all processors */
3663   ierr = PetscMalloc(m*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr);
3664   if (!rank) {
3665     mend = m;
3666     if (size == 1) mend = mend - extra_rows;
3667     ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr);
3668     for (j=mend; j<m; j++) locrowlens[j] = 1;
3669     ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3670     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3671     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3672     for (j=0; j<m; j++) {
3673       procsnz[0] += locrowlens[j];
3674     }
3675     for (i=1; i<size; i++) {
3676       mend = browners[i+1] - browners[i];
3677       if (i == size-1) mend = mend - extra_rows;
3678       ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr);
3679       for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
3680       /* calculate the number of nonzeros on each processor */
3681       for (j=0; j<browners[i+1]-browners[i]; j++) {
3682         procsnz[i] += rowlengths[j];
3683       }
3684       ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3685     }
3686     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3687   } else {
3688     ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3689   }
3690 
3691   if (!rank) {
3692     /* determine max buffer needed and allocate it */
3693     maxnz = procsnz[0];
3694     for (i=1; i<size; i++) {
3695       maxnz = PetscMax(maxnz,procsnz[i]);
3696     }
3697     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3698 
3699     /* read in my part of the matrix column indices  */
3700     nz     = procsnz[0];
3701     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
3702     mycols = ibuf;
3703     if (size == 1) nz -= extra_rows;
3704     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3705     if (size == 1) {
3706       for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i;
3707     }
3708 
3709     /* read in every ones (except the last) and ship off */
3710     for (i=1; i<size-1; i++) {
3711       nz   = procsnz[i];
3712       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3713       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3714     }
3715     /* read in the stuff for the last proc */
3716     if (size != 1) {
3717       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
3718       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3719       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
3720       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
3721     }
3722     ierr = PetscFree(cols);CHKERRQ(ierr);
3723   } else {
3724     /* determine buffer space needed for message */
3725     nz = 0;
3726     for (i=0; i<m; i++) {
3727       nz += locrowlens[i];
3728     }
3729     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
3730     mycols = ibuf;
3731     /* receive message of column indices*/
3732     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3733     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
3734     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
3735   }
3736 
3737   /* loop over local rows, determining number of off diagonal entries */
3738   ierr     = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
3739   ierr     = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
3740   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3741   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3742   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3743   rowcount = 0; nzcount = 0;
3744   for (i=0; i<mbs; i++) {
3745     dcount  = 0;
3746     odcount = 0;
3747     for (j=0; j<bs; j++) {
3748       kmax = locrowlens[rowcount];
3749       for (k=0; k<kmax; k++) {
3750         tmp = mycols[nzcount++]/bs;
3751         if (!mask[tmp]) {
3752           mask[tmp] = 1;
3753           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
3754           else masked1[dcount++] = tmp;
3755         }
3756       }
3757       rowcount++;
3758     }
3759 
3760     dlens[i]  = dcount;
3761     odlens[i] = odcount;
3762 
3763     /* zero out the mask elements we set */
3764     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
3765     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
3766   }
3767 
3768 
3769   if (!sizesset) {
3770     ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3771   }
3772   ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
3773 
3774   if (!rank) {
3775     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
3776     /* read in my part of the matrix numerical values  */
3777     nz     = procsnz[0];
3778     vals   = buf;
3779     mycols = ibuf;
3780     if (size == 1) nz -= extra_rows;
3781     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3782     if (size == 1) {
3783       for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0;
3784     }
3785 
3786     /* insert into matrix */
3787     jj = rstart*bs;
3788     for (i=0; i<m; i++) {
3789       ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3790       mycols += locrowlens[i];
3791       vals   += locrowlens[i];
3792       jj++;
3793     }
3794     /* read in other processors (except the last one) and ship out */
3795     for (i=1; i<size-1; i++) {
3796       nz   = procsnz[i];
3797       vals = buf;
3798       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3799       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3800     }
3801     /* the last proc */
3802     if (size != 1) {
3803       nz   = procsnz[i] - extra_rows;
3804       vals = buf;
3805       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3806       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
3807       ierr = MPIULong_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3808     }
3809     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3810   } else {
3811     /* receive numeric values */
3812     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
3813 
3814     /* receive message of values*/
3815     vals   = buf;
3816     mycols = ibuf;
3817     ierr   = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3818 
3819     /* insert into matrix */
3820     jj = rstart*bs;
3821     for (i=0; i<m; i++) {
3822       ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3823       mycols += locrowlens[i];
3824       vals   += locrowlens[i];
3825       jj++;
3826     }
3827   }
3828   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
3829   ierr = PetscFree(buf);CHKERRQ(ierr);
3830   ierr = PetscFree(ibuf);CHKERRQ(ierr);
3831   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
3832   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
3833   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
3834   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3835   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3836   PetscFunctionReturn(0);
3837 }
3838 
3839 #undef __FUNCT__
3840 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
3841 /*@
3842    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
3843 
3844    Input Parameters:
3845 .  mat  - the matrix
3846 .  fact - factor
3847 
3848    Not Collective, each process can use a different factor
3849 
3850    Level: advanced
3851 
3852   Notes:
3853    This can also be set by the command line option: -mat_use_hash_table <fact>
3854 
3855 .keywords: matrix, hashtable, factor, HT
3856 
3857 .seealso: MatSetOption()
3858 @*/
3859 PetscErrorCode  MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
3860 {
3861   PetscErrorCode ierr;
3862 
3863   PetscFunctionBegin;
3864   ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr);
3865   PetscFunctionReturn(0);
3866 }
3867 
3868 #undef __FUNCT__
3869 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
3870 PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
3871 {
3872   Mat_MPIBAIJ *baij;
3873 
3874   PetscFunctionBegin;
3875   baij          = (Mat_MPIBAIJ*)mat->data;
3876   baij->ht_fact = fact;
3877   PetscFunctionReturn(0);
3878 }
3879 
3880 #undef __FUNCT__
3881 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
3882 PetscErrorCode  MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
3883 {
3884   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
3885 
3886   PetscFunctionBegin;
3887   *Ad     = a->A;
3888   *Ao     = a->B;
3889   *colmap = a->garray;
3890   PetscFunctionReturn(0);
3891 }
3892 
3893 /*
3894     Special version for direct calls from Fortran (to eliminate two function call overheads
3895 */
3896 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3897 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3898 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3899 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3900 #endif
3901 
3902 #undef __FUNCT__
3903 #define __FUNCT__ "matmpibiajsetvaluesblocked"
3904 /*@C
3905   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked()
3906 
3907   Collective on Mat
3908 
3909   Input Parameters:
3910 + mat - the matrix
3911 . min - number of input rows
3912 . im - input rows
3913 . nin - number of input columns
3914 . in - input columns
3915 . v - numerical values input
3916 - addvin - INSERT_VALUES or ADD_VALUES
3917 
3918   Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse.
3919 
3920   Level: advanced
3921 
3922 .seealso:   MatSetValuesBlocked()
3923 @*/
3924 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
3925 {
3926   /* convert input arguments to C version */
3927   Mat        mat  = *matin;
3928   PetscInt   m    = *min, n = *nin;
3929   InsertMode addv = *addvin;
3930 
3931   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
3932   const MatScalar *value;
3933   MatScalar       *barray     = baij->barray;
3934   PetscBool       roworiented = baij->roworiented;
3935   PetscErrorCode  ierr;
3936   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
3937   PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
3938   PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
3939 
3940   PetscFunctionBegin;
3941   /* tasks normally handled by MatSetValuesBlocked() */
3942   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
3943 #if defined(PETSC_USE_DEBUG)
3944   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
3945   if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
3946 #endif
3947   if (mat->assembled) {
3948     mat->was_assembled = PETSC_TRUE;
3949     mat->assembled     = PETSC_FALSE;
3950   }
3951   ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
3952 
3953 
3954   if (!barray) {
3955     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
3956     baij->barray = barray;
3957   }
3958 
3959   if (roworiented) stepval = (n-1)*bs;
3960   else stepval = (m-1)*bs;
3961 
3962   for (i=0; i<m; i++) {
3963     if (im[i] < 0) continue;
3964 #if defined(PETSC_USE_DEBUG)
3965     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
3966 #endif
3967     if (im[i] >= rstart && im[i] < rend) {
3968       row = im[i] - rstart;
3969       for (j=0; j<n; j++) {
3970         /* If NumCol = 1 then a copy is not required */
3971         if ((roworiented) && (n == 1)) {
3972           barray = (MatScalar*)v + i*bs2;
3973         } else if ((!roworiented) && (m == 1)) {
3974           barray = (MatScalar*)v + j*bs2;
3975         } else { /* Here a copy is required */
3976           if (roworiented) {
3977             value = v + i*(stepval+bs)*bs + j*bs;
3978           } else {
3979             value = v + j*(stepval+bs)*bs + i*bs;
3980           }
3981           for (ii=0; ii<bs; ii++,value+=stepval) {
3982             for (jj=0; jj<bs; jj++) {
3983               *barray++ = *value++;
3984             }
3985           }
3986           barray -=bs2;
3987         }
3988 
3989         if (in[j] >= cstart && in[j] < cend) {
3990           col  = in[j] - cstart;
3991           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
3992         } else if (in[j] < 0) continue;
3993 #if defined(PETSC_USE_DEBUG)
3994         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
3995 #endif
3996         else {
3997           if (mat->was_assembled) {
3998             if (!baij->colmap) {
3999               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
4000             }
4001 
4002 #if defined(PETSC_USE_DEBUG)
4003 #if defined(PETSC_USE_CTABLE)
4004             { PetscInt data;
4005               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
4006               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
4007             }
4008 #else
4009             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
4010 #endif
4011 #endif
4012 #if defined(PETSC_USE_CTABLE)
4013             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
4014             col  = (col - 1)/bs;
4015 #else
4016             col = (baij->colmap[in[j]] - 1)/bs;
4017 #endif
4018             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
4019               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
4020               col  =  in[j];
4021             }
4022           } else col = in[j];
4023           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
4024         }
4025       }
4026     } else {
4027       if (!baij->donotstash) {
4028         if (roworiented) {
4029           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
4030         } else {
4031           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
4032         }
4033       }
4034     }
4035   }
4036 
4037   /* task normally handled by MatSetValuesBlocked() */
4038   ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
4039   PetscFunctionReturn(0);
4040 }
4041 
4042 #undef __FUNCT__
4043 #define __FUNCT__ "MatCreateMPIBAIJWithArrays"
4044 /*@
4045      MatCreateMPIBAIJWithArrays - creates a MPI BAIJ matrix using arrays that contain in standard
4046          CSR format the local rows.
4047 
4048    Collective on MPI_Comm
4049 
4050    Input Parameters:
4051 +  comm - MPI communicator
4052 .  bs - the block size, only a block size of 1 is supported
4053 .  m - number of local rows (Cannot be PETSC_DECIDE)
4054 .  n - This value should be the same as the local size used in creating the
4055        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4056        calculated if N is given) For square matrices n is almost always m.
4057 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4058 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4059 .   i - row indices
4060 .   j - column indices
4061 -   a - matrix values
4062 
4063    Output Parameter:
4064 .   mat - the matrix
4065 
4066    Level: intermediate
4067 
4068    Notes:
4069        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4070      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4071      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4072 
4073      The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
4074      the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
4075      block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
4076      with column-major ordering within blocks.
4077 
4078        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4079 
4080 .keywords: matrix, aij, compressed row, sparse, parallel
4081 
4082 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4083           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4084 @*/
4085 PetscErrorCode  MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4086 {
4087   PetscErrorCode ierr;
4088 
4089   PetscFunctionBegin;
4090   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4091   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4092   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4093   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4094   ierr = MatSetType(*mat,MATMPIBAIJ);CHKERRQ(ierr);
4095   ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
4096   ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
4097   ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
4098   PetscFunctionReturn(0);
4099 }
4100