xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 5e201a00ff46f24f99cdbb0a37beac2e6038bc29)
1 
2 #include <../src/mat/impls/baij/mpi/mpibaij.h>   /*I  "petscmat.h"  I*/
3 #include <petscblaslapack.h>
4 
5 extern PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
6 extern PetscErrorCode MatDisAssemble_MPIBAIJ(Mat);
7 extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []);
8 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
9 extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
10 extern PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
11 extern PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
12 extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar,Vec,Vec);
13 
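/*
   MatGetRowMaxAbs_MPIBAIJ - For each local row, returns the entry with the largest absolute
   value.  The diagonal (A) and off-diagonal (B) parts are searched separately and the larger
   of the two results is kept; column indices found in the B part are translated from local to
   global numbering through garray.
*/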
14 #undef __FUNCT__
15 #define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ"
16 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
17 {
18   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
19   PetscErrorCode ierr;
20   PetscInt       i,*idxb = 0;
21   PetscScalar    *va,*vb;
22   Vec            vtmp;
23 
24   PetscFunctionBegin;
25   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
26   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
27   if (idx) {
28     for (i=0; i<A->rmap->n; i++) {
29       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
30     }
31   }
32 
33   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
34   if (idx) {ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);}
35   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
36   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
37 
38   for (i=0; i<A->rmap->n; i++) {
39     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
40       va[i] = vb[i];
41       if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);
42     }
43   }
44 
45   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
46   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
47   ierr = PetscFree(idxb);CHKERRQ(ierr);
48   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
49   PetscFunctionReturn(0);
50 }
51 
52 #undef __FUNCT__
53 #define __FUNCT__ "MatStoreValues_MPIBAIJ"
54 PetscErrorCode  MatStoreValues_MPIBAIJ(Mat mat)
55 {
56   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
57   PetscErrorCode ierr;
58 
59   PetscFunctionBegin;
60   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
61   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
62   PetscFunctionReturn(0);
63 }
64 
65 #undef __FUNCT__
66 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
67 PetscErrorCode  MatRetrieveValues_MPIBAIJ(Mat mat)
68 {
69   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
70   PetscErrorCode ierr;
71 
72   PetscFunctionBegin;
73   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
74   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
75   PetscFunctionReturn(0);
76 }
77 
78 /*
79      Local utility routine that creates a mapping from the global column
80    number to the local column number in the off-diagonal part of the local
81    storage of the matrix.  This is done in a non-scalable way since the
82    length of colmap equals the number of block columns in the global matrix.
83 */
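/*
   For illustration only (hypothetical values): with bs = 2 and garray = {3,7}, the colmap built
   below maps global block column 3 to 1 and global block column 7 to 3, i.e. 1 + bs times the
   local block column.  MatSetValues_MPIBAIJ() then recovers the local point column of a global
   point column c as (colmap value for block column c/bs) - 1 + c%bs; a zero (or not found)
   entry means that block column has no local off-diagonal storage.
*/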
84 #undef __FUNCT__
85 #define __FUNCT__ "MatCreateColmap_MPIBAIJ_Private"
86 PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
87 {
88   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
89   Mat_SeqBAIJ    *B    = (Mat_SeqBAIJ*)baij->B->data;
90   PetscErrorCode ierr;
91   PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;
92 
93   PetscFunctionBegin;
94 #if defined(PETSC_USE_CTABLE)
95   ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
96   for (i=0; i<nbs; i++) {
97     ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr);
98   }
99 #else
100   ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr);
101   ierr = PetscLogObjectMemory((PetscObject)mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
102   ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
103   for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
104 #endif
105   PetscFunctionReturn(0);
106 }
107 
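/*
   MatSetValues_SeqBAIJ_A_Private() and MatSetValues_SeqBAIJ_B_Private() insert or add one
   scalar entry (row,col), given in local point indices, into the diagonal (A) or off-diagonal
   (B) SeqBAIJ part.  They locate the block column with a short binary search followed by a
   linear scan of the block row; if the block is not yet present and new nonzeros are allowed,
   the row is reallocated when necessary and the later blocks are shifted up to make room.
*/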
108 #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
109   { \
110  \
111     brow = row/bs;  \
112     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
113     rmax = aimax[brow]; nrow = ailen[brow]; \
114     bcol = col/bs; \
115     ridx = row % bs; cidx = col % bs; \
116     low  = 0; high = nrow; \
117     while (high-low > 3) { \
118       t = (low+high)/2; \
119       if (rp[t] > bcol) high = t; \
120       else              low  = t; \
121     } \
122     for (_i=low; _i<high; _i++) { \
123       if (rp[_i] > bcol) break; \
124       if (rp[_i] == bcol) { \
125         bap = ap +  bs2*_i + bs*cidx + ridx; \
126         if (addv == ADD_VALUES) *bap += value;  \
127         else                    *bap  = value;  \
128         goto a_noinsert; \
129       } \
130     } \
131     if (a->nonew == 1) goto a_noinsert; \
132     if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
133     MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
134     N = nrow++ - 1;  \
135     /* shift up all the later entries in this row */ \
136     for (ii=N; ii>=_i; ii--) { \
137       rp[ii+1] = rp[ii]; \
138       ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
139     } \
140     if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
141     rp[_i]                      = bcol;  \
142     ap[bs2*_i + bs*cidx + ridx] = value;  \
143 a_noinsert:; \
144     ailen[brow] = nrow; \
145   }
146 
147 #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
148   { \
149     brow = row/bs;  \
150     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
151     rmax = bimax[brow]; nrow = bilen[brow]; \
152     bcol = col/bs; \
153     ridx = row % bs; cidx = col % bs; \
154     low  = 0; high = nrow; \
155     while (high-low > 3) { \
156       t = (low+high)/2; \
157       if (rp[t] > bcol) high = t; \
158       else              low  = t; \
159     } \
160     for (_i=low; _i<high; _i++) { \
161       if (rp[_i] > bcol) break; \
162       if (rp[_i] == bcol) { \
163         bap = ap +  bs2*_i + bs*cidx + ridx; \
164         if (addv == ADD_VALUES) *bap += value;  \
165         else                    *bap  = value;  \
166         goto b_noinsert; \
167       } \
168     } \
169     if (b->nonew == 1) goto b_noinsert; \
170     if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
171     MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
172     N = nrow++ - 1;  \
173     /* shift up all the later entries in this row */ \
174     for (ii=N; ii>=_i; ii--) { \
175       rp[ii+1] = rp[ii]; \
176       ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
177     } \
178     if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
179     rp[_i]                      = bcol;  \
180     ap[bs2*_i + bs*cidx + ridx] = value;  \
181 b_noinsert:; \
182     bilen[brow] = nrow; \
183   }
184 
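/*
   MatSetValues_MPIBAIJ - Inserts values given in global point (not block) indices.  Locally
   owned entries go directly into the diagonal part (columns inside the owned column range) or
   the off-diagonal part (after translating the global column through colmap); entries for rows
   owned by other processes are placed in the stash and communicated during assembly.
*/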
185 #undef __FUNCT__
186 #define __FUNCT__ "MatSetValues_MPIBAIJ"
187 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
188 {
189   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
190   MatScalar      value;
191   PetscBool      roworiented = baij->roworiented;
192   PetscErrorCode ierr;
193   PetscInt       i,j,row,col;
194   PetscInt       rstart_orig=mat->rmap->rstart;
195   PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
196   PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;
197 
198   /* Some variables required by the MatSetValues_SeqBAIJ_{A,B}_Private() macros */
199   Mat         A     = baij->A;
200   Mat_SeqBAIJ *a    = (Mat_SeqBAIJ*)(A)->data;
201   PetscInt    *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
202   MatScalar   *aa   =a->a;
203 
204   Mat         B     = baij->B;
205   Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
206   PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
207   MatScalar   *ba   =b->a;
208 
209   PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
210   PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
211   MatScalar *ap,*bap;
212 
213   PetscFunctionBegin;
214   if (v) PetscValidScalarPointer(v,6);
215   for (i=0; i<m; i++) {
216     if (im[i] < 0) continue;
217 #if defined(PETSC_USE_DEBUG)
218     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
219 #endif
220     if (im[i] >= rstart_orig && im[i] < rend_orig) {
221       row = im[i] - rstart_orig;
222       for (j=0; j<n; j++) {
223         if (in[j] >= cstart_orig && in[j] < cend_orig) {
224           col = in[j] - cstart_orig;
225           if (roworiented) value = v[i*n+j];
226           else             value = v[i+j*m];
227           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
228           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
229         } else if (in[j] < 0) continue;
230 #if defined(PETSC_USE_DEBUG)
231         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
232 #endif
233         else {
234           if (mat->was_assembled) {
235             if (!baij->colmap) {
236               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
237             }
238 #if defined(PETSC_USE_CTABLE)
239             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
240             col  = col - 1;
241 #else
242             col = baij->colmap[in[j]/bs] - 1;
243 #endif
244             if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
245               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
246               col  =  in[j];
247               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
248               B    = baij->B;
249               b    = (Mat_SeqBAIJ*)(B)->data;
250               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
251               ba   =b->a;
252             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
253             else col += in[j]%bs;
254           } else col = in[j];
255           if (roworiented) value = v[i*n+j];
256           else             value = v[i+j*m];
257           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
258           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
259         }
260       }
261     } else {
262       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
263       if (!baij->donotstash) {
264         mat->assembled = PETSC_FALSE;
265         if (roworiented) {
266           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
267         } else {
268           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
269         }
270       }
271     }
272   }
273   PetscFunctionReturn(0);
274 }
275 
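/*
   MatSetValuesBlocked_MPIBAIJ - Inserts dense bs x bs blocks given in global block indices.
   When more than one block is passed per call, the requested block is first copied into the
   contiguous work array barray (respecting the row/column orientation) before being handed to
   the SeqBAIJ routines; blocks belonging to rows owned by other processes go into the block
   stash.
*/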
276 #undef __FUNCT__
277 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
278 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
279 {
280   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
281   const PetscScalar *value;
282   MatScalar         *barray     = baij->barray;
283   PetscBool         roworiented = baij->roworiented;
284   PetscErrorCode    ierr;
285   PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
286   PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
287   PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
288 
289   PetscFunctionBegin;
290   if (!barray) {
291     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
292     baij->barray = barray;
293   }
294 
295   if (roworiented) stepval = (n-1)*bs;
296   else stepval = (m-1)*bs;
297 
298   for (i=0; i<m; i++) {
299     if (im[i] < 0) continue;
300 #if defined(PETSC_USE_DEBUG)
301     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
302 #endif
303     if (im[i] >= rstart && im[i] < rend) {
304       row = im[i] - rstart;
305       for (j=0; j<n; j++) {
306         /* If only one block column (row-oriented) or one block row (column-oriented) is passed, the block is already contiguous and a copy is not required */
307         if ((roworiented) && (n == 1)) {
308           barray = (MatScalar*)v + i*bs2;
309         } else if ((!roworiented) && (m == 1)) {
310           barray = (MatScalar*)v + j*bs2;
311         } else { /* Here a copy is required */
312           if (roworiented) {
313             value = v + (i*(stepval+bs) + j)*bs;
314           } else {
315             value = v + (j*(stepval+bs) + i)*bs;
316           }
317           for (ii=0; ii<bs; ii++,value+=bs+stepval) {
318             for (jj=0; jj<bs; jj++) barray[jj] = value[jj];
319             barray += bs;
320           }
321           barray -= bs2;
322         }
323 
324         if (in[j] >= cstart && in[j] < cend) {
325           col  = in[j] - cstart;
326           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
327         } else if (in[j] < 0) continue;
328 #if defined(PETSC_USE_DEBUG)
329         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
330 #endif
331         else {
332           if (mat->was_assembled) {
333             if (!baij->colmap) {
334               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
335             }
336 
337 #if defined(PETSC_USE_DEBUG)
338 #if defined(PETSC_USE_CTABLE)
339             { PetscInt data;
340               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
341               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
342             }
343 #else
344             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
345 #endif
346 #endif
347 #if defined(PETSC_USE_CTABLE)
348             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
349             col  = (col - 1)/bs;
350 #else
351             col = (baij->colmap[in[j]] - 1)/bs;
352 #endif
353             if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
354               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
355               col  =  in[j];
356             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", bs*im[i], bs*in[j]);
357           } else col = in[j];
358           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
359         }
360       }
361     } else {
362       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
363       if (!baij->donotstash) {
364         if (roworiented) {
365           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
366         } else {
367           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
368         }
369       }
370     }
371   }
372   PetscFunctionReturn(0);
373 }
374 
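/*
   The hash function below uses multiplicative (Fibonacci) hashing: the key is multiplied by
   HASH_KEY (the fractional part of the golden ratio), the fractional part of the product is
   taken, and the result is scaled by the table size to obtain a slot index.
*/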
375 #define HASH_KEY 0.6180339887
376 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
377 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
378 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
379 #undef __FUNCT__
380 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
381 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
382 {
383   Mat_MPIBAIJ    *baij       = (Mat_MPIBAIJ*)mat->data;
384   PetscBool      roworiented = baij->roworiented;
385   PetscErrorCode ierr;
386   PetscInt       i,j,row,col;
387   PetscInt       rstart_orig=mat->rmap->rstart;
388   PetscInt       rend_orig  =mat->rmap->rend,Nbs=baij->Nbs;
389   PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
390   PetscReal      tmp;
391   MatScalar      **HD = baij->hd,value;
392 #if defined(PETSC_USE_DEBUG)
393   PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
394 #endif
395 
396   PetscFunctionBegin;
397   if (v) PetscValidScalarPointer(v,6);
398   for (i=0; i<m; i++) {
399 #if defined(PETSC_USE_DEBUG)
400     if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
401     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
402 #endif
403     row = im[i];
404     if (row >= rstart_orig && row < rend_orig) {
405       for (j=0; j<n; j++) {
406         col = in[j];
407         if (roworiented) value = v[i*n+j];
408         else             value = v[i+j*m];
409         /* Look up the entry in the hash table */
410         key = (row/bs)*Nbs+(col/bs)+1;
411         h1  = HASH(size,key,tmp);
412 
413 
414         idx = h1;
415 #if defined(PETSC_USE_DEBUG)
416         insert_ct++;
417         total_ct++;
418         if (HT[idx] != key) {
419           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
420           if (idx == size) {
421             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
422             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
423           }
424         }
425 #else
426         if (HT[idx] != key) {
427           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
428           if (idx == size) {
429             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
430             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
431           }
432         }
433 #endif
434         /* A HASH table entry is found, so insert the values at the correct address */
435         if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
436         else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
437       }
438     } else if (!baij->donotstash) {
439       if (roworiented) {
440         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
441       } else {
442         ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
443       }
444     }
445   }
446 #if defined(PETSC_USE_DEBUG)
447   baij->ht_total_ct  = total_ct;
448   baij->ht_insert_ct = insert_ct;
449 #endif
450   PetscFunctionReturn(0);
451 }
452 
453 #undef __FUNCT__
454 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
455 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
456 {
457   Mat_MPIBAIJ       *baij       = (Mat_MPIBAIJ*)mat->data;
458   PetscBool         roworiented = baij->roworiented;
459   PetscErrorCode    ierr;
460   PetscInt          i,j,ii,jj,row,col;
461   PetscInt          rstart=baij->rstartbs;
462   PetscInt          rend  =mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
463   PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
464   PetscReal         tmp;
465   MatScalar         **HD = baij->hd,*baij_a;
466   const PetscScalar *v_t,*value;
467 #if defined(PETSC_USE_DEBUG)
468   PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
469 #endif
470 
471   PetscFunctionBegin;
472   if (roworiented) stepval = (n-1)*bs;
473   else stepval = (m-1)*bs;
474 
475   for (i=0; i<m; i++) {
476 #if defined(PETSC_USE_DEBUG)
477     if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
478     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
479 #endif
480     row = im[i];
481     v_t = v + i*nbs2;
482     if (row >= rstart && row < rend) {
483       for (j=0; j<n; j++) {
484         col = in[j];
485 
486         /* Look up into the Hash Table */
487         key = row*Nbs+col+1;
488         h1  = HASH(size,key,tmp);
489 
490         idx = h1;
491 #if defined(PETSC_USE_DEBUG)
492         total_ct++;
493         insert_ct++;
494         if (HT[idx] != key) {
495           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
496           if (idx == size) {
497             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
498             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
499           }
500         }
501 #else
502         if (HT[idx] != key) {
503           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
504           if (idx == size) {
505             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
506             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
507           }
508         }
509 #endif
510         baij_a = HD[idx];
511         if (roworiented) {
512           /*value = v + i*(stepval+bs)*bs + j*bs;*/
513           /* value = v + (i*(stepval+bs)+j)*bs; */
514           value = v_t;
515           v_t  += bs;
516           if (addv == ADD_VALUES) {
517             for (ii=0; ii<bs; ii++,value+=stepval) {
518               for (jj=ii; jj<bs2; jj+=bs) {
519                 baij_a[jj] += *value++;
520               }
521             }
522           } else {
523             for (ii=0; ii<bs; ii++,value+=stepval) {
524               for (jj=ii; jj<bs2; jj+=bs) {
525                 baij_a[jj] = *value++;
526               }
527             }
528           }
529         } else {
530           value = v + j*(stepval+bs)*bs + i*bs;
531           if (addv == ADD_VALUES) {
532             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
533               for (jj=0; jj<bs; jj++) {
534                 baij_a[jj] += *value++;
535               }
536             }
537           } else {
538             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
539               for (jj=0; jj<bs; jj++) {
540                 baij_a[jj] = *value++;
541               }
542             }
543           }
544         }
545       }
546     } else {
547       if (!baij->donotstash) {
548         if (roworiented) {
549           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
550         } else {
551           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
552         }
553       }
554     }
555   }
556 #if defined(PETSC_USE_DEBUG)
557   baij->ht_total_ct  = total_ct;
558   baij->ht_insert_ct = insert_ct;
559 #endif
560   PetscFunctionReturn(0);
561 }
562 
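/*
   MatGetValues_MPIBAIJ - Retrieves values by global point indices.  Only locally owned rows
   are supported; off-diagonal columns are translated through colmap and checked against
   garray, and requested locations that are not stored return 0.0.
*/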
563 #undef __FUNCT__
564 #define __FUNCT__ "MatGetValues_MPIBAIJ"
565 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
566 {
567   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
568   PetscErrorCode ierr;
569   PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
570   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
571 
572   PetscFunctionBegin;
573   for (i=0; i<m; i++) {
574     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
575     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
576     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
577       row = idxm[i] - bsrstart;
578       for (j=0; j<n; j++) {
579         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
580         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
581         if (idxn[j] >= bscstart && idxn[j] < bscend) {
582           col  = idxn[j] - bscstart;
583           ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
584         } else {
585           if (!baij->colmap) {
586             ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
587           }
588 #if defined(PETSC_USE_CTABLE)
589           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
590           data--;
591 #else
592           data = baij->colmap[idxn[j]/bs]-1;
593 #endif
594           if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
595           else {
596             col  = data + idxn[j]%bs;
597             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
598           }
599         }
600       }
601     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
602   }
603   PetscFunctionReturn(0);
604 }
605 
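/*
   MatNorm_MPIBAIJ - Frobenius norm: sum the squares of the local entries and reduce globally.
   1-norm (maximum column sum): accumulate into a temporary array whose length equals the
   global number of columns (not scalable) and reduce with MPIU_SUM.  Infinity norm (maximum
   row sum): compute the local maximum and reduce with MPIU_MAX.
*/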
606 #undef __FUNCT__
607 #define __FUNCT__ "MatNorm_MPIBAIJ"
608 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
609 {
610   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
611   Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
612   PetscErrorCode ierr;
613   PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
614   PetscReal      sum = 0.0;
615   MatScalar      *v;
616 
617   PetscFunctionBegin;
618   if (baij->size == 1) {
619     ierr =  MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
620   } else {
621     if (type == NORM_FROBENIUS) {
622       v  = amat->a;
623       nz = amat->nz*bs2;
624       for (i=0; i<nz; i++) {
625         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
626       }
627       v  = bmat->a;
628       nz = bmat->nz*bs2;
629       for (i=0; i<nz; i++) {
630         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
631       }
632       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
633       *nrm = PetscSqrtReal(*nrm);
634     } else if (type == NORM_1) { /* max column sum */
635       PetscReal *tmp,*tmp2;
636       PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
637       ierr = PetscMalloc2(mat->cmap->N,PetscReal,&tmp,mat->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr);
638       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
639       v    = amat->a; jj = amat->j;
640       for (i=0; i<amat->nz; i++) {
641         for (j=0; j<bs; j++) {
642           col = bs*(cstart + *jj) + j; /* column index */
643           for (row=0; row<bs; row++) {
644             tmp[col] += PetscAbsScalar(*v);  v++;
645           }
646         }
647         jj++;
648       }
649       v = bmat->a; jj = bmat->j;
650       for (i=0; i<bmat->nz; i++) {
651         for (j=0; j<bs; j++) {
652           col = bs*garray[*jj] + j;
653           for (row=0; row<bs; row++) {
654             tmp[col] += PetscAbsScalar(*v); v++;
655           }
656         }
657         jj++;
658       }
659       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
660       *nrm = 0.0;
661       for (j=0; j<mat->cmap->N; j++) {
662         if (tmp2[j] > *nrm) *nrm = tmp2[j];
663       }
664       ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
665     } else if (type == NORM_INFINITY) { /* max row sum */
666       PetscReal *sums;
667       ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr);
668       sum  = 0.0;
669       for (j=0; j<amat->mbs; j++) {
670         for (row=0; row<bs; row++) sums[row] = 0.0;
671         v  = amat->a + bs2*amat->i[j];
672         nz = amat->i[j+1]-amat->i[j];
673         for (i=0; i<nz; i++) {
674           for (col=0; col<bs; col++) {
675             for (row=0; row<bs; row++) {
676               sums[row] += PetscAbsScalar(*v); v++;
677             }
678           }
679         }
680         v  = bmat->a + bs2*bmat->i[j];
681         nz = bmat->i[j+1]-bmat->i[j];
682         for (i=0; i<nz; i++) {
683           for (col=0; col<bs; col++) {
684             for (row=0; row<bs; row++) {
685               sums[row] += PetscAbsScalar(*v); v++;
686             }
687           }
688         }
689         for (row=0; row<bs; row++) {
690           if (sums[row] > sum) sum = sums[row];
691         }
692       }
693       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
694       ierr = PetscFree(sums);CHKERRQ(ierr);
695     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for this norm yet");
696   }
697   PetscFunctionReturn(0);
698 }
699 
700 /*
701   Creates the hash table and fills it in.
702   This table is created only once.
703   If new entries need to be added to the matrix
704   then the hash table has to be destroyed and
705   recreated.
706 */
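/*
  The table uses open addressing with linear probing: insertion starts at h1 = HASH(ht_size,key)
  and scans forward (wrapping around modulo ht_size) for the first empty slot; the *_HT
  MatSetValues routines follow the same probe sequence when looking a key up.
*/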
707 #undef __FUNCT__
708 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
709 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
710 {
711   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
712   Mat            A     = baij->A,B=baij->B;
713   Mat_SeqBAIJ    *a    = (Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)B->data;
714   PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
715   PetscErrorCode ierr;
716   PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
717   PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
718   PetscInt       *HT,key;
719   MatScalar      **HD;
720   PetscReal      tmp;
721 #if defined(PETSC_USE_INFO)
722   PetscInt ct=0,max=0;
723 #endif
724 
725   PetscFunctionBegin;
726   if (baij->ht) PetscFunctionReturn(0);
727 
728   baij->ht_size = (PetscInt)(factor*nz);
729   ht_size       = baij->ht_size;
730 
731   /* Allocate Memory for Hash Table */
732   ierr = PetscMalloc2(ht_size,MatScalar*,&baij->hd,ht_size,PetscInt,&baij->ht);CHKERRQ(ierr);
733   ierr = PetscMemzero(baij->hd,ht_size*sizeof(MatScalar*));CHKERRQ(ierr);
734   ierr = PetscMemzero(baij->ht,ht_size*sizeof(PetscInt));CHKERRQ(ierr);
735   HD   = baij->hd;
736   HT   = baij->ht;
737 
738   /* Loop Over A */
739   for (i=0; i<a->mbs; i++) {
740     for (j=ai[i]; j<ai[i+1]; j++) {
741       row = i+rstart;
742       col = aj[j]+cstart;
743 
744       key = row*Nbs + col + 1;
745       h1  = HASH(ht_size,key,tmp);
746       for (k=0; k<ht_size; k++) {
747         if (!HT[(h1+k)%ht_size]) {
748           HT[(h1+k)%ht_size] = key;
749           HD[(h1+k)%ht_size] = a->a + j*bs2;
750           break;
751 #if defined(PETSC_USE_INFO)
752         } else {
753           ct++;
754 #endif
755         }
756       }
757 #if defined(PETSC_USE_INFO)
758       if (k> max) max = k;
759 #endif
760     }
761   }
762   /* Loop Over B */
763   for (i=0; i<b->mbs; i++) {
764     for (j=bi[i]; j<bi[i+1]; j++) {
765       row = i+rstart;
766       col = garray[bj[j]];
767       key = row*Nbs + col + 1;
768       h1  = HASH(ht_size,key,tmp);
769       for (k=0; k<ht_size; k++) {
770         if (!HT[(h1+k)%ht_size]) {
771           HT[(h1+k)%ht_size] = key;
772           HD[(h1+k)%ht_size] = b->a + j*bs2;
773           break;
774 #if defined(PETSC_USE_INFO)
775         } else {
776           ct++;
777 #endif
778         }
779       }
780 #if defined(PETSC_USE_INFO)
781       if (k> max) max = k;
782 #endif
783     }
784   }
785 
786   /* Print Summary */
787 #if defined(PETSC_USE_INFO)
788   for (i=0,j=0; i<ht_size; i++) {
789     if (HT[i]) j++;
790   }
791   ierr = PetscInfo2(mat,"Average Search = %5.2f, max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
792 #endif
793   PetscFunctionReturn(0);
794 }
795 
796 #undef __FUNCT__
797 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
798 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
799 {
800   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
801   PetscErrorCode ierr;
802   PetscInt       nstash,reallocs;
803   InsertMode     addv;
804 
805   PetscFunctionBegin;
806   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
807 
808   /* make sure all processors are using either INSERT_VALUES or ADD_VALUES */
809   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
810   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
811   mat->insertmode = addv; /* in case this processor had no cache */
812 
813   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
814   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
815   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
816   ierr = PetscInfo2(mat,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
817   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
818   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
819   PetscFunctionReturn(0);
820 }
821 
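/*
   MatAssemblyEnd_MPIBAIJ - Drains the point and block stashes (inserting the received
   off-process values), assembles the diagonal and off-diagonal parts, synchronizes
   disassembly across processes, sets up the scatter for matrix-vector products on the first
   final assembly, and optionally builds the hash table used by the *_HT SetValues routines.
*/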
822 #undef __FUNCT__
823 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
824 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
825 {
826   Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
827   Mat_SeqBAIJ    *a   =(Mat_SeqBAIJ*)baij->A->data;
828   PetscErrorCode ierr;
829   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
830   PetscInt       *row,*col;
831   PetscBool      r1,r2,r3,other_disassembled;
832   MatScalar      *val;
833   InsertMode     addv = mat->insertmode;
834   PetscMPIInt    n;
835 
836   PetscFunctionBegin;
837   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
838   if (!baij->donotstash && !mat->nooffprocentries) {
839     while (1) {
840       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
841       if (!flg) break;
842 
843       for (i=0; i<n;) {
844         /* Now identify the consecutive values belonging to the same row */
845         for (j=i,rstart=row[j]; j<n; j++) {
846           if (row[j] != rstart) break;
847         }
848         if (j < n) ncols = j-i;
849         else       ncols = n-i;
850         /* Now assemble all these values with a single function call */
851         ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
852         i    = j;
853       }
854     }
855     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
856     /* Now process the block-stash. Since the values are stashed column-oriented,
857        temporarily clear the roworiented flags, and restore the original flags
858        after MatSetValues() */
859     r1 = baij->roworiented;
860     r2 = a->roworiented;
861     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
862 
863     baij->roworiented = PETSC_FALSE;
864     a->roworiented    = PETSC_FALSE;
865 
866     (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
867     while (1) {
868       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
869       if (!flg) break;
870 
871       for (i=0; i<n;) {
872         /* Now identify the consecutive values belonging to the same row */
873         for (j=i,rstart=row[j]; j<n; j++) {
874           if (row[j] != rstart) break;
875         }
876         if (j < n) ncols = j-i;
877         else       ncols = n-i;
878         ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
879         i    = j;
880       }
881     }
882     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
883 
884     baij->roworiented = r1;
885     a->roworiented    = r2;
886 
887     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
888   }
889 
890   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
891   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
892 
893   /* determine if any processor has disassembled, if so we must
894      also disassemble ourselves, in order that we may reassemble. */
895   /*
896      if the nonzero structure of submatrix B cannot change then we know that
897      no processor disassembled, and thus we can skip this step
898   */
899   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
900     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
901     if (mat->was_assembled && !other_disassembled) {
902       ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
903     }
904   }
905 
906   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
907     ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
908   }
909   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
910   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
911 
912 #if defined(PETSC_USE_INFO)
913   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
914     ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);
915 
916     baij->ht_total_ct  = 0;
917     baij->ht_insert_ct = 0;
918   }
919 #endif
920   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
921     ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
922 
923     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
924     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
925   }
926 
927   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
928 
929   baij->rowvalues = 0;
930   PetscFunctionReturn(0);
931 }
932 
933 #include <petscdraw.h>
934 #undef __FUNCT__
935 #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
936 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
937 {
938   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
939   PetscErrorCode    ierr;
940   PetscMPIInt       size = baij->size,rank = baij->rank;
941   PetscInt          bs   = mat->rmap->bs;
942   PetscBool         iascii,isdraw;
943   PetscViewer       sviewer;
944   PetscViewerFormat format;
945 
946   PetscFunctionBegin;
947   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
948   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
949   if (iascii) {
950     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
951     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
952       MatInfo info;
953       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
954       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
955       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
956       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
957                                                 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
958       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
959       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
960       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
961       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
962       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
963       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
964       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
965       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
966       PetscFunctionReturn(0);
967     } else if (format == PETSC_VIEWER_ASCII_INFO) {
968       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
969       PetscFunctionReturn(0);
970     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
971       PetscFunctionReturn(0);
972     }
973   }
974 
975   if (isdraw) {
976     PetscDraw draw;
977     PetscBool isnull;
978     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
979     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
980   }
981 
982   if (size == 1) {
983     ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
984     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
985   } else {
986     /* assemble the entire matrix onto first processor. */
987     Mat         A;
988     Mat_SeqBAIJ *Aloc;
989     PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
990     MatScalar   *a;
991 
992     /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
993     /* Perhaps this should be the type of mat? */
994     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
995     if (!rank) {
996       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
997     } else {
998       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
999     }
1000     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
1001     ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
1002     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1003     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1004 
1005     /* copy over the A part */
1006     Aloc = (Mat_SeqBAIJ*)baij->A->data;
1007     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1008     ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
1009 
1010     for (i=0; i<mbs; i++) {
1011       rvals[0] = bs*(baij->rstartbs + i);
1012       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1013       for (j=ai[i]; j<ai[i+1]; j++) {
1014         col = (baij->cstartbs+aj[j])*bs;
1015         for (k=0; k<bs; k++) {
1016           ierr      = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1017           col++; a += bs;
1018         }
1019       }
1020     }
1021     /* copy over the B part */
1022     Aloc = (Mat_SeqBAIJ*)baij->B->data;
1023     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1024     for (i=0; i<mbs; i++) {
1025       rvals[0] = bs*(baij->rstartbs + i);
1026       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1027       for (j=ai[i]; j<ai[i+1]; j++) {
1028         col = baij->garray[aj[j]]*bs;
1029         for (k=0; k<bs; k++) {
1030           ierr      = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1031           col++; a += bs;
1032         }
1033       }
1034     }
1035     ierr = PetscFree(rvals);CHKERRQ(ierr);
1036     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1037     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1038     /*
1039        Everyone has to make this call to draw the matrix since the graphics waits are
1040        synchronized across all processors that share the PetscDraw object
1041     */
1042     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1043     if (!rank) {
1044       ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1045       /* Set the type name to MATMPIBAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqBAIJ_ASCII()*/
1046       ierr = PetscStrcpy(((PetscObject)((Mat_MPIBAIJ*)(A->data))->A)->type_name,MATMPIBAIJ);CHKERRQ(ierr);
1047       ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1048     }
1049     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1050     ierr = MatDestroy(&A);CHKERRQ(ierr);
1051   }
1052   PetscFunctionReturn(0);
1053 }
1054 
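/*
   MatView_MPIBAIJ_Binary - Writes the matrix in the standard PETSc binary format: a header
   (classid, global rows, global columns, total nonzeros) followed by the row lengths, column
   indices, and values, all expanded from blocks to individual point entries.  Process 0
   gathers the data from the other processes under flow control and performs all the writing.
*/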
1055 #undef __FUNCT__
1056 #define __FUNCT__ "MatView_MPIBAIJ_Binary"
1057 static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
1058 {
1059   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)mat->data;
1060   Mat_SeqBAIJ    *A = (Mat_SeqBAIJ*)a->A->data;
1061   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)a->B->data;
1062   PetscErrorCode ierr;
1063   PetscInt       i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen;
1064   PetscInt       *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll;
1065   int            fd;
1066   PetscScalar    *column_values;
1067   FILE           *file;
1068   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1069   PetscInt       message_count,flowcontrolcount;
1070 
1071   PetscFunctionBegin;
1072   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1073   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1074   nz   = bs2*(A->nz + B->nz);
1075   rlen = mat->rmap->n;
1076   if (!rank) {
1077     header[0] = MAT_FILE_CLASSID;
1078     header[1] = mat->rmap->N;
1079     header[2] = mat->cmap->N;
1080 
1081     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1082     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1083     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1084     /* get largest number of rows any processor has */
1085     range = mat->rmap->range;
1086     for (i=1; i<size; i++) {
1087       rlen = PetscMax(rlen,range[i+1] - range[i]);
1088     }
1089   } else {
1090     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1091   }
1092 
1093   ierr = PetscMalloc((rlen/bs)*sizeof(PetscInt),&crow_lens);CHKERRQ(ierr);
1094   /* compute lengths of each row  */
1095   for (i=0; i<a->mbs; i++) {
1096     crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1097   }
1098   /* store the row lengths to the file */
1099   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1100   if (!rank) {
1101     MPI_Status status;
1102     ierr = PetscMalloc(rlen*sizeof(PetscInt),&row_lens);CHKERRQ(ierr);
1103     rlen = (range[1] - range[0])/bs;
1104     for (i=0; i<rlen; i++) {
1105       for (j=0; j<bs; j++) {
1106         row_lens[i*bs+j] = bs*crow_lens[i];
1107       }
1108     }
1109     ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1110     for (i=1; i<size; i++) {
1111       rlen = (range[i+1] - range[i])/bs;
1112       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1113       ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1114       for (k=0; k<rlen; k++) {
1115         for (j=0; j<bs; j++) {
1116           row_lens[k*bs+j] = bs*crow_lens[k];
1117         }
1118       }
1119       ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1120     }
1121     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1122     ierr = PetscFree(row_lens);CHKERRQ(ierr);
1123   } else {
1124     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1125     ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1126     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1127   }
1128   ierr = PetscFree(crow_lens);CHKERRQ(ierr);
1129 
1130   /* load up the local column indices. Include the indices for every row, not just one per block row, since process 0 does not have the
1131      information needed to expand a block row into its individual rows. This does require more communication but still not more than
1132      the communication needed for the nonzero values  */
1133   nzmax = nz; /* space the largest processor needs */
1134   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1135   ierr  = PetscMalloc(nzmax*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
1136   cnt   = 0;
1137   for (i=0; i<a->mbs; i++) {
1138     pcnt = cnt;
1139     for (j=B->i[i]; j<B->i[i+1]; j++) {
1140       if ((col = garray[B->j[j]]) > cstart) break;
1141       for (l=0; l<bs; l++) {
1142         column_indices[cnt++] = bs*col+l;
1143       }
1144     }
1145     for (k=A->i[i]; k<A->i[i+1]; k++) {
1146       for (l=0; l<bs; l++) {
1147         column_indices[cnt++] = bs*(A->j[k] + cstart)+l;
1148       }
1149     }
1150     for (; j<B->i[i+1]; j++) {
1151       for (l=0; l<bs; l++) {
1152         column_indices[cnt++] = bs*garray[B->j[j]]+l;
1153       }
1154     }
1155     len = cnt - pcnt;
1156     for (k=1; k<bs; k++) {
1157       ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr);
1158       cnt += len;
1159     }
1160   }
1161   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1162 
1163   /* store the columns to the file */
1164   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1165   if (!rank) {
1166     MPI_Status status;
1167     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1168     for (i=1; i<size; i++) {
1169       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1170       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1171       ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1172       ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1173     }
1174     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1175   } else {
1176     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1177     ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1178     ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1179     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1180   }
1181   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1182 
1183   /* load up the numerical values */
1184   ierr = PetscMalloc(nzmax*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1185   cnt  = 0;
1186   for (i=0; i<a->mbs; i++) {
1187     rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]);
1188     for (j=B->i[i]; j<B->i[i+1]; j++) {
1189       if (garray[B->j[j]] > cstart) break;
1190       for (l=0; l<bs; l++) {
1191         for (ll=0; ll<bs; ll++) {
1192           column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1193         }
1194       }
1195       cnt += bs;
1196     }
1197     for (k=A->i[i]; k<A->i[i+1]; k++) {
1198       for (l=0; l<bs; l++) {
1199         for (ll=0; ll<bs; ll++) {
1200           column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll];
1201         }
1202       }
1203       cnt += bs;
1204     }
1205     for (; j<B->i[i+1]; j++) {
1206       for (l=0; l<bs; l++) {
1207         for (ll=0; ll<bs; ll++) {
1208           column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1209         }
1210       }
1211       cnt += bs;
1212     }
1213     cnt += (bs-1)*rlen;
1214   }
1215   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1216 
1217   /* store the column values to the file */
1218   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1219   if (!rank) {
1220     MPI_Status status;
1221     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1222     for (i=1; i<size; i++) {
1223       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1224       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1225       ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1226       ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1227     }
1228     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1229   } else {
1230     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1231     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1232     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1233     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1234   }
1235   ierr = PetscFree(column_values);CHKERRQ(ierr);
1236 
1237   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1238   if (file) {
1239     fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
1240   }
1241   PetscFunctionReturn(0);
1242 }
1243 
1244 #undef __FUNCT__
1245 #define __FUNCT__ "MatView_MPIBAIJ"
1246 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
1247 {
1248   PetscErrorCode ierr;
1249   PetscBool      iascii,isdraw,issocket,isbinary;
1250 
1251   PetscFunctionBegin;
1252   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1253   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1254   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1255   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1256   if (iascii || isdraw || issocket) {
1257     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1258   } else if (isbinary) {
1259     ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1260   }
1261   PetscFunctionReturn(0);
1262 }
1263 
1264 #undef __FUNCT__
1265 #define __FUNCT__ "MatDestroy_MPIBAIJ"
1266 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
1267 {
1268   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1269   PetscErrorCode ierr;
1270 
1271   PetscFunctionBegin;
1272 #if defined(PETSC_USE_LOG)
1273   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
1274 #endif
1275   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1276   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
1277   ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
1278   ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
1279 #if defined(PETSC_USE_CTABLE)
1280   ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
1281 #else
1282   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
1283 #endif
1284   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
1285   ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
1286   ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
1287   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
1288   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
1289   ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);
1290   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
1291   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1292 
1293   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1294   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1295   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1296   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
1297   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1298   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1299   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1300   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C",NULL);CHKERRQ(ierr);
1301   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1302   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C",NULL);CHKERRQ(ierr);
1303   PetscFunctionReturn(0);
1304 }
1305 
1306 #undef __FUNCT__
1307 #define __FUNCT__ "MatMult_MPIBAIJ"
1308 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1309 {
1310   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1311   PetscErrorCode ierr;
1312   PetscInt       nt;
1313 
1314   PetscFunctionBegin;
1315   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1316   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1317   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1318   if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
1319   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1320   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1321   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1322   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1323   PetscFunctionReturn(0);
1324 }
1325 
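/*
   Illustrative sketch (not part of the source): MatMult_MPIBAIJ computes
   y = A_d*x_local + B_o*x_ghost, where A_d is the local diagonal block and
   B_o the off-diagonal block acting on ghost values gathered into a->lvec.
   The ghost-value scatter is started before and finished after the local
   multiply, so communication overlaps computation.  A caller reaches this
   routine through the generic interface (argument names below are
   illustrative only):

       MatCreateBAIJ(comm,bs,m,n,M,N,d_nz,NULL,o_nz,NULL,&A);
       ...assemble A, create conforming vectors x and y...
       MatMult(A,x,y);    dispatches here for MATMPIBAIJ
*/
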
1326 #undef __FUNCT__
1327 #define __FUNCT__ "MatMultAdd_MPIBAIJ"
1328 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1329 {
1330   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1331   PetscErrorCode ierr;
1332 
1333   PetscFunctionBegin;
1334   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1335   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1336   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1337   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1338   PetscFunctionReturn(0);
1339 }
1340 
1341 #undef __FUNCT__
1342 #define __FUNCT__ "MatMultTranspose_MPIBAIJ"
1343 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1344 {
1345   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1346   PetscErrorCode ierr;
1347   PetscBool      merged;
1348 
1349   PetscFunctionBegin;
1350   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1351   /* do nondiagonal part */
1352   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1353   if (!merged) {
1354     /* send it on its way */
1355     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1356     /* do local part */
1357     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1358     /* receive remote parts: note this assumes the values are not actually */
1359     /* inserted in yy until the next line */
1360     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1361   } else {
1362     /* do local part */
1363     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1364     /* send it on its way */
1365     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1366     /* values were actually received in the Begin() but we still need to call this no-op End() */
1367     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1368   }
1369   PetscFunctionReturn(0);
1370 }
1371 
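/*
   Illustrative sketch (not part of the source): MatMultTranspose_MPIBAIJ forms
   y = A_d^T*x plus the scatter-add of B_o^T*x back to the owning processes.
   When the scatter context is "merged" the reverse scatter delivers values in
   VecScatterBegin(), so the local transpose multiply must complete first;
   otherwise the two are overlapped.  Typical (illustrative) usage is through
   the generic interface:

       MatMultTranspose(A,x,y);
*/
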
1372 #undef __FUNCT__
1373 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
1374 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1375 {
1376   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1377   PetscErrorCode ierr;
1378 
1379   PetscFunctionBegin;
1380   /* do nondiagonal part */
1381   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1382   /* send it on its way */
1383   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1384   /* do local part */
1385   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1386   /* receive remote parts: note this assumes the values are not actually */
1387   /* inserted in zz until the next line, which is true for the current */
1388   /* implementation but is perhaps not always true. */
1389   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1390   PetscFunctionReturn(0);
1391 }
1392 
1393 /*
1394   This only works correctly for square matrices where the subblock A->A is the
1395    diagonal block
1396 */
1397 #undef __FUNCT__
1398 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
1399 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1400 {
1401   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1402   PetscErrorCode ierr;
1403 
1404   PetscFunctionBegin;
1405   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1406   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1407   PetscFunctionReturn(0);
1408 }
1409 
1410 #undef __FUNCT__
1411 #define __FUNCT__ "MatScale_MPIBAIJ"
1412 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
1413 {
1414   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1415   PetscErrorCode ierr;
1416 
1417   PetscFunctionBegin;
1418   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1419   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1420   PetscFunctionReturn(0);
1421 }
1422 
1423 #undef __FUNCT__
1424 #define __FUNCT__ "MatGetRow_MPIBAIJ"
1425 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1426 {
1427   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
1428   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1429   PetscErrorCode ierr;
1430   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1431   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
1432   PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;
1433 
1434   PetscFunctionBegin;
1435   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1436   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1437   mat->getrowactive = PETSC_TRUE;
1438 
1439   if (!mat->rowvalues && (idx || v)) {
1440     /*
1441         allocate enough space to hold information from the longest row.
1442     */
1443     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1444     PetscInt    max = 1,mbs = mat->mbs,tmp;
1445     for (i=0; i<mbs; i++) {
1446       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1447       if (max < tmp) max = tmp;
1448     }
1449     ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1450   }
1451   lrow = row - brstart;
1452 
1453   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1454   if (!v)   {pvA = 0; pvB = 0;}
1455   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1456   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1457   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1458   nztot = nzA + nzB;
1459 
1460   cmap = mat->garray;
1461   if (v  || idx) {
1462     if (nztot) {
1463       /* Sort by increasing column numbers, assuming A and B already sorted */
1464       PetscInt imark = -1;
1465       if (v) {
1466         *v = v_p = mat->rowvalues;
1467         for (i=0; i<nzB; i++) {
1468           if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
1469           else break;
1470         }
1471         imark = i;
1472         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1473         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1474       }
1475       if (idx) {
1476         *idx = idx_p = mat->rowindices;
1477         if (imark > -1) {
1478           for (i=0; i<imark; i++) {
1479             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1480           }
1481         } else {
1482           for (i=0; i<nzB; i++) {
1483             if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1484             else break;
1485           }
1486           imark = i;
1487         }
1488         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1489         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1490       }
1491     } else {
1492       if (idx) *idx = 0;
1493       if (v)   *v   = 0;
1494     }
1495   }
1496   *nz  = nztot;
1497   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1498   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1499   PetscFunctionReturn(0);
1500 }
1501 
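/*
   Illustrative usage sketch (variable names are illustrative): rows of a
   parallel BAIJ matrix may only be accessed on the owning process; the column
   indices returned are global and merged in increasing order from the diagonal
   (A) and off-diagonal (B) parts using garray:

       PetscInt          rstart,rend,ncols;
       const PetscInt    *cols;
       const PetscScalar *vals;
       MatGetOwnershipRange(A,&rstart,&rend);
       MatGetRow(A,rstart,&ncols,&cols,&vals);
       ...inspect cols[0..ncols-1] and vals[0..ncols-1]...
       MatRestoreRow(A,rstart,&ncols,&cols,&vals);
*/
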
1502 #undef __FUNCT__
1503 #define __FUNCT__ "MatRestoreRow_MPIBAIJ"
1504 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1505 {
1506   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1507 
1508   PetscFunctionBegin;
1509   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1510   baij->getrowactive = PETSC_FALSE;
1511   PetscFunctionReturn(0);
1512 }
1513 
1514 #undef __FUNCT__
1515 #define __FUNCT__ "MatZeroEntries_MPIBAIJ"
1516 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1517 {
1518   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1519   PetscErrorCode ierr;
1520 
1521   PetscFunctionBegin;
1522   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1523   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1524   PetscFunctionReturn(0);
1525 }
1526 
1527 #undef __FUNCT__
1528 #define __FUNCT__ "MatGetInfo_MPIBAIJ"
1529 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1530 {
1531   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
1532   Mat            A  = a->A,B = a->B;
1533   PetscErrorCode ierr;
1534   PetscReal      isend[5],irecv[5];
1535 
1536   PetscFunctionBegin;
1537   info->block_size = (PetscReal)matin->rmap->bs;
1538 
1539   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1540 
1541   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1542   isend[3] = info->memory;  isend[4] = info->mallocs;
1543 
1544   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1545 
1546   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1547   isend[3] += info->memory;  isend[4] += info->mallocs;
1548 
1549   if (flag == MAT_LOCAL) {
1550     info->nz_used      = isend[0];
1551     info->nz_allocated = isend[1];
1552     info->nz_unneeded  = isend[2];
1553     info->memory       = isend[3];
1554     info->mallocs      = isend[4];
1555   } else if (flag == MAT_GLOBAL_MAX) {
1556     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1557 
1558     info->nz_used      = irecv[0];
1559     info->nz_allocated = irecv[1];
1560     info->nz_unneeded  = irecv[2];
1561     info->memory       = irecv[3];
1562     info->mallocs      = irecv[4];
1563   } else if (flag == MAT_GLOBAL_SUM) {
1564     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1565 
1566     info->nz_used      = irecv[0];
1567     info->nz_allocated = irecv[1];
1568     info->nz_unneeded  = irecv[2];
1569     info->memory       = irecv[3];
1570     info->mallocs      = irecv[4];
1571   } else SETERRQ1(PetscObjectComm((PetscObject)matin),PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1572   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1573   info->fill_ratio_needed = 0;
1574   info->factor_mallocs    = 0;
1575   PetscFunctionReturn(0);
1576 }
1577 
1578 #undef __FUNCT__
1579 #define __FUNCT__ "MatSetOption_MPIBAIJ"
1580 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg)
1581 {
1582   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1583   PetscErrorCode ierr;
1584 
1585   PetscFunctionBegin;
1586   switch (op) {
1587   case MAT_NEW_NONZERO_LOCATIONS:
1588   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1589   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1590   case MAT_KEEP_NONZERO_PATTERN:
1591   case MAT_NEW_NONZERO_LOCATION_ERR:
1592     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1593     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1594     break;
1595   case MAT_ROW_ORIENTED:
1596     a->roworiented = flg;
1597 
1598     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1599     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1600     break;
1601   case MAT_NEW_DIAGONALS:
1602     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1603     break;
1604   case MAT_IGNORE_OFF_PROC_ENTRIES:
1605     a->donotstash = flg;
1606     break;
1607   case MAT_USE_HASH_TABLE:
1608     a->ht_flag = flg;
1609     break;
1610   case MAT_SYMMETRIC:
1611   case MAT_STRUCTURALLY_SYMMETRIC:
1612   case MAT_HERMITIAN:
1613   case MAT_SYMMETRY_ETERNAL:
1614     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1615     break;
1616   default:
1617     SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"unknown option %d",op);
1618   }
1619   PetscFunctionReturn(0);
1620 }
1621 
1622 #undef __FUNCT__
1623 #define __FUNCT__ "MatTranspose_MPIBAIJ"
1624 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
1625 {
1626   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
1627   Mat_SeqBAIJ    *Aloc;
1628   Mat            B;
1629   PetscErrorCode ierr;
1630   PetscInt       M =A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
1631   PetscInt       bs=A->rmap->bs,mbs=baij->mbs;
1632   MatScalar      *a;
1633 
1634   PetscFunctionBegin;
1635   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1636   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1637     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1638     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1639     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1640     /* Do not know preallocation information, but must set block size */
1641     ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,NULL,PETSC_DECIDE,NULL);CHKERRQ(ierr);
1642   } else {
1643     B = *matout;
1644   }
1645 
1646   /* copy over the A part */
1647   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1648   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1649   ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
1650 
1651   for (i=0; i<mbs; i++) {
1652     rvals[0] = bs*(baij->rstartbs + i);
1653     for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1654     for (j=ai[i]; j<ai[i+1]; j++) {
1655       col = (baij->cstartbs+aj[j])*bs;
1656       for (k=0; k<bs; k++) {
1657         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1658 
1659         col++; a += bs;
1660       }
1661     }
1662   }
1663   /* copy over the B part */
1664   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1665   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1666   for (i=0; i<mbs; i++) {
1667     rvals[0] = bs*(baij->rstartbs + i);
1668     for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1669     for (j=ai[i]; j<ai[i+1]; j++) {
1670       col = baij->garray[aj[j]]*bs;
1671       for (k=0; k<bs; k++) {
1672         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1673         col++;
1674         a += bs;
1675       }
1676     }
1677   }
1678   ierr = PetscFree(rvals);CHKERRQ(ierr);
1679   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1680   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1681 
1682   if (reuse == MAT_INITIAL_MATRIX || *matout != A) *matout = B;
1683   else {
1684     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
1685   }
1686   PetscFunctionReturn(0);
1687 }
1688 
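/*
   Minimal usage sketch (illustrative): MatTranspose on a parallel BAIJ matrix
   either creates a new matrix or, for square matrices, transposes in place:

       Mat At;
       MatTranspose(A,MAT_INITIAL_MATRIX,&At);    new matrix
       MatTranspose(A,MAT_REUSE_MATRIX,&A);       in place; A must be square
*/
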
1689 #undef __FUNCT__
1690 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
1691 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1692 {
1693   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1694   Mat            a     = baij->A,b = baij->B;
1695   PetscErrorCode ierr;
1696   PetscInt       s1,s2,s3;
1697 
1698   PetscFunctionBegin;
1699   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1700   if (rr) {
1701     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1702     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1703     /* Overlap communication with computation. */
1704     ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1705   }
1706   if (ll) {
1707     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1708     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1709     ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
1710   }
1711   /* scale  the diagonal block */
1712   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1713 
1714   if (rr) {
1715     /* Do a scatter end and then right scale the off-diagonal block */
1716     ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1717     ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
1718   }
1719   PetscFunctionReturn(0);
1720 }
1721 
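/*
   Illustrative sketch (not part of the source): MatDiagonalScale computes
   A = diag(ll)*A*diag(rr).  Right-scaling the off-diagonal block needs the
   ghost entries of rr, so the forward scatter into baij->lvec is started
   first and completed only after the diagonal block has been scaled,
   overlapping communication with computation.  Typical (illustrative) usage:

       MatDiagonalScale(A,l,r);    pass NULL for a side that is not scaled
*/
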
1722 #undef __FUNCT__
1723 #define __FUNCT__ "MatZeroRows_MPIBAIJ"
1724 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
1725 {
1726   Mat_MPIBAIJ       *l = (Mat_MPIBAIJ*)A->data;
1727   PetscErrorCode    ierr;
1728   PetscMPIInt       imdex,size = l->size,n,rank = l->rank;
1729   PetscInt          i,*owners = A->rmap->range;
1730   PetscInt          *nprocs,j,idx,nsends,row;
1731   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
1732   PetscInt          *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source,lastidx = -1;
1733   PetscInt          *lens,*lrows,*values,rstart_bs=A->rmap->rstart;
1734   MPI_Comm          comm;
1735   MPI_Request       *send_waits,*recv_waits;
1736   MPI_Status        recv_status,*send_status;
1737   const PetscScalar *xx;
1738   PetscScalar       *bb;
1739 #if defined(PETSC_DEBUG)
1740   PetscBool         found = PETSC_FALSE;
1741 #endif
1742 
1743   PetscFunctionBegin;
1744   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1745   /*  first count number of contributors to each processor */
1746   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
1747   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
1748   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr);  /* see note*/
1749   j    = 0;
1750   for (i=0; i<N; i++) {
1751     if (lastidx > (idx = rows[i])) j = 0;
1752     lastidx = idx;
1753     for (; j<size; j++) {
1754       if (idx >= owners[j] && idx < owners[j+1]) {
1755         nprocs[2*j]++;
1756         nprocs[2*j+1] = 1;
1757         owner[i]      = j;
1758 #if defined(PETSC_DEBUG)
1759         found = PETSC_TRUE;
1760 #endif
1761         break;
1762       }
1763     }
1764 #if defined(PETSC_DEBUG)
1765     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
1766     found = PETSC_FALSE;
1767 #endif
1768   }
1769   nsends = 0;  for (i=0; i<size; i++) nsends += nprocs[2*i+1];
1770 
1771   if (A->nooffproczerorows) {
1772     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
1773     nrecvs = nsends;
1774     nmax   = N;
1775   } else {
1776     /* inform other processors of number of messages and max length*/
1777     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
1778   }
1779 
1780   /* post receives:   */
1781   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
1782   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
1783   for (i=0; i<nrecvs; i++) {
1784     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
1785   }
1786 
1787   /* do sends:
1788      1) starts[i] gives the starting index in svalues for stuff going to
1789      the ith processor
1790   */
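  /*
     Illustrative example (not part of the source): with 3 processes and
     per-process counts nprocs[2*i] = {2,0,3}, the prefix sums below give
     starts[] = {0,2,2}; rows destined for process 0 are packed into
     svalues[0..1] and rows destined for process 2 into svalues[2..4].
  */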
1791   ierr      = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
1792   ierr      = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
1793   ierr      = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
1794   starts[0] = 0;
1795   for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
1796   for (i=0; i<N; i++) {
1797     svalues[starts[owner[i]]++] = rows[i];
1798   }
1799 
1800   starts[0] = 0;
1801   for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
1802   count = 0;
1803   for (i=0; i<size; i++) {
1804     if (nprocs[2*i+1]) {
1805       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
1806     }
1807   }
1808   ierr = PetscFree(starts);CHKERRQ(ierr);
1809 
1810   base = owners[rank];
1811 
1812   /*  wait on receives */
1813   ierr  = PetscMalloc2(nrecvs+1,PetscInt,&lens,nrecvs+1,PetscInt,&source);CHKERRQ(ierr);
1814   count = nrecvs;
1815   slen  = 0;
1816   while (count) {
1817     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
1818     /* unpack receives into our local space */
1819     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
1820 
1821     source[imdex] = recv_status.MPI_SOURCE;
1822     lens[imdex]   = n;
1823     slen         += n;
1824     count--;
1825   }
1826   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1827 
1828   /* move the received row indices into a local list */
1829   ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
1830   count = 0;
1831   for (i=0; i<nrecvs; i++) {
1832     values = rvalues + i*nmax;
1833     for (j=0; j<lens[i]; j++) {
1834       lrows[count++] = values[j] - base;
1835     }
1836   }
1837   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1838   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
1839   ierr = PetscFree(owner);CHKERRQ(ierr);
1840   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1841 
1842   /* fix right hand side if needed */
1843   if (x && b) {
1844     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
1845     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1846     for (i=0; i<slen; i++) {
1847       bb[lrows[i]] = diag*xx[lrows[i]];
1848     }
1849     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
1850     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1851   }
1852 
1853   /* actually zap the local rows */
1854   /*
1855         Zero the required rows. If the "diagonal block" of the matrix
1856      is square and the user wishes to set the diagonal we use separate
1857      code so that MatSetValues() is not called for each diagonal allocating
1858      new memory, thus calling lots of mallocs and slowing things down.
1859 
1860   */
1861   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1862   ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
1863   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
1864     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
1865   } else if (diag != 0.0) {
1866     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
1867     if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1868        MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1869     for (i=0; i<slen; i++) {
1870       row  = lrows[i] + rstart_bs;
1871       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
1872     }
1873     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1874     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1875   } else {
1876     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
1877   }
1878 
1879   ierr = PetscFree(lrows);CHKERRQ(ierr);
1880 
1881   /* wait on sends */
1882   if (nsends) {
1883     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1884     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1885     ierr = PetscFree(send_status);CHKERRQ(ierr);
1886   }
1887   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1888   ierr = PetscFree(svalues);CHKERRQ(ierr);
1889   PetscFunctionReturn(0);
1890 }
1891 
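/*
   Minimal usage sketch (illustrative, assuming an assembled parallel BAIJ
   matrix A and conforming vectors x and b):

       PetscInt rows[2] = {5,17};        global row indices; any process may list any row
       MatZeroRows(A,2,rows,1.0,x,b);    zero the rows, place 1.0 on the diagonal, set b = diag*x there
*/
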
1892 #undef __FUNCT__
1893 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
1894 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1895 {
1896   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1897   PetscErrorCode ierr;
1898 
1899   PetscFunctionBegin;
1900   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1901   PetscFunctionReturn(0);
1902 }
1903 
1904 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat*);
1905 
1906 #undef __FUNCT__
1907 #define __FUNCT__ "MatEqual_MPIBAIJ"
1908 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool  *flag)
1909 {
1910   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1911   Mat            a,b,c,d;
1912   PetscBool      flg;
1913   PetscErrorCode ierr;
1914 
1915   PetscFunctionBegin;
1916   a = matA->A; b = matA->B;
1917   c = matB->A; d = matB->B;
1918 
1919   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1920   if (flg) {
1921     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1922   }
1923   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1924   PetscFunctionReturn(0);
1925 }
1926 
1927 #undef __FUNCT__
1928 #define __FUNCT__ "MatCopy_MPIBAIJ"
1929 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
1930 {
1931   PetscErrorCode ierr;
1932   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1933   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
1934 
1935   PetscFunctionBegin;
1936   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1937   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1938     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1939   } else {
1940     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1941     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1942   }
1943   PetscFunctionReturn(0);
1944 }
1945 
1946 #undef __FUNCT__
1947 #define __FUNCT__ "MatSetUp_MPIBAIJ"
1948 PetscErrorCode MatSetUp_MPIBAIJ(Mat A)
1949 {
1950   PetscErrorCode ierr;
1951 
1952   PetscFunctionBegin;
1953   ierr =  MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1954   PetscFunctionReturn(0);
1955 }
1956 
1957 #undef __FUNCT__
1958 #define __FUNCT__ "MatAXPY_MPIBAIJ"
1959 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1960 {
1961   PetscErrorCode ierr;
1962   Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ*)X->data,*yy=(Mat_MPIBAIJ*)Y->data;
1963   PetscBLASInt   bnz,one=1;
1964   Mat_SeqBAIJ    *x,*y;
1965 
1966   PetscFunctionBegin;
1967   if (str == SAME_NONZERO_PATTERN) {
1968     PetscScalar alpha = a;
1969     x    = (Mat_SeqBAIJ*)xx->A->data;
1970     y    = (Mat_SeqBAIJ*)yy->A->data;
1971     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
1972     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
1973     x    = (Mat_SeqBAIJ*)xx->B->data;
1974     y    = (Mat_SeqBAIJ*)yy->B->data;
1975     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
1976     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
1977   } else {
1978     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1979   }
1980   PetscFunctionReturn(0);
1981 }
1982 
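/*
   Illustrative sketch (not part of the source): for SAME_NONZERO_PATTERN the
   update Y = a*X + Y reduces to one BLAS axpy over the stored values of the
   diagonal blocks and one over the off-diagonal blocks; any other structure
   falls back to MatAXPY_Basic().  Typical (illustrative) usage:

       MatAXPY(Y,2.0,X,SAME_NONZERO_PATTERN);
*/
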
1983 #undef __FUNCT__
1984 #define __FUNCT__ "MatRealPart_MPIBAIJ"
1985 PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
1986 {
1987   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1988   PetscErrorCode ierr;
1989 
1990   PetscFunctionBegin;
1991   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1992   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1993   PetscFunctionReturn(0);
1994 }
1995 
1996 #undef __FUNCT__
1997 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ"
1998 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
1999 {
2000   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
2001   PetscErrorCode ierr;
2002 
2003   PetscFunctionBegin;
2004   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2005   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2006   PetscFunctionReturn(0);
2007 }
2008 
2009 #undef __FUNCT__
2010 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ"
2011 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
2012 {
2013   PetscErrorCode ierr;
2014   IS             iscol_local;
2015   PetscInt       csize;
2016 
2017   PetscFunctionBegin;
2018   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
2019   if (call == MAT_REUSE_MATRIX) {
2020     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
2021     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2022   } else {
2023     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
2024   }
2025   ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
2026   if (call == MAT_INITIAL_MATRIX) {
2027     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
2028     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
2029   }
2030   PetscFunctionReturn(0);
2031 }
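
/*
   Illustrative usage sketch (the index sets below are assumptions, not taken
   from this file):

       IS  isrow,iscol;
       Mat sub;
       ...build isrow/iscol with block-aligned global indices...
       MatGetSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&sub);
       ...later, with the same index sets, reuse the storage...
       MatGetSubMatrix(A,isrow,iscol,MAT_REUSE_MATRIX,&sub);

   The reuse path relies on the "ISAllGather" object composed on the submatrix
   by the MAT_INITIAL_MATRIX call above.
*/
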
2032 extern PetscErrorCode MatGetSubMatrices_MPIBAIJ_local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,PetscBool*,Mat*);
2033 #undef __FUNCT__
2034 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ_Private"
2035 /*
2036   Not great since it makes two copies of the submatrix: first a SeqBAIJ on each
2037   process, and then the final result obtained by concatenating the local matrices.
2038   Writing it directly would be much like MatGetSubMatrices_MPIBAIJ()
2039 */
2040 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2041 {
2042   PetscErrorCode ierr;
2043   PetscMPIInt    rank,size;
2044   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs;
2045   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol,nrow;
2046   Mat            M,Mreuse;
2047   MatScalar      *vwork,*aa;
2048   MPI_Comm       comm;
2049   IS             isrow_new, iscol_new;
2050   PetscBool      idflag,allrows, allcols;
2051   Mat_SeqBAIJ    *aij;
2052 
2053   PetscFunctionBegin;
2054   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2055   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2056   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2057   /* The compression and expansion should be avoided. It does not report
2058      errors and might change the indices, hence it is potentially buggy */
2059   ierr = ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new);CHKERRQ(ierr);
2060   ierr = ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new);CHKERRQ(ierr);
2061 
2062   /* Check for special case: each processor gets entire matrix columns */
2063   ierr = ISIdentity(iscol,&idflag);CHKERRQ(ierr);
2064   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
2065   if (idflag && ncol == mat->cmap->N) allcols = PETSC_TRUE;
2066   else allcols = PETSC_FALSE;
2067 
2068   ierr = ISIdentity(isrow,&idflag);CHKERRQ(ierr);
2069   ierr = ISGetLocalSize(isrow,&nrow);CHKERRQ(ierr);
2070   if (idflag && nrow == mat->rmap->N) allrows = PETSC_TRUE;
2071   else allrows = PETSC_FALSE;
2072 
2073   if (call ==  MAT_REUSE_MATRIX) {
2074     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
2075     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2076     ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_REUSE_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr);
2077   } else {
2078     ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_INITIAL_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr);
2079   }
2080   ierr = ISDestroy(&isrow_new);CHKERRQ(ierr);
2081   ierr = ISDestroy(&iscol_new);CHKERRQ(ierr);
2082   /*
2083       m - number of local rows
2084       n - number of columns (same on all processors)
2085       rstart - first row in new global matrix generated
2086   */
2087   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2088   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2089   m    = m/bs;
2090   n    = n/bs;
2091 
2092   if (call == MAT_INITIAL_MATRIX) {
2093     aij = (Mat_SeqBAIJ*)(Mreuse)->data;
2094     ii  = aij->i;
2095     jj  = aij->j;
2096 
2097     /*
2098         Determine the number of non-zeros in the diagonal and off-diagonal
2099         portions of the matrix in order to do correct preallocation
2100     */
2101 
2102     /* first get start and end of "diagonal" columns */
2103     if (csize == PETSC_DECIDE) {
2104       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2105       if (mglobal == n*bs) { /* square matrix */
2106         nlocal = m;
2107       } else {
2108         nlocal = n/size + ((n % size) > rank);
2109       }
2110     } else {
2111       nlocal = csize/bs;
2112     }
2113     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2114     rstart = rend - nlocal;
2115     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2116 
2117     /* next, compute all the lengths */
2118     ierr  = PetscMalloc2(m+1,PetscInt,&dlens,m+1,PetscInt,&olens);CHKERRQ(ierr);
2119     for (i=0; i<m; i++) {
2120       jend = ii[i+1] - ii[i];
2121       olen = 0;
2122       dlen = 0;
2123       for (j=0; j<jend; j++) {
2124         if (*jj < rstart || *jj >= rend) olen++;
2125         else dlen++;
2126         jj++;
2127       }
2128       olens[i] = olen;
2129       dlens[i] = dlen;
2130     }
2131     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
2132     ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr);
2133     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
2134     ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
2135     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
2136   } else {
2137     PetscInt ml,nl;
2138 
2139     M    = *newmat;
2140     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2141     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2142     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2143     /*
2144          The next two lines are needed so we may call MatSetValuesBlocked_MPIBAIJ() below directly,
2145        rather than the slower MatSetValues().
2146     */
2147     M->was_assembled = PETSC_TRUE;
2148     M->assembled     = PETSC_FALSE;
2149   }
2150   ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2151   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2152   aij  = (Mat_SeqBAIJ*)(Mreuse)->data;
2153   ii   = aij->i;
2154   jj   = aij->j;
2155   aa   = aij->a;
2156   for (i=0; i<m; i++) {
2157     row   = rstart/bs + i;
2158     nz    = ii[i+1] - ii[i];
2159     cwork = jj;     jj += nz;
2160     vwork = aa;     aa += nz*bs*bs;
2161     ierr  = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2162   }
2163 
2164   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2165   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2166   *newmat = M;
2167 
2168   /* save submatrix used in processor for next request */
2169   if (call ==  MAT_INITIAL_MATRIX) {
2170     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2171     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2172   }
2173   PetscFunctionReturn(0);
2174 }
2175 
2176 #undef __FUNCT__
2177 #define __FUNCT__ "MatPermute_MPIBAIJ"
2178 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B)
2179 {
2180   MPI_Comm       comm,pcomm;
2181   PetscInt       first,rlocal_size,clocal_size,nrows;
2182   const PetscInt *rows;
2183   PetscMPIInt    size;
2184   IS             crowp,growp,irowp,lrowp,lcolp;
2185   PetscErrorCode ierr;
2186 
2187   PetscFunctionBegin;
2188   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2189   /* make a collective version of 'rowp' */
2190   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
2191   if (pcomm==comm) {
2192     crowp = rowp;
2193   } else {
2194     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
2195     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
2196     ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr);
2197     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
2198   }
2199   /* collect the global row permutation and invert it */
2200   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
2201   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
2202   if (pcomm!=comm) {
2203     ierr = ISDestroy(&crowp);CHKERRQ(ierr);
2204   }
2205   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
2206   ierr = ISDestroy(&growp);CHKERRQ(ierr);
2207   /* get the local target indices */
2208   ierr = MatGetOwnershipRange(A,&first,NULL);CHKERRQ(ierr);
2209   ierr = MatGetLocalSize(A,&rlocal_size,&clocal_size);CHKERRQ(ierr);
2210   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
2211   ierr = ISCreateGeneral(MPI_COMM_SELF,rlocal_size,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr);
2212   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
2213   ierr = ISDestroy(&irowp);CHKERRQ(ierr);
2214   /* the column permutation is much simpler;
2215      make a local (gathered) version of 'colp' */
2216   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
2217   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
2218   if (size==1) {
2219     lcolp = colp;
2220   } else {
2221     ierr = ISAllGather(colp,&lcolp);CHKERRQ(ierr);
2222   }
2223   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
2224   /* now we just get the submatrix */
2225   ierr = MatGetSubMatrix_MPIBAIJ_Private(A,lrowp,lcolp,clocal_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
2226   if (size>1) {
2227     ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
2228   }
2229   /* clean up */
2230   ierr = ISDestroy(&lrowp);CHKERRQ(ierr);
2231   PetscFunctionReturn(0);
2232 }
2233 
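/*
   Minimal usage sketch (illustrative): rowp and colp are index sets describing
   row and column permutations; the permuted matrix is returned as a new matrix:

       IS  rowp,colp;
       Mat B;
       ...create rowp/colp and mark them with ISSetPermutation()...
       MatPermute(A,rowp,colp,&B);
*/
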
2234 #undef __FUNCT__
2235 #define __FUNCT__ "MatGetGhosts_MPIBAIJ"
2236 PetscErrorCode  MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
2237 {
2238   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data;
2239   Mat_SeqBAIJ *B    = (Mat_SeqBAIJ*)baij->B->data;
2240 
2241   PetscFunctionBegin;
2242   if (nghosts) *nghosts = B->nbs;
2243   if (ghosts) *ghosts = baij->garray;
2244   PetscFunctionReturn(0);
2245 }
2246 
2247 extern PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat);
2248 
2249 #undef __FUNCT__
2250 #define __FUNCT__ "MatFDColoringCreate_MPIBAIJ"
2251 /*
2252     This routine is almost identical to MatFDColoringCreate_MPIAIJ()!
2253 */
2254 PetscErrorCode MatFDColoringCreate_MPIBAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c)
2255 {
2256   Mat_MPIBAIJ            *baij = (Mat_MPIBAIJ*)mat->data;
2257   PetscErrorCode         ierr;
2258   PetscMPIInt            size,*ncolsonproc,*disp,nn;
2259   PetscInt               bs,i,n,nrows,j,k,m,ncols,col;
2260   const PetscInt         *is,*rows = 0,*A_ci,*A_cj,*B_ci,*B_cj,*ltog;
2261   PetscInt               nis = iscoloring->n,nctot,*cols;
2262   PetscInt               *rowhit,M,cstart,cend,colb;
2263   PetscInt               *columnsforrow,l;
2264   IS                     *isa;
2265   PetscBool              done,flg;
2266   ISLocalToGlobalMapping map = mat->cmap->bmapping;
2267   PetscInt               ctype=c->ctype;
2268 
2269   PetscFunctionBegin;
2270   if (!mat->assembled) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled first; MatAssemblyBegin/End();");
2271   if (ctype == IS_COLORING_GHOSTED && !map) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_INCOMP,"When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMappingBlock");
2272 
2273   if (map) {ierr = ISLocalToGlobalMappingGetIndices(map,&ltog);CHKERRQ(ierr);}
2274   else     ltog = NULL;
2275   ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr);
2276   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2277 
2278   M         = mat->rmap->n/bs;
2279   cstart    = mat->cmap->rstart/bs;
2280   cend      = mat->cmap->rend/bs;
2281   c->M      = mat->rmap->N/bs;         /* set the global rows and columns and local rows */
2282   c->N      = mat->cmap->N/bs;
2283   c->m      = mat->rmap->n/bs;
2284   c->rstart = mat->rmap->rstart/bs;
2285 
2286   c->ncolors = nis;
2287   ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr);
2288   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr);
2289   ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr);
2290   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr);
2291   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr);
2292   ierr       = PetscLogObjectMemory((PetscObject)c,5*nis*sizeof(PetscInt));CHKERRQ(ierr);
2293 
2294   /* Allow access to data structures of local part of matrix */
2295   if (!baij->colmap) {
2296     ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
2297   }
2298   ierr = MatGetColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr);
2299   ierr = MatGetColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr);
2300 
2301   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr);
2302   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr);
2303 
2304   for (i=0; i<nis; i++) {
2305     ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr);
2306     ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr);
2307 
2308     c->ncolumns[i] = n;
2309     if (n) {
2310       ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr);
2311       ierr = PetscLogObjectMemory((PetscObject)c,n*sizeof(PetscInt));CHKERRQ(ierr);
2312       ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr);
2313     } else {
2314       c->columns[i] = 0;
2315     }
2316 
2317     if (ctype == IS_COLORING_GLOBAL) {
2318       /* Determine the total (parallel) number of columns of this color */
2319       ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
2320       ierr = PetscMalloc2(size,PetscMPIInt,&ncolsonproc,size,PetscMPIInt,&disp);CHKERRQ(ierr);
2321 
2322       ierr  = PetscMPIIntCast(n,&nn);CHKERRQ(ierr);
2323       ierr  = MPI_Allgather(&nn,1,MPI_INT,ncolsonproc,1,MPI_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
2324       nctot = 0; for (j=0; j<size; j++) nctot += ncolsonproc[j];
2325       if (!nctot) {
2326         ierr = PetscInfo(mat,"Coloring of matrix has some unneeded colors with no corresponding rows\n");CHKERRQ(ierr);
2327       }
2328 
2329       disp[0] = 0;
2330       for (j=1; j<size; j++) {
2331         disp[j] = disp[j-1] + ncolsonproc[j-1];
2332       }
2333 
2334       /* Get complete list of columns for color on each processor */
2335       ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2336       ierr = MPI_Allgatherv((void*)is,n,MPIU_INT,cols,ncolsonproc,disp,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
2337       ierr = PetscFree2(ncolsonproc,disp);CHKERRQ(ierr);
2338     } else if (ctype == IS_COLORING_GHOSTED) {
2339       /* Determine local number of columns of this color on this process, including ghost points */
2340       nctot = n;
2341       ierr  = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2342       ierr  = PetscMemcpy(cols,is,n*sizeof(PetscInt));CHKERRQ(ierr);
2343     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not provided for this MatFDColoring type");
2344 
2345     /*
2346        Mark all rows affected by these columns
2347     */
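    /*
       Illustrative example (not part of the source): if color i consists of the
       global block columns {3,8}, column 3 having nonzeros in local block rows
       {0,2} and column 8 in row {1}, the fast path below leaves
       rowhit[] = {4,9,4,0,...}; a nonzero entry marks a row perturbed by this
       color and (rowhit[row]-1) records which column produced it.
    */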
2348     /* Temporary option to allow for debugging/testing */
2349     flg  = PETSC_FALSE;
2350     ierr = PetscOptionsGetBool(NULL,"-matfdcoloring_slow",&flg,NULL);CHKERRQ(ierr);
2351     if (!flg) { /*-----------------------------------------------------------------------------*/
2352       /* crude, fast version */
2353       ierr = PetscMemzero(rowhit,M*sizeof(PetscInt));CHKERRQ(ierr);
2354       /* loop over columns*/
2355       for (j=0; j<nctot; j++) {
2356         if (ctype == IS_COLORING_GHOSTED) {
2357           col = ltog[cols[j]];
2358         } else {
2359           col = cols[j];
2360         }
2361         if (col >= cstart && col < cend) {
2362           /* column is in diagonal block of matrix */
2363           rows = A_cj + A_ci[col-cstart];
2364           m    = A_ci[col-cstart+1] - A_ci[col-cstart];
2365         } else {
2366 #if defined(PETSC_USE_CTABLE)
2367           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2368           colb--;
2369 #else
2370           colb = baij->colmap[col] - 1;
2371 #endif
2372           if (colb == -1) {
2373             m = 0;
2374           } else {
2375             colb = colb/bs;
2376             rows = B_cj + B_ci[colb];
2377             m    = B_ci[colb+1] - B_ci[colb];
2378           }
2379         }
2380         /* loop over the rows containing this column, marking them in rowhit */
2381         for (k=0; k<m; k++) {
2382           rowhit[*rows++] = col + 1;
2383         }
2384       }
2385 
2386       /* count the number of hits */
2387       nrows = 0;
2388       for (j=0; j<M; j++) {
2389         if (rowhit[j]) nrows++;
2390       }
2391       c->nrows[i] = nrows;
2392       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
2393       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
2394       ierr        = PetscLogObjectMemory((PetscObject)c,2*(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr);
2395       nrows       = 0;
2396       for (j=0; j<M; j++) {
2397         if (rowhit[j]) {
2398           c->rows[i][nrows]          = j;
2399           c->columnsforrow[i][nrows] = rowhit[j] - 1;
2400           nrows++;
2401         }
2402       }
2403     } else { /*-------------------------------------------------------------------------------*/
2404       /* slow version, using rowhit as a linked list */
2405       PetscInt currentcol,fm,mfm;
2406       rowhit[M] = M;
2407       nrows     = 0;
2408       /* loop over columns*/
2409       for (j=0; j<nctot; j++) {
2410         if (ctype == IS_COLORING_GHOSTED) {
2411           col = ltog[cols[j]];
2412         } else {
2413           col = cols[j];
2414         }
2415         if (col >= cstart && col < cend) {
2416           /* column is in diagonal block of matrix */
2417           rows = A_cj + A_ci[col-cstart];
2418           m    = A_ci[col-cstart+1] - A_ci[col-cstart];
2419         } else {
2420 #if defined(PETSC_USE_CTABLE)
2421           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2422           colb--;
2423 #else
2424           colb = baij->colmap[col] - 1;
2425 #endif
2426           if (colb == -1) {
2427             m = 0;
2428           } else {
2429             colb = colb/bs;
2430             rows = B_cj + B_ci[colb];
2431             m    = B_ci[colb+1] - B_ci[colb];
2432           }
2433         }
2434 
2435         /* loop over the rows containing this column, marking them in rowhit */
2436         fm = M;    /* fm points to first entry in linked list */
2437         for (k=0; k<m; k++) {
2438           currentcol = *rows++;
2439           /* is it already in the list? */
2440           do {
2441             mfm = fm;
2442             fm  = rowhit[fm];
2443           } while (fm < currentcol);
2444           /* not in list so add it */
2445           if (fm != currentcol) {
2446             nrows++;
2447             columnsforrow[currentcol] = col;
2448             /* next three lines insert new entry into linked list */
2449             rowhit[mfm]        = currentcol;
2450             rowhit[currentcol] = fm;
2451             fm                 = currentcol;
2452             /* fm points to present position in list since we know the columns are sorted */
2453           } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid coloring of matrix detected");
2454         }
2455       }
2456       c->nrows[i] = nrows;
2457       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
2458       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
2459       ierr        = PetscLogObjectMemory((PetscObject)c,(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr);
2460       /* now store the linked list of rows into c->rows[i] */
2461       nrows = 0;
2462       fm    = rowhit[M];
2463       do {
2464         c->rows[i][nrows]            = fm;
2465         c->columnsforrow[i][nrows++] = columnsforrow[fm];
2466         fm                           = rowhit[fm];
2467       } while (fm < M);
2468     } /* ---------------------------------------------------------------------------------------*/
2469     ierr = PetscFree(cols);CHKERRQ(ierr);
2470   }
2471 
2472   /* Optimize by adding the vscale, and scaleforrow[][] fields */
2473   /*
2474        vscale will contain the "diagonal" (on-process) scalings followed by the off-process ones
2475   */
2476   if (ctype == IS_COLORING_GLOBAL) {
2477     PetscInt *garray;
2478     ierr = PetscMalloc(baij->B->cmap->n*sizeof(PetscInt),&garray);CHKERRQ(ierr);
2479     for (i=0; i<baij->B->cmap->n/bs; i++) {
2480       for (j=0; j<bs; j++) {
2481         garray[i*bs+j] = bs*baij->garray[i]+j;
2482       }
2483     }
2484     ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),baij->A->rmap->n,PETSC_DETERMINE,baij->B->cmap->n,garray,&c->vscale);CHKERRQ(ierr);
2485     ierr = PetscFree(garray);CHKERRQ(ierr);
2486     ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
2487     for (k=0; k<c->ncolors; k++) {
2488       ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
2489       for (l=0; l<c->nrows[k]; l++) {
2490         col = c->columnsforrow[k][l];
2491         if (col >= cstart && col < cend) {
2492           /* column is in diagonal block of matrix */
2493           colb = col - cstart;
2494         } else {
2495           /* column  is in "off-processor" part */
2496 #if defined(PETSC_USE_CTABLE)
2497           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2498           colb--;
2499 #else
2500           colb = baij->colmap[col] - 1;
2501 #endif
2502           colb  = colb/bs;
2503           colb += cend - cstart;
2504         }
2505         c->vscaleforrow[k][l] = colb;
2506       }
2507     }
2508   } else if (ctype == IS_COLORING_GHOSTED) {
2509     /* Get gtol mapping */
2510     PetscInt N = mat->cmap->N,nlocal,*gtol;
2511     ierr = PetscMalloc((N+1)*sizeof(PetscInt),&gtol);CHKERRQ(ierr);
2512     for (i=0; i<N; i++) gtol[i] = -1;
2513     ierr = ISLocalToGlobalMappingGetSize(map,&nlocal);CHKERRQ(ierr);
2514     for (i=0; i<nlocal; i++) gtol[ltog[i]] = i;
2515 
2516     c->vscale = 0; /* will be created in MatFDColoringApply() */
2517     ierr      = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
2518     for (k=0; k<c->ncolors; k++) {
2519       ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
2520       for (l=0; l<c->nrows[k]; l++) {
2521         col = c->columnsforrow[k][l];      /* global column index */
2522 
2523         c->vscaleforrow[k][l] = gtol[col]; /* local column index */
2524       }
2525     }
2526     ierr = PetscFree(gtol);CHKERRQ(ierr);
2527   }
2528   ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr);
2529 
2530   ierr = PetscFree(rowhit);CHKERRQ(ierr);
2531   ierr = PetscFree(columnsforrow);CHKERRQ(ierr);
2532   ierr = MatRestoreColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr);
2533   ierr = MatRestoreColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr);
2534   if (map) {ierr = ISLocalToGlobalMappingRestoreIndices(map,&ltog);CHKERRQ(ierr);}
2535   PetscFunctionReturn(0);
2536 }
2537 
2538 #undef __FUNCT__
2539 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIBAIJ"
2540 PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat)
2541 {
2542   Mat            B;
2543   Mat_MPIBAIJ    *a  = (Mat_MPIBAIJ*)A->data;
2544   Mat_SeqBAIJ    *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data;
2545   Mat_SeqAIJ     *b;
2546   PetscErrorCode ierr;
2547   PetscMPIInt    size,rank,*recvcounts = 0,*displs = 0;
2548   PetscInt       sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs;
2549   PetscInt       m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf;
2550 
2551   PetscFunctionBegin;
2552   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
2553   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
2554 
2555   /* ----------------------------------------------------------------
2556      Tell every processor the number of nonzeros per row
2557   */
2558   ierr = PetscMalloc((A->rmap->N/bs)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
2559   for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) {
2560     lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs];
2561   }
2562   sendcount = A->rmap->rend/bs - A->rmap->rstart/bs;
2563   ierr      = PetscMalloc(2*size*sizeof(PetscMPIInt),&recvcounts);CHKERRQ(ierr);
2564   displs    = recvcounts + size;
2565   for (i=0; i<size; i++) {
2566     recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs;
2567     displs[i]     = A->rmap->range[i]/bs;
2568   }
2569 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2570   ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2571 #else
2572   ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2573 #endif
2574   /* ---------------------------------------------------------------
2575      Create the sequential matrix of the same type as the local block diagonal
2576   */
2577   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
2578   ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2579   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2580   ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr);
2581   b    = (Mat_SeqAIJ*)B->data;
2582 
2583   /*--------------------------------------------------------------------
2584     Copy my part of matrix column indices over
2585   */
2586   sendcount  = ad->nz + bd->nz;
2587   jsendbuf   = b->j + b->i[rstarts[rank]/bs];
2588   a_jsendbuf = ad->j;
2589   b_jsendbuf = bd->j;
2590   n          = A->rmap->rend/bs - A->rmap->rstart/bs;
2591   cnt        = 0;
2592   for (i=0; i<n; i++) {
2593 
2594     /* put in lower diagonal portion */
2595     m = bd->i[i+1] - bd->i[i];
2596     while (m > 0) {
2597       /* is it above diagonal (in bd (compressed) numbering) */
2598       if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break;
2599       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2600       m--;
2601     }
2602 
2603     /* put in diagonal portion */
2604     for (j=ad->i[i]; j<ad->i[i+1]; j++) {
2605       jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++;
2606     }
2607 
2608     /* put in upper diagonal portion */
2609     while (m-- > 0) {
2610       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2611     }
2612   }
2613   if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt);
2614 
2615   /*--------------------------------------------------------------------
2616     Gather all column indices to all processors
2617   */
2618   for (i=0; i<size; i++) {
2619     recvcounts[i] = 0;
2620     for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) {
2621       recvcounts[i] += lens[j];
2622     }
2623   }
2624   displs[0] = 0;
2625   for (i=1; i<size; i++) {
2626     displs[i] = displs[i-1] + recvcounts[i-1];
2627   }
2628 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2629   ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2630 #else
2631   ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2632 #endif
2633   /*--------------------------------------------------------------------
2634     Assemble the matrix into usable form (note numerical values not yet set)
2635   */
2636   /* set the b->ilen (length of each row) values */
2637   ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr);
2638   /* set the b->i indices */
2639   b->i[0] = 0;
2640   for (i=1; i<=A->rmap->N/bs; i++) {
2641     b->i[i] = b->i[i-1] + lens[i-1];
2642   }
2643   ierr = PetscFree(lens);CHKERRQ(ierr);
2644   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2645   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2646   ierr = PetscFree(recvcounts);CHKERRQ(ierr);
2647 
2648   if (A->symmetric) {
2649     ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2650   } else if (A->hermitian) {
2651     ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr);
2652   } else if (A->structurally_symmetric) {
2653     ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2654   }
2655   *newmat = B;
2656   PetscFunctionReturn(0);
2657 }
2658 
2659 #undef __FUNCT__
2660 #define __FUNCT__ "MatSOR_MPIBAIJ"
2661 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2662 {
2663   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
2664   PetscErrorCode ierr;
2665   Vec            bb1 = 0;
2666 
2667   PetscFunctionBegin;
2668   if (flag == SOR_APPLY_UPPER) {
2669     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2670     PetscFunctionReturn(0);
2671   }
2672 
2673   if (its > 1 || !(flag & SOR_ZERO_INITIAL_GUESS)) {
2674     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2675   }
2676 
2677   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2678     if (flag & SOR_ZERO_INITIAL_GUESS) {
2679       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2680       its--;
2681     }
2682 
2683     while (its--) {
2684       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2685       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2686 
2687       /* update rhs: bb1 = bb - B*x */
2688       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2689       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2690 
2691       /* local sweep */
2692       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2693     }
2694   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
2695     if (flag & SOR_ZERO_INITIAL_GUESS) {
2696       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2697       its--;
2698     }
2699     while (its--) {
2700       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2701       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2702 
2703       /* update rhs: bb1 = bb - B*x */
2704       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2705       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2706 
2707       /* local sweep */
2708       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2709     }
2710   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
2711     if (flag & SOR_ZERO_INITIAL_GUESS) {
2712       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2713       its--;
2714     }
2715     while (its--) {
2716       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2717       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2718 
2719       /* update rhs: bb1 = bb - B*x */
2720       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2721       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2722 
2723       /* local sweep */
2724       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2725     }
2726   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel version of SOR requested is not supported");
2727 
2728   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2729   PetscFunctionReturn(0);
2730 }
2731 
2732 extern PetscErrorCode  MatFDColoringApply_BAIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2733 
2734 #undef __FUNCT__
2735 #define __FUNCT__ "MatGetColumnNorms_MPIBAIJ"
2736 PetscErrorCode MatGetColumnNorms_MPIBAIJ(Mat A,NormType type,PetscReal *norms)
2737 {
2738   PetscErrorCode ierr;
2739   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)A->data;
2740   PetscInt       N,i,*garray = aij->garray;
2741   PetscInt       ib,jb,bs = A->rmap->bs;
2742   Mat_SeqBAIJ    *a_aij = (Mat_SeqBAIJ*) aij->A->data;
2743   MatScalar      *a_val = a_aij->a;
2744   Mat_SeqBAIJ    *b_aij = (Mat_SeqBAIJ*) aij->B->data;
2745   MatScalar      *b_val = b_aij->a;
2746   PetscReal      *work;
2747 
2748   PetscFunctionBegin;
2749   ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
2750   ierr = PetscMalloc(N*sizeof(PetscReal),&work);CHKERRQ(ierr);
2751   ierr = PetscMemzero(work,N*sizeof(PetscReal));CHKERRQ(ierr);
2752   if (type == NORM_2) {
2753     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2754       for (jb=0; jb<bs; jb++) {
2755         for (ib=0; ib<bs; ib++) {
2756           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
2757           a_val++;
2758         }
2759       }
2760     }
2761     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2762       for (jb=0; jb<bs; jb++) {
2763         for (ib=0; ib<bs; ib++) {
2764           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val);
2765           b_val++;
2766         }
2767       }
2768     }
2769   } else if (type == NORM_1) {
2770     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2771       for (jb=0; jb<bs; jb++) {
2772         for (ib=0; ib<bs; ib++) {
2773           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
2774           a_val++;
2775         }
2776       }
2777     }
2778     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2779       for (jb=0; jb<bs; jb++) {
2780        for (ib=0; ib<bs; ib++) {
2781           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val);
2782           b_val++;
2783         }
2784       }
2785     }
2786   } else if (type == NORM_INFINITY) {
2787     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2788       for (jb=0; jb<bs; jb++) {
2789         for (ib=0; ib<bs; ib++) {
2790           PetscInt col = A->cmap->rstart + a_aij->j[i] * bs + jb;
2791           work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]);
2792           a_val++;
2793         }
2794       }
2795     }
2796     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2797       for (jb=0; jb<bs; jb++) {
2798         for (ib=0; ib<bs; ib++) {
2799           PetscInt col = garray[b_aij->j[i]] * bs + jb;
2800           work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]);
2801           b_val++;
2802         }
2803       }
2804     }
2805   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
2806   if (type == NORM_INFINITY) {
2807     ierr = MPI_Allreduce(work,norms,N,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2808   } else {
2809     ierr = MPI_Allreduce(work,norms,N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2810   }
2811   ierr = PetscFree(work);CHKERRQ(ierr);
2812   if (type == NORM_2) {
2813     for (i=0; i<N; i++) norms[i] = PetscSqrtReal(norms[i]);
2814   }
2815   PetscFunctionReturn(0);
2816 }
2817 
2818 #undef __FUNCT__
2819 #define __FUNCT__ "MatInvertBlockDiagonal_MPIBAIJ"
2820 PetscErrorCode  MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values)
2821 {
2822   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*) A->data;
2823   PetscErrorCode ierr;
2824 
2825   PetscFunctionBegin;
2826   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2827   PetscFunctionReturn(0);
2828 }
2829 
2830 
2831 /* -------------------------------------------------------------------*/
2832 static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ,
2833                                        MatGetRow_MPIBAIJ,
2834                                        MatRestoreRow_MPIBAIJ,
2835                                        MatMult_MPIBAIJ,
2836                                 /* 4*/ MatMultAdd_MPIBAIJ,
2837                                        MatMultTranspose_MPIBAIJ,
2838                                        MatMultTransposeAdd_MPIBAIJ,
2839                                        0,
2840                                        0,
2841                                        0,
2842                                 /*10*/ 0,
2843                                        0,
2844                                        0,
2845                                        MatSOR_MPIBAIJ,
2846                                        MatTranspose_MPIBAIJ,
2847                                 /*15*/ MatGetInfo_MPIBAIJ,
2848                                        MatEqual_MPIBAIJ,
2849                                        MatGetDiagonal_MPIBAIJ,
2850                                        MatDiagonalScale_MPIBAIJ,
2851                                        MatNorm_MPIBAIJ,
2852                                 /*20*/ MatAssemblyBegin_MPIBAIJ,
2853                                        MatAssemblyEnd_MPIBAIJ,
2854                                        MatSetOption_MPIBAIJ,
2855                                        MatZeroEntries_MPIBAIJ,
2856                                 /*24*/ MatZeroRows_MPIBAIJ,
2857                                        0,
2858                                        0,
2859                                        0,
2860                                        0,
2861                                 /*29*/ MatSetUp_MPIBAIJ,
2862                                        0,
2863                                        0,
2864                                        0,
2865                                        0,
2866                                 /*34*/ MatDuplicate_MPIBAIJ,
2867                                        0,
2868                                        0,
2869                                        0,
2870                                        0,
2871                                 /*39*/ MatAXPY_MPIBAIJ,
2872                                        MatGetSubMatrices_MPIBAIJ,
2873                                        MatIncreaseOverlap_MPIBAIJ,
2874                                        MatGetValues_MPIBAIJ,
2875                                        MatCopy_MPIBAIJ,
2876                                 /*44*/ 0,
2877                                        MatScale_MPIBAIJ,
2878                                        0,
2879                                        0,
2880                                        0,
2881                                 /*49*/ 0,
2882                                        0,
2883                                        0,
2884                                        0,
2885                                        0,
2886                                 /*54*/ MatFDColoringCreate_MPIBAIJ,
2887                                        0,
2888                                        MatSetUnfactored_MPIBAIJ,
2889                                        MatPermute_MPIBAIJ,
2890                                        MatSetValuesBlocked_MPIBAIJ,
2891                                 /*59*/ MatGetSubMatrix_MPIBAIJ,
2892                                        MatDestroy_MPIBAIJ,
2893                                        MatView_MPIBAIJ,
2894                                        0,
2895                                        0,
2896                                 /*64*/ 0,
2897                                        0,
2898                                        0,
2899                                        0,
2900                                        0,
2901                                 /*69*/ MatGetRowMaxAbs_MPIBAIJ,
2902                                        0,
2903                                        0,
2904                                        0,
2905                                        0,
2906                                 /*74*/ 0,
2907                                        MatFDColoringApply_BAIJ,
2908                                        0,
2909                                        0,
2910                                        0,
2911                                 /*79*/ 0,
2912                                        0,
2913                                        0,
2914                                        0,
2915                                        MatLoad_MPIBAIJ,
2916                                 /*84*/ 0,
2917                                        0,
2918                                        0,
2919                                        0,
2920                                        0,
2921                                 /*89*/ 0,
2922                                        0,
2923                                        0,
2924                                        0,
2925                                        0,
2926                                 /*94*/ 0,
2927                                        0,
2928                                        0,
2929                                        0,
2930                                        0,
2931                                 /*99*/ 0,
2932                                        0,
2933                                        0,
2934                                        0,
2935                                        0,
2936                                 /*104*/0,
2937                                        MatRealPart_MPIBAIJ,
2938                                        MatImaginaryPart_MPIBAIJ,
2939                                        0,
2940                                        0,
2941                                 /*109*/0,
2942                                        0,
2943                                        0,
2944                                        0,
2945                                        0,
2946                                 /*114*/MatGetSeqNonzeroStructure_MPIBAIJ,
2947                                        0,
2948                                        MatGetGhosts_MPIBAIJ,
2949                                        0,
2950                                        0,
2951                                 /*119*/0,
2952                                        0,
2953                                        0,
2954                                        0,
2955                                        MatGetMultiProcBlock_MPIBAIJ,
2956                                 /*124*/0,
2957                                        MatGetColumnNorms_MPIBAIJ,
2958                                        MatInvertBlockDiagonal_MPIBAIJ,
2959                                        0,
2960                                        0,
2961                                /*129*/ 0,
2962                                        0,
2963                                        0,
2964                                        0,
2965                                        0,
2966                                /*134*/ 0,
2967                                        0,
2968                                        0,
2969                                        0,
2970                                        0,
2971                                /*139*/ 0,
2972                                        0,
2973                                        0
2974 };
2975 
2976 #undef __FUNCT__
2977 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
2978 PetscErrorCode  MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a)
2979 {
2980   PetscFunctionBegin;
2981   *a = ((Mat_MPIBAIJ*)A->data)->A;
2982   PetscFunctionReturn(0);
2983 }
2984 
2985 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*);
2986 
2987 #undef __FUNCT__
2988 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ"
2989 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2990 {
2991   PetscInt       m,rstart,cstart,cend;
2992   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
2993   const PetscInt *JJ    =0;
2994   PetscScalar    *values=0;
2995   PetscBool      roworiented = ((Mat_MPIBAIJ*)B->data)->roworiented;
2996   PetscErrorCode ierr;
2997 
2998   PetscFunctionBegin;
2999   ierr   = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
3000   ierr   = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
3001   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3002   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3003   ierr   = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
3004   m      = B->rmap->n/bs;
3005   rstart = B->rmap->rstart/bs;
3006   cstart = B->cmap->rstart/bs;
3007   cend   = B->cmap->rend/bs;
3008 
3009   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
3010   ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
3011   for (i=0; i<m; i++) {
3012     nz = ii[i+1] - ii[i];
3013     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
3014     nz_max = PetscMax(nz_max,nz);
3015     JJ     = jj + ii[i];
3016     for (j=0; j<nz; j++) {
3017       if (*JJ >= cstart) break;
3018       JJ++;
3019     }
3020     d = 0;
3021     for (; j<nz; j++) {
3022       if (*JJ++ >= cend) break;
3023       d++;
3024     }
3025     d_nnz[i] = d;
3026     o_nnz[i] = nz - d;
3027   }
3028   ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3029   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3030 
3031   values = (PetscScalar*)V;
3032   if (!values) {
3033     ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3034     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3035   }
3036   for (i=0; i<m; i++) {
3037     PetscInt          row    = i + rstart;
3038     PetscInt          ncols  = ii[i+1] - ii[i];
3039     const PetscInt    *icols = jj + ii[i];
3040     if (!roworiented) {         /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
3041       const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
3042       ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
3043     } else {                    /* block ordering does not match so we can only insert one block at a time. */
3044       PetscInt j;
3045       for (j=0; j<ncols; j++) {
3046         const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
3047         ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
3048       }
3049     }
3050   }
3051 
3052   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
3053   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3054   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3055   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3056   PetscFunctionReturn(0);
3057 }
3058 
3059 #undef __FUNCT__
3060 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR"
3061 /*@C
3062    MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format
3063    (block compressed row).
3064 
3065    Collective on MPI_Comm
3066 
3067    Input Parameters:
3068 +  A - the matrix
3069 .  bs - the block size
3070 .  i - the indices into j for the start of each local row (starts with zero)
3071 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3072 -  v - optional values in the matrix
3073 
3074    Level: developer
3075 
3076    Notes: The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED.  For example, C programs
3077    may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
3078    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
3079    MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
3080    block column and the second index is over columns within a block.
3081 
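   As a minimal illustrative sketch (A is assumed to be an already created MPIBAIJ matrix and ierr a
   PetscErrorCode; the sizes and values below are hypothetical):

.vb
   PetscInt    bs  = 2;
   PetscInt    i[] = {0,2};                 /* one local block row containing 2 block entries */
   PetscInt    j[] = {0,3};                 /* sorted block column indices */
   PetscScalar v[] = {1,2,3,4, 5,6,7,8};    /* bs*bs row-oriented values for each block */
   ierr = MatMPIBAIJSetPreallocationCSR(A,bs,i,j,v);CHKERRQ(ierr);
.ve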
3082 .keywords: matrix, block, aij, compressed row, sparse, parallel
3083 
3084 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, MatCreateMPIBAIJWithArrays(), MPIBAIJ
3085 @*/
3086 PetscErrorCode  MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3087 {
3088   PetscErrorCode ierr;
3089 
3090   PetscFunctionBegin;
3091   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3092   PetscValidType(B,1);
3093   PetscValidLogicalCollectiveInt(B,bs,2);
3094   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
3095   PetscFunctionReturn(0);
3096 }
3097 
3098 #undef __FUNCT__
3099 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
3100 PetscErrorCode  MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
3101 {
3102   Mat_MPIBAIJ    *b;
3103   PetscErrorCode ierr;
3104   PetscInt       i;
3105 
3106   PetscFunctionBegin;
3107   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
3108   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
3109   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3110   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3111   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
3112 
3113   if (d_nnz) {
3114     for (i=0; i<B->rmap->n/bs; i++) {
3115       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]);
3116     }
3117   }
3118   if (o_nnz) {
3119     for (i=0; i<B->rmap->n/bs; i++) {
3120       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]);
3121     }
3122   }
3123 
3124   b      = (Mat_MPIBAIJ*)B->data;
3125   b->bs2 = bs*bs;
3126   b->mbs = B->rmap->n/bs;
3127   b->nbs = B->cmap->n/bs;
3128   b->Mbs = B->rmap->N/bs;
3129   b->Nbs = B->cmap->N/bs;
3130 
3131   for (i=0; i<=b->size; i++) {
3132     b->rangebs[i] = B->rmap->range[i]/bs;
3133   }
3134   b->rstartbs = B->rmap->rstart/bs;
3135   b->rendbs   = B->rmap->rend/bs;
3136   b->cstartbs = B->cmap->rstart/bs;
3137   b->cendbs   = B->cmap->rend/bs;
3138 
3139   if (!B->preallocated) {
3140     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3141     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3142     ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
3143     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
3144     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3145     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3146     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
3147     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
3148     ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr);
3149   }
3150 
3151   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3152   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
3153   B->preallocated = PETSC_TRUE;
3154   PetscFunctionReturn(0);
3155 }
3156 
3157 extern PetscErrorCode  MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
3158 extern PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
3159 
3160 #undef __FUNCT__
3161 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj"
3162 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype,MatReuse reuse,Mat *adj)
3163 {
3164   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
3165   PetscErrorCode ierr;
3166   Mat_SeqBAIJ    *d  = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data;
3167   PetscInt       M   = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs;
3168   const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;
3169 
3170   PetscFunctionBegin;
3171   ierr  = PetscMalloc((M+1)*sizeof(PetscInt),&ii);CHKERRQ(ierr);
3172   ii[0] = 0;
3173   for (i=0; i<M; i++) {
3174     if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]);
3175     if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]);
3176     ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i];
3177     /* remove one from the count if this row has a diagonal entry */
3178     for (j=id[i]; j<id[i+1]; j++) {
3179       if (jd[j] == i) {ii[i+1]--;break;}
3180     }
3181   }
3182   ierr = PetscMalloc(ii[M]*sizeof(PetscInt),&jj);CHKERRQ(ierr);
3183   cnt  = 0;
3184   for (i=0; i<M; i++) {
3185     for (j=io[i]; j<io[i+1]; j++) {
3186       if (garray[jo[j]] > rstart) break;
3187       jj[cnt++] = garray[jo[j]];
3188     }
3189     for (k=id[i]; k<id[i+1]; k++) {
3190       if (jd[k] != i) {
3191         jj[cnt++] = rstart + jd[k];
3192       }
3193     }
3194     for (; j<io[i+1]; j++) {
3195       jj[cnt++] = garray[jo[j]];
3196     }
3197   }
3198   ierr = MatCreateMPIAdj(PetscObjectComm((PetscObject)B),M,B->cmap->N/B->rmap->bs,ii,jj,NULL,adj);CHKERRQ(ierr);
3199   PetscFunctionReturn(0);
3200 }
3201 
3202 #include <../src/mat/impls/aij/mpi/mpiaij.h>
3203 
3204 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,MatReuse,Mat*);
3205 
3206 #undef __FUNCT__
3207 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAIJ"
3208 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
3209 {
3210   PetscErrorCode ierr;
3211   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
3212   Mat            B;
3213   Mat_MPIAIJ     *b;
3214 
3215   PetscFunctionBegin;
3216   if (!A->assembled) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Matrix must be assembled");
3217 
3218   ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
3219   ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
3220   ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
3221   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
3222   ierr = MatMPIAIJSetPreallocation(B,0,NULL,0,NULL);CHKERRQ(ierr);
3223   b    = (Mat_MPIAIJ*) B->data;
3224 
3225   ierr = MatDestroy(&b->A);CHKERRQ(ierr);
3226   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
3227   ierr = MatDisAssemble_MPIBAIJ(A);CHKERRQ(ierr);
3228   ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr);
3229   ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr);
3230   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3231   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3232   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3233   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3234   if (reuse == MAT_REUSE_MATRIX) {
3235     ierr = MatHeaderReplace(A,B);CHKERRQ(ierr);
3236   } else {
3237    *newmat = B;
3238   }
3239   PetscFunctionReturn(0);
3240 }
3241 
3242 #if defined(PETSC_HAVE_MUMPS)
3243 PETSC_EXTERN PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*);
3244 #endif
3245 
3246 /*MC
3247    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
3248 
3249    Options Database Keys:
3250 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
3251 . -mat_block_size <bs> - set the blocksize used to store the matrix
3252 - -mat_use_hash_table <fact> - use a hash table to save memory when constructing the matrix
3253 
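   For example (an illustrative command line; the program name is hypothetical and the program is
   assumed to call MatSetFromOptions()):
.vb
   ./myapp -mat_type mpibaij -mat_block_size 3
.ve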
3254   Level: beginner
3255 
3256 .seealso: MatCreateMPIBAIJ
3257 M*/
3258 
3259 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,MatType,MatReuse,Mat*);
3260 
3261 #undef __FUNCT__
3262 #define __FUNCT__ "MatCreate_MPIBAIJ"
3263 PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B)
3264 {
3265   Mat_MPIBAIJ    *b;
3266   PetscErrorCode ierr;
3267   PetscBool      flg;
3268 
3269   PetscFunctionBegin;
3270   ierr    = PetscNewLog(B,Mat_MPIBAIJ,&b);CHKERRQ(ierr);
3271   B->data = (void*)b;
3272 
3273   ierr         = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3274   B->assembled = PETSC_FALSE;
3275 
3276   B->insertmode = NOT_SET_VALUES;
3277   ierr          = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
3278   ierr          = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr);
3279 
3280   /* build local table of row and column ownerships */
3281   ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);
3282 
3283   /* build cache for off array entries formed */
3284   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
3285 
3286   b->donotstash  = PETSC_FALSE;
3287   b->colmap      = NULL;
3288   b->garray      = NULL;
3289   b->roworiented = PETSC_TRUE;
3290 
3291   /* stuff used in block assembly */
3292   b->barray = 0;
3293 
3294   /* stuff used for matrix vector multiply */
3295   b->lvec  = 0;
3296   b->Mvctx = 0;
3297 
3298   /* stuff for MatGetRow() */
3299   b->rowindices   = 0;
3300   b->rowvalues    = 0;
3301   b->getrowactive = PETSC_FALSE;
3302 
3303   /* hash table stuff */
3304   b->ht           = 0;
3305   b->hd           = 0;
3306   b->ht_size      = 0;
3307   b->ht_flag      = PETSC_FALSE;
3308   b->ht_fact      = 0;
3309   b->ht_total_ct  = 0;
3310   b->ht_insert_ct = 0;
3311 
3312   /* stuff for MatGetSubMatrices_MPIBAIJ_local() */
3313   b->ijonly = PETSC_FALSE;
3314 
3315   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr);
3316   ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,NULL);CHKERRQ(ierr);
3317   if (flg) {
3318     PetscReal fact = 1.39;
3319     ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
3320     ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
3321     if (fact <= 1.0) fact = 1.39;
3322     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
3323     ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
3324   }
3325   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3326 
3327 #if defined(PETSC_HAVE_MUMPS)
3328   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_baij_mumps);CHKERRQ(ierr);
3329 #endif
3330   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr);
3331   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr);
3332   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C",MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr);
3333   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
3334   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
3335   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
3336   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
3337   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
3338   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
3339   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSetHashTableFactor_C",MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
3340   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpibstrm_C",MatConvert_MPIBAIJ_MPIBSTRM);CHKERRQ(ierr);
3341   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);
3342   PetscFunctionReturn(0);
3343 }
3344 
3345 /*MC
3346    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
3347 
3348    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
3349    and MATMPIBAIJ otherwise.
3350 
3351    Options Database Keys:
3352 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
3353 
3354   Level: beginner
3355 
3356 .seealso: MatCreateBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3357 M*/
3358 
3359 #undef __FUNCT__
3360 #define __FUNCT__ "MatMPIBAIJSetPreallocation"
3361 /*@C
3362    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
3363    (block compressed row).  For good matrix assembly performance
3364    the user should preallocate the matrix storage by setting the parameters
3365    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3366    performance can be increased by more than a factor of 50.
3367 
3368    Collective on Mat
3369 
3370    Input Parameters:
3371 +  A - the matrix
3372 .  bs   - size of block
3373 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
3374            submatrix  (same for all local rows)
3375 .  d_nnz - array containing the number of block nonzeros in the various block rows
3376            in the diagonal portion of the local submatrix (possibly different for each block
3377            row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry and
3378            set it even if it is zero.
3379 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
3380            submatrix (same for all local rows).
3381 -  o_nnz - array containing the number of nonzeros in the various block rows of the
3382            off-diagonal portion of the local submatrix (possibly different for
3383            each block row) or NULL.
3384 
3385    If the *_nnz parameter is given then the *_nz parameter is ignored
3386 
3387    Options Database Keys:
3388 +   -mat_block_size - size of the blocks to use
3389 -   -mat_use_hash_table <fact> - use a hash table to save memory when constructing the matrix
3390 
3391    Notes:
3392    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
3393    then it must be used on all processors that share the object for that argument.
3394 
3395    Storage Information:
3396    For a square global matrix we define each processor's diagonal portion
3397    to be its local rows and the corresponding columns (a square submatrix);
3398    each processor's off-diagonal portion encompasses the remainder of the
3399    local matrix (a rectangular submatrix).
3400 
3401    The user can specify preallocated storage for the diagonal part of
3402    the local submatrix with either d_nz or d_nnz (not both).  Set
3403    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3404    memory allocation.  Likewise, specify preallocated storage for the
3405    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3406 
3407    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3408    the figure below we depict these three local rows and all columns (0-11).
3409 
3410 .vb
3411            0 1 2 3 4 5 6 7 8 9 10 11
3412           --------------------------
3413    row 3  |o o o d d d o o o o  o  o
3414    row 4  |o o o d d d o o o o  o  o
3415    row 5  |o o o d d d o o o o  o  o
3416           --------------------------
3417 .ve
3418 
3419    Thus, any entries in the d locations are stored in the d (diagonal)
3420    submatrix, and any entries in the o locations are stored in the
3421    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3422    stored simply in the MATSEQBAIJ format for compressed row storage.
3423 
3424    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3425    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3426    In general, for PDE problems in which most nonzeros are near the diagonal,
3427    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3428    or you will get TERRIBLE performance; see the users' manual chapter on
3429    matrices.
3430 
3431    You can call MatGetInfo() to get information on how effective the preallocation was;
3432    for example the fields mallocs, nz_allocated, nz_used, nz_unneeded.
3433    You can also run with the option -info and look for messages with the string
3434    malloc in them to see if additional memory allocation was needed.
3435 
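   As an illustrative sketch (A and ierr are assumed to already exist; with bs = 1 each of the three
   rows depicted above has 3 entries in the diagonal block and 9 outside it):
.vb
   ierr = MatMPIBAIJSetPreallocation(A,1,3,NULL,9,NULL);CHKERRQ(ierr);
.ve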
3436    Level: intermediate
3437 
3438 .keywords: matrix, block, aij, compressed row, sparse, parallel
3439 
3440 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocationCSR(), PetscSplitOwnership()
3441 @*/
3442 PetscErrorCode  MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3443 {
3444   PetscErrorCode ierr;
3445 
3446   PetscFunctionBegin;
3447   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3448   PetscValidType(B,1);
3449   PetscValidLogicalCollectiveInt(B,bs,2);
3450   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3451   PetscFunctionReturn(0);
3452 }
3453 
3454 #undef __FUNCT__
3455 #define __FUNCT__ "MatCreateBAIJ"
3456 /*@C
3457    MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format
3458    (block compressed row).  For good matrix assembly performance
3459    the user should preallocate the matrix storage by setting the parameters
3460    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3461    performance can be increased by more than a factor of 50.
3462 
3463    Collective on MPI_Comm
3464 
3465    Input Parameters:
3466 +  comm - MPI communicator
3467 .  bs   - size of block
3468 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3469            This value should be the same as the local size used in creating the
3470            y vector for the matrix-vector product y = Ax.
3471 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
3472            This value should be the same as the local size used in creating the
3473            x vector for the matrix-vector product y = Ax.
3474 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3475 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3476 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
3477            submatrix  (same for all local rows)
3478 .  d_nnz - array containing the number of nonzero blocks in the various block rows
3479            in the diagonal portion of the local submatrix (possibly different for each block
3480            row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry
3481            and set it even if it is zero.
3482 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3483            submatrix (same for all local rows).
3484 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
3485            off-diagonal portion of the local submatrix (possibly different for
3486            each block row) or NULL.
3487 
3488    Output Parameter:
3489 .  A - the matrix
3490 
3491    Options Database Keys:
3492 +   -mat_block_size - size of the blocks to use
3493 -   -mat_use_hash_table <fact> - use a hash table to save memory when constructing the matrix
3494 
3495    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3496    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3497    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3498 
3499    Notes:
3500    If the *_nnz parameter is given then the *_nz parameter is ignored
3501 
3502    A nonzero block is any block that has 1 or more nonzeros in it
3503 
3504    The user MUST specify either the local or global matrix dimensions
3505    (possibly both).
3506 
3507    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
3508    then it must be used on all processors that share the object for that argument.
3509 
3510    Storage Information:
3511    For a square global matrix we define each processor's diagonal portion
3512    to be its local rows and the corresponding columns (a square submatrix);
3513    each processor's off-diagonal portion encompasses the remainder of the
3514    local matrix (a rectangular submatrix).
3515 
3516    The user can specify preallocated storage for the diagonal part of
3517    the local submatrix with either d_nz or d_nnz (not both).  Set
3518    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3519    memory allocation.  Likewise, specify preallocated storage for the
3520    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3521 
3522    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3523    the figure below we depict these three local rows and all columns (0-11).
3524 
3525 .vb
3526            0 1 2 3 4 5 6 7 8 9 10 11
3527           --------------------------
3528    row 3  |o o o d d d o o o o  o  o
3529    row 4  |o o o d d d o o o o  o  o
3530    row 5  |o o o d d d o o o o  o  o
3531           --------------------------
3532 .ve
3533 
3534    Thus, any entries in the d locations are stored in the d (diagonal)
3535    submatrix, and any entries in the o locations are stored in the
3536    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3537    stored simply in the MATSEQBAIJ format for compressed row storage.
3538 
3539    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3540    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3541    In general, for PDE problems in which most nonzeros are near the diagonal,
3542    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3543    or you will get TERRIBLE performance; see the users' manual chapter on
3544    matrices.
3545 
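   A minimal illustrative call (the sizes and counts are hypothetical): create a square matrix with
   block size 2, let PETSc choose the local sizes, and preallocate 5 diagonal and 2 off-diagonal
   block nonzeros per block row:
.vb
   Mat A;
   ierr = MatCreateBAIJ(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DECIDE,100,100,5,NULL,2,NULL,&A);CHKERRQ(ierr);
.ve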
3546    Level: intermediate
3547 
3548 .keywords: matrix, block, aij, compressed row, sparse, parallel
3549 
3550 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3551 @*/
3552 PetscErrorCode  MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3553 {
3554   PetscErrorCode ierr;
3555   PetscMPIInt    size;
3556 
3557   PetscFunctionBegin;
3558   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3559   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3560   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3561   if (size > 1) {
3562     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
3563     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3564   } else {
3565     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3566     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3567   }
3568   PetscFunctionReturn(0);
3569 }
3570 
3571 #undef __FUNCT__
3572 #define __FUNCT__ "MatDuplicate_MPIBAIJ"
3573 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3574 {
3575   Mat            mat;
3576   Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
3577   PetscErrorCode ierr;
3578   PetscInt       len=0;
3579 
3580   PetscFunctionBegin;
3581   *newmat = 0;
3582   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
3583   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3584   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3585   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3586 
3587   mat->factortype   = matin->factortype;
3588   mat->preallocated = PETSC_TRUE;
3589   mat->assembled    = PETSC_TRUE;
3590   mat->insertmode   = NOT_SET_VALUES;
3591 
3592   a             = (Mat_MPIBAIJ*)mat->data;
3593   mat->rmap->bs = matin->rmap->bs;
3594   a->bs2        = oldmat->bs2;
3595   a->mbs        = oldmat->mbs;
3596   a->nbs        = oldmat->nbs;
3597   a->Mbs        = oldmat->Mbs;
3598   a->Nbs        = oldmat->Nbs;
3599 
3600   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3601   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3602 
3603   a->size         = oldmat->size;
3604   a->rank         = oldmat->rank;
3605   a->donotstash   = oldmat->donotstash;
3606   a->roworiented  = oldmat->roworiented;
3607   a->rowindices   = 0;
3608   a->rowvalues    = 0;
3609   a->getrowactive = PETSC_FALSE;
3610   a->barray       = 0;
3611   a->rstartbs     = oldmat->rstartbs;
3612   a->rendbs       = oldmat->rendbs;
3613   a->cstartbs     = oldmat->cstartbs;
3614   a->cendbs       = oldmat->cendbs;
3615 
3616   /* hash table stuff */
3617   a->ht           = 0;
3618   a->hd           = 0;
3619   a->ht_size      = 0;
3620   a->ht_flag      = oldmat->ht_flag;
3621   a->ht_fact      = oldmat->ht_fact;
3622   a->ht_total_ct  = 0;
3623   a->ht_insert_ct = 0;
3624 
3625   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
3626   if (oldmat->colmap) {
3627 #if defined(PETSC_USE_CTABLE)
3628     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3629 #else
3630     ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3631     ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3632     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3633 #endif
3634   } else a->colmap = 0;
3635 
3636   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
3637     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3638     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3639     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
3640   } else a->garray = 0;
3641 
3642   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
3643   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3644   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
3645   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3646   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
3647 
3648   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3649   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
3650   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3651   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
3652   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3653   *newmat = mat;
3654   PetscFunctionReturn(0);
3655 }
3656 
3657 #undef __FUNCT__
3658 #define __FUNCT__ "MatLoad_MPIBAIJ"
3659 PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer)
3660 {
3661   PetscErrorCode ierr;
3662   int            fd;
3663   PetscInt       i,nz,j,rstart,rend;
3664   PetscScalar    *vals,*buf;
3665   MPI_Comm       comm;
3666   MPI_Status     status;
3667   PetscMPIInt    rank,size,maxnz;
3668   PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
3669   PetscInt       *locrowlens = NULL,*procsnz = NULL,*browners = NULL;
3670   PetscInt       jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax;
3671   PetscMPIInt    tag    = ((PetscObject)viewer)->tag;
3672   PetscInt       *dlens = NULL,*odlens = NULL,*mask = NULL,*masked1 = NULL,*masked2 = NULL,rowcount,odcount;
3673   PetscInt       dcount,kmax,k,nzcount,tmp,mend,sizesset=1,grows,gcols;
3674 
3675   PetscFunctionBegin;
3676   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3677   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr);
3678   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3679   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3680 
3681   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3682   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3683   if (!rank) {
3684     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3685     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
3686     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3687   }
3688 
3689   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
3690 
3691   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3692   M    = header[1]; N = header[2];
3693 
3694   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3695   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
3696   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
3697 
3698   /* If global sizes are set, check if they are consistent with that given in the file */
3699   if (sizesset) {
3700     ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
3701   }
3702   if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",M,grows);
3703   if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",N,gcols);
3704 
3705   if (M != N) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Can only do square matrices");
3706 
3707   /*
3708      This code adds extra rows to make sure the number of rows is
3709      divisible by the blocksize
3710   */
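  /* Worked example with illustrative numbers: M = 10 and bs = 4 give Mbs = 10/4 = 2 and
     extra_rows = 4 - 10 + 4*2 = 2; since extra_rows != bs, Mbs is bumped to 3 and the matrix
     is padded to 12 rows (each padded row later receives a single 1.0 on the diagonal). */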
3711   Mbs        = M/bs;
3712   extra_rows = bs - M + bs*Mbs;
3713   if (extra_rows == bs) extra_rows = 0;
3714   else                  Mbs++;
3715   if (extra_rows && !rank) {
3716     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3717   }
3718 
3719   /* determine ownership of all rows */
3720   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
3721     mbs = Mbs/size + ((Mbs % size) > rank);
3722     m   = mbs*bs;
3723   } else { /* User set */
3724     m   = newmat->rmap->n;
3725     mbs = m/bs;
3726   }
3727   ierr = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr);
3728   ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3729 
3730   /* process 0 needs enough room for process with most rows */
3731   if (!rank) {
3732     mmax = rowners[1];
3733     for (i=2; i<=size; i++) {
3734       mmax = PetscMax(mmax,rowners[i]);
3735     }
3736     mmax*=bs;
3737   } else mmax = -1;             /* unused, but compiler warns anyway */
3738 
3739   rowners[0] = 0;
3740   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
3741   for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
3742   rstart = rowners[rank];
3743   rend   = rowners[rank+1];
3744 
3745   /* distribute row lengths to all processors */
3746   ierr = PetscMalloc(m*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr);
3747   if (!rank) {
3748     mend = m;
3749     if (size == 1) mend = mend - extra_rows;
3750     ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr);
3751     for (j=mend; j<m; j++) locrowlens[j] = 1;
3752     ierr = PetscMalloc(mmax*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3753     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3754     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3755     for (j=0; j<m; j++) {
3756       procsnz[0] += locrowlens[j];
3757     }
3758     for (i=1; i<size; i++) {
3759       mend = browners[i+1] - browners[i];
3760       if (i == size-1) mend = mend - extra_rows;
3761       ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr);
3762       for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
3763       /* calculate the number of nonzeros on each processor */
3764       for (j=0; j<browners[i+1]-browners[i]; j++) {
3765         procsnz[i] += rowlengths[j];
3766       }
3767       ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3768     }
3769     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3770   } else {
3771     ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3772   }
3773 
3774   if (!rank) {
3775     /* determine max buffer needed and allocate it */
3776     maxnz = procsnz[0];
3777     for (i=1; i<size; i++) {
3778       maxnz = PetscMax(maxnz,procsnz[i]);
3779     }
3780     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3781 
3782     /* read in my part of the matrix column indices  */
3783     nz     = procsnz[0];
3784     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
3785     mycols = ibuf;
3786     if (size == 1) nz -= extra_rows;
3787     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3788     if (size == 1) {
3789       for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i;
3790     }
3791 
3792     /* read in everyone else's column indices (except the last process's) and ship them off */
3793     for (i=1; i<size-1; i++) {
3794       nz   = procsnz[i];
3795       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3796       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3797     }
3798     /* read in the stuff for the last proc */
3799     if (size != 1) {
3800       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
3801       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3802       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
3803       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
3804     }
3805     ierr = PetscFree(cols);CHKERRQ(ierr);
3806   } else {
3807     /* determine buffer space needed for message */
3808     nz = 0;
3809     for (i=0; i<m; i++) {
3810       nz += locrowlens[i];
3811     }
3812     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
3813     mycols = ibuf;
3814     /* receive message of column indices*/
3815     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3816     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
3817     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
3818   }
3819 
3820   /* loop over local rows, determining number of off diagonal entries */
3821   ierr     = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
3822   ierr     = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
3823   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3824   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3825   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3826   rowcount = 0; nzcount = 0;
3827   for (i=0; i<mbs; i++) {
3828     dcount  = 0;
3829     odcount = 0;
3830     for (j=0; j<bs; j++) {
3831       kmax = locrowlens[rowcount];
3832       for (k=0; k<kmax; k++) {
3833         tmp = mycols[nzcount++]/bs;
3834         if (!mask[tmp]) {
3835           mask[tmp] = 1;
3836           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
3837           else masked1[dcount++] = tmp;
3838         }
3839       }
3840       rowcount++;
3841     }
3842 
3843     dlens[i]  = dcount;
3844     odlens[i] = odcount;
3845 
3846     /* zero out the mask elements we set */
3847     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
3848     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
3849   }
3850 
3851 
3852   if (!sizesset) {
3853     ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3854   }
3855   ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
3856 
3857   if (!rank) {
3858     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
3859     /* read in my part of the matrix numerical values  */
3860     nz     = procsnz[0];
3861     vals   = buf;
3862     mycols = ibuf;
3863     if (size == 1) nz -= extra_rows;
3864     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3865     if (size == 1) {
3866       for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0;
3867     }
3868 
3869     /* insert into matrix */
3870     jj = rstart*bs;
3871     for (i=0; i<m; i++) {
3872       ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3873       mycols += locrowlens[i];
3874       vals   += locrowlens[i];
3875       jj++;
3876     }
3877     /* read in the values for the other processes (except the last one) and ship them out;
            MPIULong_Send() is used because the value messages may be too long for a single MPI send */
3878     for (i=1; i<size-1; i++) {
3879       nz   = procsnz[i];
3880       vals = buf;
3881       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3882       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3883     }
3884     /* read in and ship off the values for the last proc */
3885     if (size != 1) {
3886       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
3887       vals = buf;
3888       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3889       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
3890       ierr = MPIULong_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3891     }
3892     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3893   } else {
3894     /* receive numeric values */
3895     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
3896 
3897     /* receive message of values */
3898     vals   = buf;
3899     mycols = ibuf;
3900     ierr   = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3901 
3902     /* insert into matrix */
3903     jj = rstart*bs;
3904     for (i=0; i<m; i++) {
3905       ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3906       mycols += locrowlens[i];
3907       vals   += locrowlens[i];
3908       jj++;
3909     }
3910   }
3911   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
3912   ierr = PetscFree(buf);CHKERRQ(ierr);
3913   ierr = PetscFree(ibuf);CHKERRQ(ierr);
3914   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
3915   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
3916   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
3917   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3918   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3919   PetscFunctionReturn(0);
3920 }
3921 
3922 #undef __FUNCT__
3923 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
3924 /*@
3925    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
3926 
3927    Input Parameters:
3928 +  mat  - the matrix
3929 -  fact - factor used to compute the size of the hash table
3930 
3931    Not Collective, each process can use a different factor
3932 
3933    Level: advanced
3934 
3935   Notes:
3936    This can also be set by the command line option: -mat_use_hash_table <fact>
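
   A minimal usage sketch (the matrix A and the factor value 1.99 are purely illustrative):
.vb
   MatSetType(A,MATMPIBAIJ);
   MatSetOption(A,MAT_USE_HASH_TABLE,PETSC_TRUE);
   MatMPIBAIJSetHashTableFactor(A,1.99);
.ve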
3937 
3938 .keywords: matrix, hashtable, factor, HT
3939 
3940 .seealso: MatSetOption()
3941 @*/
3942 PetscErrorCode  MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
3943 {
3944   PetscErrorCode ierr;
3945 
3946   PetscFunctionBegin;
3947   ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr);
3948   PetscFunctionReturn(0);
3949 }
3950 
3951 #undef __FUNCT__
3952 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
3953 PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
3954 {
3955   Mat_MPIBAIJ *baij;
3956 
3957   PetscFunctionBegin;
3958   baij          = (Mat_MPIBAIJ*)mat->data;
3959   baij->ht_fact = fact;
3960   PetscFunctionReturn(0);
3961 }
3962 
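/*
    MatMPIBAIJGetSeqBAIJ - gives access to the local pieces of an MPIBAIJ matrix:
  Ad is the "diagonal" part (the block columns owned by this process), Ao is the
  off-diagonal part, and colmap maps the compressed block columns of Ao back to
  global block column numbers (a->garray).
*/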
3963 #undef __FUNCT__
3964 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
3965 PetscErrorCode  MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
3966 {
3967   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
3968 
3969   PetscFunctionBegin;
3970   *Ad     = a->A;
3971   *Ao     = a->B;
3972   *colmap = a->garray;
3973   PetscFunctionReturn(0);
3974 }
3975 
3976 /*
3977     Special version for direct calls from Fortran (to eliminate two function call overheads)
3978 */
3979 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3980 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3981 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3982 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3983 #endif
3984 
3985 #undef __FUNCT__
3986 #define __FUNCT__ "matmpibaijsetvaluesblocked_"
3987 /*@C
3988   MatMPIBAIJSetValuesBlocked - Direct Fortran entry point used in place of a call to MatSetValuesBlocked()
3989 
3990   Collective on Mat
3991 
3992   Input Parameters:
3993 + mat - the matrix
3994 . min - number of input rows
3995 . im - input rows
3996 . nin - number of input columns
3997 . in - input columns
3998 . v - numerical values input
3999 - addvin - INSERT_VALUES or ADD_VALUES
4000 
4001   Notes: This contains a complete copy of MatSetValuesBlocked_MPIBAIJ(), which is unfortunate code duplication.
4002 
4003   Level: advanced
4004 
4005 .seealso:   MatSetValuesBlocked()
4006 @*/
4007 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
4008 {
4009   /* convert input arguments to C version */
4010   Mat        mat  = *matin;
4011   PetscInt   m    = *min, n = *nin;
4012   InsertMode addv = *addvin;
4013 
4014   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
4015   const MatScalar *value;
4016   MatScalar       *barray     = baij->barray;
4017   PetscBool       roworiented = baij->roworiented;
4018   PetscErrorCode  ierr;
4019   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
4020   PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
4021   PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
4022 
4023   PetscFunctionBegin;
4024   /* tasks normally handled by MatSetValuesBlocked() */
4025   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
4026 #if defined(PETSC_USE_DEBUG)
4027   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
4028   if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
4029 #endif
4030   if (mat->assembled) {
4031     mat->was_assembled = PETSC_TRUE;
4032     mat->assembled     = PETSC_FALSE;
4033   }
4034   ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
4035 
4036 
4037   if (!barray) {
4038     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
4039     baij->barray = barray;
4040   }
4041 
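  /* stepval is the number of scalars to skip, after copying one bs-long piece of a block,
     to reach that block's next row (row-oriented input) or next column (column-oriented
     input) in the input array v */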
4042   if (roworiented) stepval = (n-1)*bs;
4043   else stepval = (m-1)*bs;
4044 
4045   for (i=0; i<m; i++) {
4046     if (im[i] < 0) continue;
4047 #if defined(PETSC_USE_DEBUG)
4048     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
4049 #endif
4050     if (im[i] >= rstart && im[i] < rend) {
4051       row = im[i] - rstart;
4052       for (j=0; j<n; j++) {
4053         /* if only one block column (row oriented) or one block row (column oriented) is passed, the block is already contiguous and no copy is required */
4054         if ((roworiented) && (n == 1)) {
4055           barray = (MatScalar*)v + i*bs2;
4056         } else if ((!roworiented) && (m == 1)) {
4057           barray = (MatScalar*)v + j*bs2;
4058         } else { /* Here a copy is required */
4059           if (roworiented) {
4060             value = v + i*(stepval+bs)*bs + j*bs;
4061           } else {
4062             value = v + j*(stepval+bs)*bs + i*bs;
4063           }
4064           for (ii=0; ii<bs; ii++,value+=stepval) {
4065             for (jj=0; jj<bs; jj++) {
4066               *barray++ = *value++;
4067             }
4068           }
4069           barray -=bs2;
4070         }
4071 
4072         if (in[j] >= cstart && in[j] < cend) {
4073           col  = in[j] - cstart;
4074           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
4075         } else if (in[j] < 0) continue;
4076 #if defined(PETSC_USE_DEBUG)
4077         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
4078 #endif
4079         else {
4080           if (mat->was_assembled) {
4081             if (!baij->colmap) {
4082               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
4083             }
4084 
4085 #if defined(PETSC_USE_DEBUG)
4086 #if defined(PETSC_USE_CTABLE)
4087             { PetscInt data;
4088               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
4089               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
4090             }
4091 #else
4092             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
4093 #endif
4094 #endif
4095 #if defined(PETSC_USE_CTABLE)
4096             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
4097             col  = (col - 1)/bs;
4098 #else
4099             col = (baij->colmap[in[j]] - 1)/bs;
4100 #endif
4101             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
4102               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
4103               col  =  in[j];
4104             }
4105           } else col = in[j];
4106           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
4107         }
4108       }
4109     } else {
4110       if (!baij->donotstash) {
4111         if (roworiented) {
4112           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
4113         } else {
4114           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
4115         }
4116       }
4117     }
4118   }
4119 
4120   /* task normally handled by MatSetValuesBlocked() */
4121   ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
4122   PetscFunctionReturn(0);
4123 }
4124 
4125 #undef __FUNCT__
4126 #define __FUNCT__ "MatCreateMPIBAIJWithArrays"
4127 /*@
4128      MatCreateMPIBAIJWithArrays - creates an MPI BAIJ matrix using arrays that contain the local
4129          rows in standard CSR format.
4130 
4131    Collective on MPI_Comm
4132 
4133    Input Parameters:
4134 +  comm - MPI communicator
4135 .  bs - the block size, only a block size of 1 is supported
4136 .  m - number of local rows (Cannot be PETSC_DECIDE)
4137 .  n - This value should be the same as the local size used in creating the
4138        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4139        calculated if N is given). For square matrices n is almost always m.
4140 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
4141 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
4142 .   i - row offsets in CSR format (i[0] must be 0)
4143 .   j - column indices
4144 -   a - matrix values
4145 
4146    Output Parameter:
4147 .   mat - the matrix
4148 
4149    Level: intermediate
4150 
4151    Notes:
4152        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4153      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4154      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4155 
4156      The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
4157      the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
4158      block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
4159      with column-major ordering within blocks.
4160 
4161       The i and j indices are 0 based, and the entries of i are offsets into the local j (and a) arrays.
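
       A minimal sketch on a single process (the sizes and values below are purely
     illustrative, and bs is taken as 1 in line with the restriction noted above):
.vb
       Mat         A;
       PetscInt    i[] = {0,2,3};         /* 2 local rows: row 0 has 2 entries, row 1 has 1 */
       PetscInt    j[] = {0,1,1};         /* global column index of each entry */
       PetscScalar a[] = {1.0,2.0,3.0};   /* the corresponding numerical values */
       MatCreateMPIBAIJWithArrays(PETSC_COMM_WORLD,1,2,2,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,&A);
.ve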
4162 
4163 .keywords: matrix, baij, block, compressed row, sparse, parallel
4164 
4165 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR(),
4166           MPIBAIJ, MatCreateBAIJ(), MatCreateMPIAIJWithSplitArrays()
4167 @*/
4168 PetscErrorCode  MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4169 {
4170   PetscErrorCode ierr;
4171 
4172   PetscFunctionBegin;
4173   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4174   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4175   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4176   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4177   ierr = MatSetType(*mat,MATMPIBAIJ);CHKERRQ(ierr);
4178   ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
4179   ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
4180   ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
4181   PetscFunctionReturn(0);
4182 }
4183