xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision ce94432eddcd14845bc7e8083b7f8ea723b9bf7d)
1 
2 #include <../src/mat/impls/baij/mpi/mpibaij.h>   /*I  "petscmat.h"  I*/
3 #include <petscblaslapack.h>
4 
5 extern PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
6 extern PetscErrorCode MatDisAssemble_MPIBAIJ(Mat);
7 extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []);
8 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
9 extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
10 extern PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
11 extern PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
12 extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar,Vec,Vec);
13 
14 #undef __FUNCT__
15 #define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ"
16 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
17 {
18   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
19   PetscErrorCode ierr;
20   PetscInt       i,*idxb = 0;
21   PetscScalar    *va,*vb;
22   Vec            vtmp;
23 
24   PetscFunctionBegin;
25   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
26   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
27   if (idx) {
28     for (i=0; i<A->rmap->n; i++) {
29       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
30     }
31   }
32 
33   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
34   if (idx) {ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);}
35   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
36   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
37 
38   for (i=0; i<A->rmap->n; i++) {
39     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
40       va[i] = vb[i];
41       if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);
42     }
43   }
44 
45   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
46   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
47   ierr = PetscFree(idxb);CHKERRQ(ierr);
48   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
49   PetscFunctionReturn(0);
50 }
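/*
   A small worked example of the garray mapping used above, assuming
   A->cmap->bs = 2 and a->garray = {3,7} (the global block columns held in the
   off-diagonal part B): a local column index idxb[i] = 3 lies in local block
   3/2 = 1 with offset 3%2 = 1, so the corresponding global column is
   bs*garray[1] + 1 = 2*7 + 1 = 15.
*/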
51 
52 EXTERN_C_BEGIN
53 #undef __FUNCT__
54 #define __FUNCT__ "MatStoreValues_MPIBAIJ"
55 PetscErrorCode  MatStoreValues_MPIBAIJ(Mat mat)
56 {
57   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
58   PetscErrorCode ierr;
59 
60   PetscFunctionBegin;
61   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
62   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
63   PetscFunctionReturn(0);
64 }
65 EXTERN_C_END
66 
67 EXTERN_C_BEGIN
68 #undef __FUNCT__
69 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
70 PetscErrorCode  MatRetrieveValues_MPIBAIJ(Mat mat)
71 {
72   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
73   PetscErrorCode ierr;
74 
75   PetscFunctionBegin;
76   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
77   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
78   PetscFunctionReturn(0);
79 }
80 EXTERN_C_END
81 
82 /*
83      Local utility routine that creates a mapping from the global column
84    number to the local number in the off-diagonal part of the local
85    storage of the matrix.  This is done in a non-scalable way since the
86    length of colmap equals the global number of block columns.
87 */
88 #undef __FUNCT__
89 #define __FUNCT__ "MatCreateColmap_MPIBAIJ_Private"
90 PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
91 {
92   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
93   Mat_SeqBAIJ    *B    = (Mat_SeqBAIJ*)baij->B->data;
94   PetscErrorCode ierr;
95   PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;
96 
97   PetscFunctionBegin;
98 #if defined(PETSC_USE_CTABLE)
99   ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
100   for (i=0; i<nbs; i++) {
101     ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr);
102   }
103 #else
104   ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr);
105   ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
106   ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
107   for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
108 #endif
109   PetscFunctionReturn(0);
110 }
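/*
   Example of the colmap contents (non-CTABLE case), assuming bs = 2, Nbs = 6,
   and garray = {1,4} (the two global block columns stored in B):

     colmap = { 0, 1, 0, 0, 3, 0 }      stored value is local_block*bs + 1

   A later lookup of global point column 9 (block 9/2 = 4) gives
   colmap[4]-1 = 2, and adding the offset 9%2 = 1 yields local column 3 in B;
   a zero entry (lookup result -1) means the block column is not present.
*/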
111 
112 #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
113   { \
114  \
115     brow = row/bs;  \
116     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
117     rmax = aimax[brow]; nrow = ailen[brow]; \
118     bcol = col/bs; \
119     ridx = row % bs; cidx = col % bs; \
120     low  = 0; high = nrow; \
121     while (high-low > 3) { \
122       t = (low+high)/2; \
123       if (rp[t] > bcol) high = t; \
124       else              low  = t; \
125     } \
126     for (_i=low; _i<high; _i++) { \
127       if (rp[_i] > bcol) break; \
128       if (rp[_i] == bcol) { \
129         bap = ap +  bs2*_i + bs*cidx + ridx; \
130         if (addv == ADD_VALUES) *bap += value;  \
131         else                    *bap  = value;  \
132         goto a_noinsert; \
133       } \
134     } \
135     if (a->nonew == 1) goto a_noinsert; \
136     if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
137     MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
138     N = nrow++ - 1;  \
139     /* shift up all the later entries in this row */ \
140     for (ii=N; ii>=_i; ii--) { \
141       rp[ii+1] = rp[ii]; \
142       ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
143     } \
144     if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
145     rp[_i]                      = bcol;  \
146     ap[bs2*_i + bs*cidx + ridx] = value;  \
147 a_noinsert:; \
148     ailen[brow] = nrow; \
149   }
150 
151 #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
152   { \
153     brow = row/bs;  \
154     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
155     rmax = bimax[brow]; nrow = bilen[brow]; \
156     bcol = col/bs; \
157     ridx = row % bs; cidx = col % bs; \
158     low  = 0; high = nrow; \
159     while (high-low > 3) { \
160       t = (low+high)/2; \
161       if (rp[t] > bcol) high = t; \
162       else              low  = t; \
163     } \
164     for (_i=low; _i<high; _i++) { \
165       if (rp[_i] > bcol) break; \
166       if (rp[_i] == bcol) { \
167         bap = ap +  bs2*_i + bs*cidx + ridx; \
168         if (addv == ADD_VALUES) *bap += value;  \
169         else                    *bap  = value;  \
170         goto b_noinsert; \
171       } \
172     } \
173     if (b->nonew == 1) goto b_noinsert; \
174     if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
175     MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
176     CHKMEMQ; \
177     N = nrow++ - 1;  \
178     /* shift up all the later entries in this row */ \
179     for (ii=N; ii>=_i; ii--) { \
180       rp[ii+1] = rp[ii]; \
181       ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
182     } \
183     if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
184     rp[_i]                      = bcol;  \
185     ap[bs2*_i + bs*cidx + ridx] = value;  \
186 b_noinsert:; \
187     bilen[brow] = nrow; \
188   }
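/*
   Both macros address an individual entry inside a bs x bs block with
   ap[bs2*_i + bs*cidx + ridx], i.e. blocks are stored column-major.  A small
   worked example with bs = 2: the block

       [ a00 a01 ]
       [ a10 a11 ]

   is laid out in memory as {a00, a10, a01, a11}, so the entry at block row
   ridx = 1, block column cidx = 0 lives at offset bs*cidx + ridx = 1.
*/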
189 
190 #undef __FUNCT__
191 #define __FUNCT__ "MatSetValues_MPIBAIJ"
192 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
193 {
194   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
195   MatScalar      value;
196   PetscBool      roworiented = baij->roworiented;
197   PetscErrorCode ierr;
198   PetscInt       i,j,row,col;
199   PetscInt       rstart_orig=mat->rmap->rstart;
200   PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
201   PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;
202 
203   /* Some variables required by the MatSetValues_SeqBAIJ_{A,B}_Private() macros */
204   Mat         A     = baij->A;
205   Mat_SeqBAIJ *a    = (Mat_SeqBAIJ*)(A)->data;
206   PetscInt    *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
207   MatScalar   *aa   =a->a;
208 
209   Mat         B     = baij->B;
210   Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
211   PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
212   MatScalar   *ba   =b->a;
213 
214   PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
215   PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
216   MatScalar *ap,*bap;
217 
218   PetscFunctionBegin;
219   if (v) PetscValidScalarPointer(v,6);
220   for (i=0; i<m; i++) {
221     if (im[i] < 0) continue;
222 #if defined(PETSC_USE_DEBUG)
223     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
224 #endif
225     if (im[i] >= rstart_orig && im[i] < rend_orig) {
226       row = im[i] - rstart_orig;
227       for (j=0; j<n; j++) {
228         if (in[j] >= cstart_orig && in[j] < cend_orig) {
229           col = in[j] - cstart_orig;
230           if (roworiented) value = v[i*n+j];
231           else             value = v[i+j*m];
232           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
233           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
234         } else if (in[j] < 0) continue;
235 #if defined(PETSC_USE_DEBUG)
236         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
237 #endif
238         else {
239           if (mat->was_assembled) {
240             if (!baij->colmap) {
241               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
242             }
243 #if defined(PETSC_USE_CTABLE)
244             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
245             col  = col - 1;
246 #else
247             col = baij->colmap[in[j]/bs] - 1;
248 #endif
249             if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
250               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
251               col  =  in[j];
252               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
253               B    = baij->B;
254               b    = (Mat_SeqBAIJ*)(B)->data;
255               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
256               ba   =b->a;
257             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
258             else col += in[j]%bs;
259           } else col = in[j];
260           if (roworiented) value = v[i*n+j];
261           else             value = v[i+j*m];
262           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
263           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
264         }
265       }
266     } else {
267       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
268       if (!baij->donotstash) {
269         mat->assembled = PETSC_FALSE;
270         if (roworiented) {
271           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
272         } else {
273           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
274         }
275       }
276     }
277   }
278   PetscFunctionReturn(0);
279 }
280 
281 #undef __FUNCT__
282 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
283 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
284 {
285   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
286   const PetscScalar *value;
287   MatScalar         *barray     = baij->barray;
288   PetscBool         roworiented = baij->roworiented;
289   PetscErrorCode    ierr;
290   PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
291   PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
292   PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
293 
294   PetscFunctionBegin;
295   if (!barray) {
296     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
297     baij->barray = barray;
298   }
299 
300   if (roworiented) stepval = (n-1)*bs;
301   else stepval = (m-1)*bs;
302 
303   for (i=0; i<m; i++) {
304     if (im[i] < 0) continue;
305 #if defined(PETSC_USE_DEBUG)
306     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
307 #endif
308     if (im[i] >= rstart && im[i] < rend) {
309       row = im[i] - rstart;
310       for (j=0; j<n; j++) {
311         /* If only one block column (n == 1, row-oriented) or one block row (m == 1, column-oriented) is being set, a copy is not required */
312         if ((roworiented) && (n == 1)) {
313           barray = (MatScalar*)v + i*bs2;
314         } else if ((!roworiented) && (m == 1)) {
315           barray = (MatScalar*)v + j*bs2;
316         } else { /* Here a copy is required */
317           if (roworiented) {
318             value = v + (i*(stepval+bs) + j)*bs;
319           } else {
320             value = v + (j*(stepval+bs) + i)*bs;
321           }
322           for (ii=0; ii<bs; ii++,value+=bs+stepval) {
323             for (jj=0; jj<bs; jj++) barray[jj] = value[jj];
324             barray += bs;
325           }
326           barray -= bs2;
327         }
328 
329         if (in[j] >= cstart && in[j] < cend) {
330           col  = in[j] - cstart;
331           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
332         } else if (in[j] < 0) continue;
333 #if defined(PETSC_USE_DEBUG)
334         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
335 #endif
336         else {
337           if (mat->was_assembled) {
338             if (!baij->colmap) {
339               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
340             }
341 
342 #if defined(PETSC_USE_DEBUG)
343 #if defined(PETSC_USE_CTABLE)
344             { PetscInt data;
345               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
346               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
347             }
348 #else
349             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
350 #endif
351 #endif
352 #if defined(PETSC_USE_CTABLE)
353             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
354             col  = (col - 1)/bs;
355 #else
356             col = (baij->colmap[in[j]] - 1)/bs;
357 #endif
358             if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
359               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
360               col  =  in[j];
361             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", bs*im[i], bs*in[j]);
362           } else col = in[j];
363           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
364         }
365       }
366     } else {
367       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
368       if (!baij->donotstash) {
369         if (roworiented) {
370           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
371         } else {
372           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
373         }
374       }
375     }
376   }
377   PetscFunctionReturn(0);
378 }
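/*
   A minimal caller-side sketch of the blocked insertion handled above
   (illustrative only; ierr/CHKERRQ() error checking omitted).  With bs = 2 one
   2x2 block is inserted at block row 0, block column 1; by default the values
   are row-oriented, so vals holds {1,2} in the first row and {3,4} in the second:

     Mat         C;
     PetscInt    row = 0, col = 1;
     PetscScalar vals[4] = {1.0,2.0,3.0,4.0};

     MatCreate(PETSC_COMM_WORLD,&C);
     MatSetSizes(C,PETSC_DECIDE,PETSC_DECIDE,8,8);
     MatSetType(C,MATMPIBAIJ);
     MatMPIBAIJSetPreallocation(C,2,1,NULL,1,NULL);
     MatSetValuesBlocked(C,1,&row,1,&col,vals,INSERT_VALUES);
     MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
     MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
     MatDestroy(&C);
*/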
379 
380 #define HASH_KEY 0.6180339887
381 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
382 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
383 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
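/*
   HASH() implements multiplicative hashing: the key is multiplied by the
   golden-ratio conjugate and the fractional part of the product is scaled by
   the table size.  For example, with size = 1000 and key = 97,
   tmp = 97*0.6180339887 = 59.949..., so the slot is
   (PetscInt)(1000*0.949...) = 949.
*/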
384 #undef __FUNCT__
385 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
386 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
387 {
388   Mat_MPIBAIJ    *baij       = (Mat_MPIBAIJ*)mat->data;
389   PetscBool      roworiented = baij->roworiented;
390   PetscErrorCode ierr;
391   PetscInt       i,j,row,col;
392   PetscInt       rstart_orig=mat->rmap->rstart;
393   PetscInt       rend_orig  =mat->rmap->rend,Nbs=baij->Nbs;
394   PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
395   PetscReal      tmp;
396   MatScalar      **HD = baij->hd,value;
397 #if defined(PETSC_USE_DEBUG)
398   PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
399 #endif
400 
401   PetscFunctionBegin;
402   if (v) PetscValidScalarPointer(v,6);
403   for (i=0; i<m; i++) {
404 #if defined(PETSC_USE_DEBUG)
405     if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
406     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
407 #endif
408     row = im[i];
409     if (row >= rstart_orig && row < rend_orig) {
410       for (j=0; j<n; j++) {
411         col = in[j];
412         if (roworiented) value = v[i*n+j];
413         else             value = v[i+j*m];
414         /* Look up the entry in the hash table */
415         key = (row/bs)*Nbs+(col/bs)+1;
416         h1  = HASH(size,key,tmp);
417 
418 
419         idx = h1;
420 #if defined(PETSC_USE_DEBUG)
421         insert_ct++;
422         total_ct++;
423         if (HT[idx] != key) {
424           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
425           if (idx == size) {
426             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
427             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
428           }
429         }
430 #else
431         if (HT[idx] != key) {
432           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
433           if (idx == size) {
434             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
435             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
436           }
437         }
438 #endif
439         /* A hash table entry was found, so insert the value at the correct address */
440         if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
441         else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
442       }
443     } else if (!baij->donotstash) {
444       if (roworiented) {
445         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
446       } else {
447         ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
448       }
449     }
450   }
451 #if defined(PETSC_USE_DEBUG)
452   baij->ht_total_ct  = total_ct;
453   baij->ht_insert_ct = insert_ct;
454 #endif
455   PetscFunctionReturn(0);
456 }
457 
458 #undef __FUNCT__
459 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
460 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
461 {
462   Mat_MPIBAIJ       *baij       = (Mat_MPIBAIJ*)mat->data;
463   PetscBool         roworiented = baij->roworiented;
464   PetscErrorCode    ierr;
465   PetscInt          i,j,ii,jj,row,col;
466   PetscInt          rstart=baij->rstartbs;
467   PetscInt          rend  =mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
468   PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
469   PetscReal         tmp;
470   MatScalar         **HD = baij->hd,*baij_a;
471   const PetscScalar *v_t,*value;
472 #if defined(PETSC_USE_DEBUG)
473   PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
474 #endif
475 
476   PetscFunctionBegin;
477   if (roworiented) stepval = (n-1)*bs;
478   else stepval = (m-1)*bs;
479 
480   for (i=0; i<m; i++) {
481 #if defined(PETSC_USE_DEBUG)
482     if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
483     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
484 #endif
485     row = im[i];
486     v_t = v + i*nbs2;
487     if (row >= rstart && row < rend) {
488       for (j=0; j<n; j++) {
489         col = in[j];
490 
491         /* Look up the entry in the hash table */
492         key = row*Nbs+col+1;
493         h1  = HASH(size,key,tmp);
494 
495         idx = h1;
496 #if defined(PETSC_USE_DEBUG)
497         total_ct++;
498         insert_ct++;
499         if (HT[idx] != key) {
500           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
501           if (idx == size) {
502             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
503             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
504           }
505         }
506 #else
507         if (HT[idx] != key) {
508           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
509           if (idx == size) {
510             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
511             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
512           }
513         }
514 #endif
515         baij_a = HD[idx];
516         if (roworiented) {
517           /*value = v + i*(stepval+bs)*bs + j*bs;*/
518           /* value = v + (i*(stepval+bs)+j)*bs; */
519           value = v_t;
520           v_t  += bs;
521           if (addv == ADD_VALUES) {
522             for (ii=0; ii<bs; ii++,value+=stepval) {
523               for (jj=ii; jj<bs2; jj+=bs) {
524                 baij_a[jj] += *value++;
525               }
526             }
527           } else {
528             for (ii=0; ii<bs; ii++,value+=stepval) {
529               for (jj=ii; jj<bs2; jj+=bs) {
530                 baij_a[jj] = *value++;
531               }
532             }
533           }
534         } else {
535           value = v + j*(stepval+bs)*bs + i*bs;
536           if (addv == ADD_VALUES) {
537             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
538               for (jj=0; jj<bs; jj++) {
539                 baij_a[jj] += *value++;
540               }
541             }
542           } else {
543             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
544               for (jj=0; jj<bs; jj++) {
545                 baij_a[jj] = *value++;
546               }
547             }
548           }
549         }
550       }
551     } else {
552       if (!baij->donotstash) {
553         if (roworiented) {
554           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
555         } else {
556           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
557         }
558       }
559     }
560   }
561 #if defined(PETSC_USE_DEBUG)
562   baij->ht_total_ct  = total_ct;
563   baij->ht_insert_ct = insert_ct;
564 #endif
565   PetscFunctionReturn(0);
566 }
567 
568 #undef __FUNCT__
569 #define __FUNCT__ "MatGetValues_MPIBAIJ"
570 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
571 {
572   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
573   PetscErrorCode ierr;
574   PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
575   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
576 
577   PetscFunctionBegin;
578   for (i=0; i<m; i++) {
579     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
580     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
581     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
582       row = idxm[i] - bsrstart;
583       for (j=0; j<n; j++) {
584         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
585         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
586         if (idxn[j] >= bscstart && idxn[j] < bscend) {
587           col  = idxn[j] - bscstart;
588           ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
589         } else {
590           if (!baij->colmap) {
591             ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
592           }
593 #if defined(PETSC_USE_CTABLE)
594           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
595           data--;
596 #else
597           data = baij->colmap[idxn[j]/bs]-1;
598 #endif
599           if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
600           else {
601             col  = data + idxn[j]%bs;
602             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
603           }
604         }
605       }
606     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
607   }
608   PetscFunctionReturn(0);
609 }
610 
611 #undef __FUNCT__
612 #define __FUNCT__ "MatNorm_MPIBAIJ"
613 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
614 {
615   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
616   Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
617   PetscErrorCode ierr;
618   PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
619   PetscReal      sum = 0.0;
620   MatScalar      *v;
621 
622   PetscFunctionBegin;
623   if (baij->size == 1) {
624     ierr =  MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
625   } else {
626     if (type == NORM_FROBENIUS) {
627       v  = amat->a;
628       nz = amat->nz*bs2;
629       for (i=0; i<nz; i++) {
630         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
631       }
632       v  = bmat->a;
633       nz = bmat->nz*bs2;
634       for (i=0; i<nz; i++) {
635         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
636       }
637       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
638       *nrm = PetscSqrtReal(*nrm);
639     } else if (type == NORM_1) { /* max column sum */
640       PetscReal *tmp,*tmp2;
641       PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
642       ierr = PetscMalloc2(mat->cmap->N,PetscReal,&tmp,mat->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr);
643       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
644       v    = amat->a; jj = amat->j;
645       for (i=0; i<amat->nz; i++) {
646         for (j=0; j<bs; j++) {
647           col = bs*(cstart + *jj) + j; /* column index */
648           for (row=0; row<bs; row++) {
649             tmp[col] += PetscAbsScalar(*v);  v++;
650           }
651         }
652         jj++;
653       }
654       v = bmat->a; jj = bmat->j;
655       for (i=0; i<bmat->nz; i++) {
656         for (j=0; j<bs; j++) {
657           col = bs*garray[*jj] + j;
658           for (row=0; row<bs; row++) {
659             tmp[col] += PetscAbsScalar(*v); v++;
660           }
661         }
662         jj++;
663       }
664       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
665       *nrm = 0.0;
666       for (j=0; j<mat->cmap->N; j++) {
667         if (tmp2[j] > *nrm) *nrm = tmp2[j];
668       }
669       ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
670     } else if (type == NORM_INFINITY) { /* max row sum */
671       PetscReal *sums;
672       ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr);
673       sum  = 0.0;
674       for (j=0; j<amat->mbs; j++) {
675         for (row=0; row<bs; row++) sums[row] = 0.0;
676         v  = amat->a + bs2*amat->i[j];
677         nz = amat->i[j+1]-amat->i[j];
678         for (i=0; i<nz; i++) {
679           for (col=0; col<bs; col++) {
680             for (row=0; row<bs; row++) {
681               sums[row] += PetscAbsScalar(*v); v++;
682             }
683           }
684         }
685         v  = bmat->a + bs2*bmat->i[j];
686         nz = bmat->i[j+1]-bmat->i[j];
687         for (i=0; i<nz; i++) {
688           for (col=0; col<bs; col++) {
689             for (row=0; row<bs; row++) {
690               sums[row] += PetscAbsScalar(*v); v++;
691             }
692           }
693         }
694         for (row=0; row<bs; row++) {
695           if (sums[row] > sum) sum = sums[row];
696         }
697       }
698       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
699       ierr = PetscFree(sums);CHKERRQ(ierr);
700     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for this norm yet");
701   }
702   PetscFunctionReturn(0);
703 }
704 
705 /*
706   Creates the hash table and fills it in.
707   This table is created only once.
708   If new entries need to be added to the matrix
709   then the hash table has to be destroyed and
710   recreated.
711 */
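/*
   The table maps the key  row*Nbs + col + 1  (block row and global block
   column) to a pointer HD[] at the start of the corresponding bs2-sized block
   of values; the +1 keeps every key nonzero so that a zero HT[] entry marks an
   empty slot, and collisions are resolved by linear probing.  For example,
   with Nbs = 10 the block at block row 2, block column 5 gets key 26.
*/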
712 #undef __FUNCT__
713 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
714 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
715 {
716   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
717   Mat            A     = baij->A,B=baij->B;
718   Mat_SeqBAIJ    *a    = (Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)B->data;
719   PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
720   PetscErrorCode ierr;
721   PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
722   PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
723   PetscInt       *HT,key;
724   MatScalar      **HD;
725   PetscReal      tmp;
726 #if defined(PETSC_USE_INFO)
727   PetscInt ct=0,max=0;
728 #endif
729 
730   PetscFunctionBegin;
731   if (baij->ht) PetscFunctionReturn(0);
732 
733   baij->ht_size = (PetscInt)(factor*nz);
734   ht_size       = baij->ht_size;
735 
736   /* Allocate Memory for Hash Table */
737   ierr = PetscMalloc2(ht_size,MatScalar*,&baij->hd,ht_size,PetscInt,&baij->ht);CHKERRQ(ierr);
738   ierr = PetscMemzero(baij->hd,ht_size*sizeof(MatScalar*));CHKERRQ(ierr);
739   ierr = PetscMemzero(baij->ht,ht_size*sizeof(PetscInt));CHKERRQ(ierr);
740   HD   = baij->hd;
741   HT   = baij->ht;
742 
743   /* Loop Over A */
744   for (i=0; i<a->mbs; i++) {
745     for (j=ai[i]; j<ai[i+1]; j++) {
746       row = i+rstart;
747       col = aj[j]+cstart;
748 
749       key = row*Nbs + col + 1;
750       h1  = HASH(ht_size,key,tmp);
751       for (k=0; k<ht_size; k++) {
752         if (!HT[(h1+k)%ht_size]) {
753           HT[(h1+k)%ht_size] = key;
754           HD[(h1+k)%ht_size] = a->a + j*bs2;
755           break;
756 #if defined(PETSC_USE_INFO)
757         } else {
758           ct++;
759 #endif
760         }
761       }
762 #if defined(PETSC_USE_INFO)
763       if (k> max) max = k;
764 #endif
765     }
766   }
767   /* Loop Over B */
768   for (i=0; i<b->mbs; i++) {
769     for (j=bi[i]; j<bi[i+1]; j++) {
770       row = i+rstart;
771       col = garray[bj[j]];
772       key = row*Nbs + col + 1;
773       h1  = HASH(ht_size,key,tmp);
774       for (k=0; k<ht_size; k++) {
775         if (!HT[(h1+k)%ht_size]) {
776           HT[(h1+k)%ht_size] = key;
777           HD[(h1+k)%ht_size] = b->a + j*bs2;
778           break;
779 #if defined(PETSC_USE_INFO)
780         } else {
781           ct++;
782 #endif
783         }
784       }
785 #if defined(PETSC_USE_INFO)
786       if (k> max) max = k;
787 #endif
788     }
789   }
790 
791   /* Print Summary */
792 #if defined(PETSC_USE_INFO)
793   for (i=0,j=0; i<ht_size; i++) {
794     if (HT[i]) j++;
795   }
796   ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
797 #endif
798   PetscFunctionReturn(0);
799 }
800 
801 #undef __FUNCT__
802 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
803 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
804 {
805   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
806   PetscErrorCode ierr;
807   PetscInt       nstash,reallocs;
808   InsertMode     addv;
809 
810   PetscFunctionBegin;
811   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
812 
813   /* make sure all processors are either in INSERTMODE or ADDMODE */
814   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
815   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
816   mat->insertmode = addv; /* in case this processor had no cache */
817 
818   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
819   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
820   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
821   ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
822   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
823   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
824   PetscFunctionReturn(0);
825 }
826 
827 #undef __FUNCT__
828 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
829 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
830 {
831   Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
832   Mat_SeqBAIJ    *a   =(Mat_SeqBAIJ*)baij->A->data;
833   PetscErrorCode ierr;
834   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
835   PetscInt       *row,*col;
836   PetscBool      r1,r2,r3,other_disassembled;
837   MatScalar      *val;
838   InsertMode     addv = mat->insertmode;
839   PetscMPIInt    n;
840 
841   PetscFunctionBegin;
842   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
843   if (!baij->donotstash && !mat->nooffprocentries) {
844     while (1) {
845       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
846       if (!flg) break;
847 
848       for (i=0; i<n;) {
849         /* Now identify the consecutive vals belonging to the same row */
850         for (j=i,rstart=row[j]; j<n; j++) {
851           if (row[j] != rstart) break;
852         }
853         if (j < n) ncols = j-i;
854         else       ncols = n-i;
855         /* Now assemble all these values with a single function call */
856         ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
857         i    = j;
858       }
859     }
860     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
861     /* Now process the block-stash. Since the values are stashed column-oriented,
862        clear the roworiented flags (column-oriented insertion), and restore the
863        original flags after MatSetValues() */
864     r1 = baij->roworiented;
865     r2 = a->roworiented;
866     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
867 
868     baij->roworiented = PETSC_FALSE;
869     a->roworiented    = PETSC_FALSE;
870 
871     (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
872     while (1) {
873       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
874       if (!flg) break;
875 
876       for (i=0; i<n;) {
877         /* Now identify the consecutive vals belonging to the same row */
878         for (j=i,rstart=row[j]; j<n; j++) {
879           if (row[j] != rstart) break;
880         }
881         if (j < n) ncols = j-i;
882         else       ncols = n-i;
883         ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
884         i    = j;
885       }
886     }
887     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
888 
889     baij->roworiented = r1;
890     a->roworiented    = r2;
891 
892     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
893   }
894 
895   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
896   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
897 
898   /* determine if any processor has disassembled; if so, we must
899      also disassemble ourselves so that we may reassemble. */
900   /*
901      if the nonzero structure of submatrix B cannot change then we know that
902      no processor disassembled, so we can skip this step
903   */
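  /*
     With MPI_PROD over the PetscBool was_assembled the reduced value is
     PETSC_TRUE only if every rank still has was_assembled == PETSC_TRUE; for
     example, with three ranks holding {TRUE, FALSE, TRUE} the product is FALSE,
     so the ranks that are still assembled disassemble as well before
     reassembling.
  */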
904   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
905     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
906     if (mat->was_assembled && !other_disassembled) {
907       ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
908     }
909   }
910 
911   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
912     ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
913   }
914   ierr = MatSetOption(baij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
915   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
916   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
917 
918 #if defined(PETSC_USE_INFO)
919   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
920     ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);
921 
922     baij->ht_total_ct  = 0;
923     baij->ht_insert_ct = 0;
924   }
925 #endif
926   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
927     ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
928 
929     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
930     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
931   }
932 
933   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
934 
935   baij->rowvalues = 0;
936   PetscFunctionReturn(0);
937 }
938 
939 #undef __FUNCT__
940 #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
941 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
942 {
943   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
944   PetscErrorCode    ierr;
945   PetscMPIInt       size = baij->size,rank = baij->rank;
946   PetscInt          bs   = mat->rmap->bs;
947   PetscBool         iascii,isdraw;
948   PetscViewer       sviewer;
949   PetscViewerFormat format;
950 
951   PetscFunctionBegin;
952   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
953   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
954   if (iascii) {
955     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
956     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
957       MatInfo info;
958       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
959       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
960       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
961       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
962                                                 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
963       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
964       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
965       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
966       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
967       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
968       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
969       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
970       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
971       PetscFunctionReturn(0);
972     } else if (format == PETSC_VIEWER_ASCII_INFO) {
973       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
974       PetscFunctionReturn(0);
975     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
976       PetscFunctionReturn(0);
977     }
978   }
979 
980   if (isdraw) {
981     PetscDraw draw;
982     PetscBool isnull;
983     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
984     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
985   }
986 
987   if (size == 1) {
988     ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
989     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
990   } else {
991     /* assemble the entire matrix onto first processor. */
992     Mat         A;
993     Mat_SeqBAIJ *Aloc;
994     PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
995     MatScalar   *a;
996 
997     /* Here we are creating a temporary matrix, so we will assume MPIBAIJ is acceptable */
998     /* Perhaps this should be the type of mat? */
999     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
1000     if (!rank) {
1001       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1002     } else {
1003       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1004     }
1005     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
1006     ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
1007     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1008     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1009 
1010     /* copy over the A part */
1011     Aloc = (Mat_SeqBAIJ*)baij->A->data;
1012     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1013     ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
1014 
1015     for (i=0; i<mbs; i++) {
1016       rvals[0] = bs*(baij->rstartbs + i);
1017       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1018       for (j=ai[i]; j<ai[i+1]; j++) {
1019         col = (baij->cstartbs+aj[j])*bs;
1020         for (k=0; k<bs; k++) {
1021           ierr      = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1022           col++; a += bs;
1023         }
1024       }
1025     }
1026     /* copy over the B part */
1027     Aloc = (Mat_SeqBAIJ*)baij->B->data;
1028     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1029     for (i=0; i<mbs; i++) {
1030       rvals[0] = bs*(baij->rstartbs + i);
1031       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1032       for (j=ai[i]; j<ai[i+1]; j++) {
1033         col = baij->garray[aj[j]]*bs;
1034         for (k=0; k<bs; k++) {
1035           ierr      = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1036           col++; a += bs;
1037         }
1038       }
1039     }
1040     ierr = PetscFree(rvals);CHKERRQ(ierr);
1041     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1042     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1043     /*
1044        Every process has to participate in the call that draws the matrix since the graphics waits are
1045        synchronized across all processors that share the PetscDraw object
1046     */
1047     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1048     if (!rank) {
1049       ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1050       /* Set the type name to MATMPIBAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqBAIJ_ASCII()*/
1051       PetscStrcpy(((PetscObject)((Mat_MPIBAIJ*)(A->data))->A)->type_name,MATMPIBAIJ);
1052       ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1053     }
1054     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1055     ierr = MatDestroy(&A);CHKERRQ(ierr);
1056   }
1057   PetscFunctionReturn(0);
1058 }
1059 
1060 #undef __FUNCT__
1061 #define __FUNCT__ "MatView_MPIBAIJ_Binary"
1062 static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
1063 {
1064   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)mat->data;
1065   Mat_SeqBAIJ    *A = (Mat_SeqBAIJ*)a->A->data;
1066   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)a->B->data;
1067   PetscErrorCode ierr;
1068   PetscInt       i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen;
1069   PetscInt       *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll;
1070   int            fd;
1071   PetscScalar    *column_values;
1072   FILE           *file;
1073   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1074   PetscInt       message_count,flowcontrolcount;
1075 
1076   PetscFunctionBegin;
1077   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1078   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1079   nz   = bs2*(A->nz + B->nz);
1080   rlen = mat->rmap->n;
1081   if (!rank) {
1082     header[0] = MAT_FILE_CLASSID;
1083     header[1] = mat->rmap->N;
1084     header[2] = mat->cmap->N;
1085 
1086     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1087     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1088     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1089     /* get largest number of rows any processor has */
1090     range = mat->rmap->range;
1091     for (i=1; i<size; i++) {
1092       rlen = PetscMax(rlen,range[i+1] - range[i]);
1093     }
1094   } else {
1095     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1096   }
1097 
1098   ierr = PetscMalloc((rlen/bs)*sizeof(PetscInt),&crow_lens);CHKERRQ(ierr);
1099   /* compute lengths of each row  */
1100   for (i=0; i<a->mbs; i++) {
1101     crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1102   }
1103   /* store the row lengths to the file */
1104   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1105   if (!rank) {
1106     MPI_Status status;
1107     ierr = PetscMalloc(rlen*sizeof(PetscInt),&row_lens);CHKERRQ(ierr);
1108     rlen = (range[1] - range[0])/bs;
1109     for (i=0; i<rlen; i++) {
1110       for (j=0; j<bs; j++) {
1111         row_lens[i*bs+j] = bs*crow_lens[i];
1112       }
1113     }
1114     ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1115     for (i=1; i<size; i++) {
1116       rlen = (range[i+1] - range[i])/bs;
1117       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1118       ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1119       for (k=0; k<rlen; k++) {
1120         for (j=0; j<bs; j++) {
1121           row_lens[k*bs+j] = bs*crow_lens[k];
1122         }
1123       }
1124       ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1125     }
1126     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1127     ierr = PetscFree(row_lens);CHKERRQ(ierr);
1128   } else {
1129     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1130     ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1131     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1132   }
1133   ierr = PetscFree(crow_lens);CHKERRQ(ierr);
1134 
1135   /* load up the local column indices. Include them for every row, not just one set per block row, since process 0 does not have
1136      the information needed to expand them per row from a block row. This does require more communication, but still not more than
1137      the communication needed for the nonzero values */
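  /* For example, with bs = 2 a block row whose global block columns are {1,3}
     expands to the point-column list {2,3,6,7}; that list is then duplicated
     once more (bs-1 copies) so each of the two point rows of the block row
     carries the same four indices, matching the per-row length bs*crow_lens[i]
     stored earlier. */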
1138   nzmax = nz; /* space the largest processor needs */
1139   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1140   ierr  = PetscMalloc(nzmax*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
1141   cnt   = 0;
1142   for (i=0; i<a->mbs; i++) {
1143     pcnt = cnt;
1144     for (j=B->i[i]; j<B->i[i+1]; j++) {
1145       if ((col = garray[B->j[j]]) > cstart) break;
1146       for (l=0; l<bs; l++) {
1147         column_indices[cnt++] = bs*col+l;
1148       }
1149     }
1150     for (k=A->i[i]; k<A->i[i+1]; k++) {
1151       for (l=0; l<bs; l++) {
1152         column_indices[cnt++] = bs*(A->j[k] + cstart)+l;
1153       }
1154     }
1155     for (; j<B->i[i+1]; j++) {
1156       for (l=0; l<bs; l++) {
1157         column_indices[cnt++] = bs*garray[B->j[j]]+l;
1158       }
1159     }
1160     len = cnt - pcnt;
1161     for (k=1; k<bs; k++) {
1162       ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr);
1163       cnt += len;
1164     }
1165   }
1166   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1167 
1168   /* store the columns to the file */
1169   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1170   if (!rank) {
1171     MPI_Status status;
1172     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1173     for (i=1; i<size; i++) {
1174       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1175       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1176       ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1177       ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1178     }
1179     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1180   } else {
1181     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1182     ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1183     ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1184     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1185   }
1186   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1187 
1188   /* load up the numerical values */
1189   ierr = PetscMalloc(nzmax*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1190   cnt  = 0;
1191   for (i=0; i<a->mbs; i++) {
1192     rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]);
1193     for (j=B->i[i]; j<B->i[i+1]; j++) {
1194       if (garray[B->j[j]] > cstart) break;
1195       for (l=0; l<bs; l++) {
1196         for (ll=0; ll<bs; ll++) {
1197           column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1198         }
1199       }
1200       cnt += bs;
1201     }
1202     for (k=A->i[i]; k<A->i[i+1]; k++) {
1203       for (l=0; l<bs; l++) {
1204         for (ll=0; ll<bs; ll++) {
1205           column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll];
1206         }
1207       }
1208       cnt += bs;
1209     }
1210     for (; j<B->i[i+1]; j++) {
1211       for (l=0; l<bs; l++) {
1212         for (ll=0; ll<bs; ll++) {
1213           column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1214         }
1215       }
1216       cnt += bs;
1217     }
1218     cnt += (bs-1)*rlen;
1219   }
1220   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1221 
1222   /* store the column values to the file */
1223   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1224   if (!rank) {
1225     MPI_Status status;
1226     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1227     for (i=1; i<size; i++) {
1228       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1229       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1230       ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1231       ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1232     }
1233     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1234   } else {
1235     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1236     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1237     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1238     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1239   }
1240   ierr = PetscFree(column_values);CHKERRQ(ierr);
1241 
1242   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1243   if (file) {
1244     fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
1245   }
1246   PetscFunctionReturn(0);
1247 }
1248 
1249 #undef __FUNCT__
1250 #define __FUNCT__ "MatView_MPIBAIJ"
1251 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
1252 {
1253   PetscErrorCode ierr;
1254   PetscBool      iascii,isdraw,issocket,isbinary;
1255 
1256   PetscFunctionBegin;
1257   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1258   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1259   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1260   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1261   if (iascii || isdraw || issocket) {
1262     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1263   } else if (isbinary) {
1264     ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1265   }
1266   PetscFunctionReturn(0);
1267 }
1268 
1269 #undef __FUNCT__
1270 #define __FUNCT__ "MatDestroy_MPIBAIJ"
1271 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
1272 {
1273   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1274   PetscErrorCode ierr;
1275 
1276   PetscFunctionBegin;
1277 #if defined(PETSC_USE_LOG)
1278   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
1279 #endif
1280   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1281   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
1282   ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
1283   ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
1284 #if defined(PETSC_USE_CTABLE)
1285   ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
1286 #else
1287   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
1288 #endif
1289   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
1290   ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
1291   ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
1292   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
1293   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
1294   ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);
1295   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
1296   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1297 
1298   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1299   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",NULL);CHKERRQ(ierr);
1300   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",NULL);CHKERRQ(ierr);
1301   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",NULL);CHKERRQ(ierr);
1302   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",NULL);CHKERRQ(ierr);
1303   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",NULL);CHKERRQ(ierr);
1304   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",NULL);CHKERRQ(ierr);
1305   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",NULL);CHKERRQ(ierr);
1306   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C","",NULL);CHKERRQ(ierr);
1307   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C","",NULL);CHKERRQ(ierr);
1308   PetscFunctionReturn(0);
1309 }
1310 
1311 #undef __FUNCT__
1312 #define __FUNCT__ "MatMult_MPIBAIJ"
1313 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1314 {
1315   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1316   PetscErrorCode ierr;
1317   PetscInt       nt;
1318 
1319   PetscFunctionBegin;
1320   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1321   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1322   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1323   if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
1324   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1325   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1326   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1327   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1328   PetscFunctionReturn(0);
1329 }
1330 
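/*
   Usage sketch (hedged): MatMult above overlaps the forward scatter of the ghost
   values with the diagonal-block product, i.e. yy = A_d*xx_local + B_o*xx_ghost.
   A caller only needs vectors whose layouts conform to the matrix; assuming A is
   an assembled MATMPIBAIJ matrix:

     Vec x,y;

     ierr = MatGetVecs(A,&x,&y);CHKERRQ(ierr);                x conforms to the columns, y to the rows
     ierr = VecSet(x,1.0);CHKERRQ(ierr);
     ierr = MatMult(A,x,y);CHKERRQ(ierr);
     ierr = VecDestroy(&x);CHKERRQ(ierr);
     ierr = VecDestroy(&y);CHKERRQ(ierr);
*/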
1331 #undef __FUNCT__
1332 #define __FUNCT__ "MatMultAdd_MPIBAIJ"
1333 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1334 {
1335   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1336   PetscErrorCode ierr;
1337 
1338   PetscFunctionBegin;
1339   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1340   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1341   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1342   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1343   PetscFunctionReturn(0);
1344 }
1345 
1346 #undef __FUNCT__
1347 #define __FUNCT__ "MatMultTranspose_MPIBAIJ"
1348 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1349 {
1350   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1351   PetscErrorCode ierr;
1352   PetscBool      merged;
1353 
1354   PetscFunctionBegin;
1355   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1356   /* do nondiagonal part */
1357   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1358   if (!merged) {
1359     /* send it on its way */
1360     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1361     /* do local part */
1362     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1363     /* receive remote parts: note this assumes the values are not actually */
1364     /* inserted in yy until the next line */
1365     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1366   } else {
1367     /* do local part */
1368     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1369     /* send it on its way */
1370     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1371     /* the values were actually received in the Begin(), but we still need to call this no-op */
1372     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1373   }
1374   PetscFunctionReturn(0);
1375 }
1376 
1377 #undef __FUNCT__
1378 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
1379 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1380 {
1381   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1382   PetscErrorCode ierr;
1383 
1384   PetscFunctionBegin;
1385   /* do nondiagonal part */
1386   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1387   /* send it on its way */
1388   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1389   /* do local part */
1390   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1391   /* receive remote parts: note this assumes the values are not actually */
1392   /* inserted in zz until the next line, which is true for this implementation */
1393   /* but is perhaps not always true. */
1394   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1395   PetscFunctionReturn(0);
1396 }
1397 
1398 /*
1399   This only works correctly for square matrices where the subblock A->A is the
1400    diagonal block
1401 */
1402 #undef __FUNCT__
1403 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
1404 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1405 {
1406   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1407   PetscErrorCode ierr;
1408 
1409   PetscFunctionBegin;
1410   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1411   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1412   PetscFunctionReturn(0);
1413 }
1414 
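/*
   Usage sketch (hedged): MatGetDiagonal above simply extracts the diagonal of the
   local diagonal block a->A, which is why the matrix must be square with A->A
   holding the diagonal.  Assuming A is such an assembled MATMPIBAIJ matrix:

     Vec d;

     ierr = MatGetVecs(A,NULL,&d);CHKERRQ(ierr);
     ierr = MatGetDiagonal(A,d);CHKERRQ(ierr);
     ierr = VecDestroy(&d);CHKERRQ(ierr);
*/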
1415 #undef __FUNCT__
1416 #define __FUNCT__ "MatScale_MPIBAIJ"
1417 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
1418 {
1419   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1420   PetscErrorCode ierr;
1421 
1422   PetscFunctionBegin;
1423   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1424   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1425   PetscFunctionReturn(0);
1426 }
1427 
1428 #undef __FUNCT__
1429 #define __FUNCT__ "MatGetRow_MPIBAIJ"
1430 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1431 {
1432   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
1433   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1434   PetscErrorCode ierr;
1435   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1436   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
1437   PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;
1438 
1439   PetscFunctionBegin;
1440   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1441   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1442   mat->getrowactive = PETSC_TRUE;
1443 
1444   if (!mat->rowvalues && (idx || v)) {
1445     /*
1446         allocate enough space to hold information from the longest row.
1447     */
1448     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1449     PetscInt    max = 1,mbs = mat->mbs,tmp;
1450     for (i=0; i<mbs; i++) {
1451       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1452       if (max < tmp) max = tmp;
1453     }
1454     ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1455   }
1456   lrow = row - brstart;
1457 
1458   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1459   if (!v)   {pvA = 0; pvB = 0;}
1460   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1461   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1462   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1463   nztot = nzA + nzB;
1464 
1465   cmap = mat->garray;
1466   if (v  || idx) {
1467     if (nztot) {
1468       /* Sort by increasing column numbers, assuming A and B already sorted */
1469       PetscInt imark = -1;
1470       if (v) {
1471         *v = v_p = mat->rowvalues;
1472         for (i=0; i<nzB; i++) {
1473           if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
1474           else break;
1475         }
1476         imark = i;
1477         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1478         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1479       }
1480       if (idx) {
1481         *idx = idx_p = mat->rowindices;
1482         if (imark > -1) {
1483           for (i=0; i<imark; i++) {
1484             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1485           }
1486         } else {
1487           for (i=0; i<nzB; i++) {
1488             if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1489             else break;
1490           }
1491           imark = i;
1492         }
1493         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1494         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1495       }
1496     } else {
1497       if (idx) *idx = 0;
1498       if (v)   *v   = 0;
1499     }
1500   }
1501   *nz  = nztot;
1502   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1503   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1504   PetscFunctionReturn(0);
1505 }
1506 
1507 #undef __FUNCT__
1508 #define __FUNCT__ "MatRestoreRow_MPIBAIJ"
1509 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1510 {
1511   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1512 
1513   PetscFunctionBegin;
1514   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1515   baij->getrowactive = PETSC_FALSE;
1516   PetscFunctionReturn(0);
1517 }
1518 
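/*
   Usage sketch (hedged): MatGetRow/MatRestoreRow above serve only locally owned
   rows and merge the diagonal and off-diagonal parts into one column-sorted row.
   A typical caller-side traversal, assuming A is assembled:

     PetscInt          rstart,rend,row,ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;

     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       ... inspect cols[0..ncols-1] and vals[0..ncols-1] ...
       ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     }
*/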
1519 #undef __FUNCT__
1520 #define __FUNCT__ "MatZeroEntries_MPIBAIJ"
1521 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1522 {
1523   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1524   PetscErrorCode ierr;
1525 
1526   PetscFunctionBegin;
1527   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1528   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1529   PetscFunctionReturn(0);
1530 }
1531 
1532 #undef __FUNCT__
1533 #define __FUNCT__ "MatGetInfo_MPIBAIJ"
1534 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1535 {
1536   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
1537   Mat            A  = a->A,B = a->B;
1538   PetscErrorCode ierr;
1539   PetscReal      isend[5],irecv[5];
1540 
1541   PetscFunctionBegin;
1542   info->block_size = (PetscReal)matin->rmap->bs;
1543 
1544   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1545 
1546   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1547   isend[3] = info->memory;  isend[4] = info->mallocs;
1548 
1549   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1550 
1551   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1552   isend[3] += info->memory;  isend[4] += info->mallocs;
1553 
1554   if (flag == MAT_LOCAL) {
1555     info->nz_used      = isend[0];
1556     info->nz_allocated = isend[1];
1557     info->nz_unneeded  = isend[2];
1558     info->memory       = isend[3];
1559     info->mallocs      = isend[4];
1560   } else if (flag == MAT_GLOBAL_MAX) {
1561     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1562 
1563     info->nz_used      = irecv[0];
1564     info->nz_allocated = irecv[1];
1565     info->nz_unneeded  = irecv[2];
1566     info->memory       = irecv[3];
1567     info->mallocs      = irecv[4];
1568   } else if (flag == MAT_GLOBAL_SUM) {
1569     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1570 
1571     info->nz_used      = irecv[0];
1572     info->nz_allocated = irecv[1];
1573     info->nz_unneeded  = irecv[2];
1574     info->memory       = irecv[3];
1575     info->mallocs      = irecv[4];
1576   } else SETERRQ1(PetscObjectComm((PetscObject)matin),PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1577   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1578   info->fill_ratio_needed = 0;
1579   info->factor_mallocs    = 0;
1580   PetscFunctionReturn(0);
1581 }
1582 
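/*
   Usage sketch (hedged): MatGetInfo above sums the statistics of the local A and B
   blocks and, for the global flavors, reduces them with MPI_MAX or MPI_SUM.
   Assuming A is assembled:

     MatInfo info;

     ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(PETSC_COMM_WORLD,"global nonzeros used: %g\n",(double)info.nz_used);CHKERRQ(ierr);
*/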
1583 #undef __FUNCT__
1584 #define __FUNCT__ "MatSetOption_MPIBAIJ"
1585 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg)
1586 {
1587   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1588   PetscErrorCode ierr;
1589 
1590   PetscFunctionBegin;
1591   switch (op) {
1592   case MAT_NEW_NONZERO_LOCATIONS:
1593   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1594   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1595   case MAT_KEEP_NONZERO_PATTERN:
1596   case MAT_NEW_NONZERO_LOCATION_ERR:
1597     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1598     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1599     break;
1600   case MAT_ROW_ORIENTED:
1601     a->roworiented = flg;
1602 
1603     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1604     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1605     break;
1606   case MAT_NEW_DIAGONALS:
1607     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1608     break;
1609   case MAT_IGNORE_OFF_PROC_ENTRIES:
1610     a->donotstash = flg;
1611     break;
1612   case MAT_USE_HASH_TABLE:
1613     a->ht_flag = flg;
1614     break;
1615   case MAT_SYMMETRIC:
1616   case MAT_STRUCTURALLY_SYMMETRIC:
1617   case MAT_HERMITIAN:
1618   case MAT_SYMMETRY_ETERNAL:
1619     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1620     break;
1621   default:
1622     SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"unknown option %d",op);
1623   }
1624   PetscFunctionReturn(0);
1625 }
1626 
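/*
   Usage sketch (hedged): as the switch above shows, most options are forwarded to
   the sequential blocks, while MAT_IGNORE_OFF_PROC_ENTRIES and MAT_USE_HASH_TABLE
   only toggle flags in the MPI layer.  For example, a caller that generates no
   off-process entries can skip the stash communication with

     ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);

   before the MatSetValues()/MatAssemblyBegin() phase.
*/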
1627 #undef __FUNCT__
1628 #define __FUNCT__ "MatTranspose_MPIBAIJ"
1629 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
1630 {
1631   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
1632   Mat_SeqBAIJ    *Aloc;
1633   Mat            B;
1634   PetscErrorCode ierr;
1635   PetscInt       M =A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
1636   PetscInt       bs=A->rmap->bs,mbs=baij->mbs;
1637   MatScalar      *a;
1638 
1639   PetscFunctionBegin;
1640   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1641   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1642     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1643     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1644     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1645     /* Do not know preallocation information, but must set block size */
1646     ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,NULL,PETSC_DECIDE,NULL);CHKERRQ(ierr);
1647   } else {
1648     B = *matout;
1649   }
1650 
1651   /* copy over the A part */
1652   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1653   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1654   ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
1655 
1656   for (i=0; i<mbs; i++) {
1657     rvals[0] = bs*(baij->rstartbs + i);
1658     for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1659     for (j=ai[i]; j<ai[i+1]; j++) {
1660       col = (baij->cstartbs+aj[j])*bs;
1661       for (k=0; k<bs; k++) {
1662         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1663 
1664         col++; a += bs;
1665       }
1666     }
1667   }
1668   /* copy over the B part */
1669   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1670   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1671   for (i=0; i<mbs; i++) {
1672     rvals[0] = bs*(baij->rstartbs + i);
1673     for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1674     for (j=ai[i]; j<ai[i+1]; j++) {
1675       col = baij->garray[aj[j]]*bs;
1676       for (k=0; k<bs; k++) {
1677         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1678         col++;
1679         a += bs;
1680       }
1681     }
1682   }
1683   ierr = PetscFree(rvals);CHKERRQ(ierr);
1684   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1685   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1686 
1687   if (reuse == MAT_INITIAL_MATRIX || *matout != A) *matout = B;
1688   else {
1689     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
1690   }
1691   PetscFunctionReturn(0);
1692 }
1693 
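/*
   Usage sketch (hedged): MatTranspose above cannot know the preallocation of the
   result, so the out-of-place case creates B with only the block size set and
   fills it through MatSetValues_MPIBAIJ().  Assuming A is an assembled MATMPIBAIJ
   matrix:

     Mat At;

     ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
     ... use At ...
     ierr = MatDestroy(&At);CHKERRQ(ierr);

   The in-place form MatTranspose(A,MAT_REUSE_MATRIX,&A) is accepted only for
   square matrices, as checked at the top of the routine.
*/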
1694 #undef __FUNCT__
1695 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
1696 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1697 {
1698   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1699   Mat            a     = baij->A,b = baij->B;
1700   PetscErrorCode ierr;
1701   PetscInt       s1,s2,s3;
1702 
1703   PetscFunctionBegin;
1704   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1705   if (rr) {
1706     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1707     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1708     /* Overlap communication with computation. */
1709     ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1710   }
1711   if (ll) {
1712     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1713     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1714     ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
1715   }
1716   /* scale the diagonal block */
1717   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1718 
1719   if (rr) {
1720     /* Do a scatter end and then right scale the off-diagonal block */
1721     ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1722     ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
1723   }
1724   PetscFunctionReturn(0);
1725 }
1726 
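/*
   Usage sketch (hedged): MatDiagonalScale above overlaps the scatter of the right
   scaling vector with the left scaling of both blocks; the off-diagonal block is
   right-scaled only after the scatter completes.  Assuming l conforms to the rows
   of A and r to its columns:

     ierr = MatDiagonalScale(A,l,r);CHKERRQ(ierr);            A <- diag(l)*A*diag(r)
*/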
1727 #undef __FUNCT__
1728 #define __FUNCT__ "MatZeroRows_MPIBAIJ"
1729 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
1730 {
1731   Mat_MPIBAIJ       *l = (Mat_MPIBAIJ*)A->data;
1732   PetscErrorCode    ierr;
1733   PetscMPIInt       imdex,size = l->size,n,rank = l->rank;
1734   PetscInt          i,*owners = A->rmap->range;
1735   PetscInt          *nprocs,j,idx,nsends,row;
1736   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
1737   PetscInt          *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source,lastidx = -1;
1738   PetscInt          *lens,*lrows,*values,rstart_bs=A->rmap->rstart;
1739   MPI_Comm          comm;
1740   MPI_Request       *send_waits,*recv_waits;
1741   MPI_Status        recv_status,*send_status;
1742   const PetscScalar *xx;
1743   PetscScalar       *bb;
1744 #if defined(PETSC_DEBUG)
1745   PetscBool         found = PETSC_FALSE;
1746 #endif
1747 
1748   PetscFunctionBegin;
1749   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1750   /*  first count number of contributors to each processor */
1751   ierr = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
1752   ierr = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
1753   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr);  /* see note*/
1754   j    = 0;
1755   for (i=0; i<N; i++) {
1756     if (lastidx > (idx = rows[i])) j = 0;
1757     lastidx = idx;
1758     for (; j<size; j++) {
1759       if (idx >= owners[j] && idx < owners[j+1]) {
1760         nprocs[2*j]++;
1761         nprocs[2*j+1] = 1;
1762         owner[i]      = j;
1763 #if defined(PETSC_DEBUG)
1764         found = PETSC_TRUE;
1765 #endif
1766         break;
1767       }
1768     }
1769 #if defined(PETSC_DEBUG)
1770     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
1771     found = PETSC_FALSE;
1772 #endif
1773   }
1774   nsends = 0;  for (i=0; i<size; i++) nsends += nprocs[2*i+1];
1775 
1776   if (A->nooffproczerorows) {
1777     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
1778     nrecvs = nsends;
1779     nmax   = N;
1780   } else {
1781     /* inform other processors of number of messages and max length*/
1782     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
1783   }
1784 
1785   /* post receives:   */
1786   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
1787   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
1788   for (i=0; i<nrecvs; i++) {
1789     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
1790   }
1791 
1792   /* do sends:
1793      1) starts[i] gives the starting index in svalues for stuff going to
1794      the ith processor
1795   */
1796   ierr      = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
1797   ierr      = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
1798   ierr      = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
1799   starts[0] = 0;
1800   for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
1801   for (i=0; i<N; i++) {
1802     svalues[starts[owner[i]]++] = rows[i];
1803   }
1804 
1805   starts[0] = 0;
1806   for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[2*i-2];
1807   count = 0;
1808   for (i=0; i<size; i++) {
1809     if (nprocs[2*i+1]) {
1810       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
1811     }
1812   }
1813   ierr = PetscFree(starts);CHKERRQ(ierr);
1814 
1815   base = owners[rank];
1816 
1817   /*  wait on receives */
1818   ierr  = PetscMalloc2(nrecvs+1,PetscInt,&lens,nrecvs+1,PetscInt,&source);CHKERRQ(ierr);
1819   count = nrecvs;
1820   slen  = 0;
1821   while (count) {
1822     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
1823     /* unpack receives into our local space */
1824     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
1825 
1826     source[imdex] = recv_status.MPI_SOURCE;
1827     lens[imdex]   = n;
1828     slen         += n;
1829     count--;
1830   }
1831   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1832 
1833   /* move the data into the send scatter */
1834   ierr  = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
1835   count = 0;
1836   for (i=0; i<nrecvs; i++) {
1837     values = rvalues + i*nmax;
1838     for (j=0; j<lens[i]; j++) {
1839       lrows[count++] = values[j] - base;
1840     }
1841   }
1842   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1843   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
1844   ierr = PetscFree(owner);CHKERRQ(ierr);
1845   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1846 
1847   /* fix right hand side if needed */
1848   if (x && b) {
1849     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
1850     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1851     for (i=0; i<slen; i++) {
1852       bb[lrows[i]] = diag*xx[lrows[i]];
1853     }
1854     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
1855     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1856   }
1857 
1858   /* actually zap the local rows */
1859   /*
1860         Zero the required rows. If the "diagonal block" of the matrix
1861      is square and the user wishes to set the diagonal, we use separate
1862      code so that MatSetValues() is not called for each diagonal entry,
1863      which would allocate new memory and trigger many mallocs, slowing things down.
1864 
1865   */
1866   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1867   ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
1868   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
1869     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
1870   } else if (diag != 0.0) {
1871     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
1872     if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1873        MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1874     for (i=0; i<slen; i++) {
1875       row  = lrows[i] + rstart_bs;
1876       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
1877     }
1878     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1879     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1880   } else {
1881     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
1882   }
1883 
1884   ierr = PetscFree(lrows);CHKERRQ(ierr);
1885 
1886   /* wait on sends */
1887   if (nsends) {
1888     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1889     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1890     ierr = PetscFree(send_status);CHKERRQ(ierr);
1891   }
1892   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1893   ierr = PetscFree(svalues);CHKERRQ(ierr);
1894   PetscFunctionReturn(0);
1895 }
1896 
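/*
   Usage sketch (hedged, hypothetical row numbers): MatZeroRows above accepts
   global row indices that may be owned by other processes; they are routed to
   their owners by the sends/receives set up at the start of the routine.
   Assuming A is assembled and x, b are conforming vectors:

     PetscInt    rows[] = {0,1};
     PetscScalar diag   = 1.0;

     ierr = MatZeroRows(A,2,rows,diag,x,b);CHKERRQ(ierr);     also sets b = diag*x in those rows
*/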
1897 #undef __FUNCT__
1898 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
1899 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1900 {
1901   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1902   PetscErrorCode ierr;
1903 
1904   PetscFunctionBegin;
1905   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1906   PetscFunctionReturn(0);
1907 }
1908 
1909 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat*);
1910 
1911 #undef __FUNCT__
1912 #define __FUNCT__ "MatEqual_MPIBAIJ"
1913 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool  *flag)
1914 {
1915   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1916   Mat            a,b,c,d;
1917   PetscBool      flg;
1918   PetscErrorCode ierr;
1919 
1920   PetscFunctionBegin;
1921   a = matA->A; b = matA->B;
1922   c = matB->A; d = matB->B;
1923 
1924   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1925   if (flg) {
1926     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1927   }
1928   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1929   PetscFunctionReturn(0);
1930 }
1931 
1932 #undef __FUNCT__
1933 #define __FUNCT__ "MatCopy_MPIBAIJ"
1934 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
1935 {
1936   PetscErrorCode ierr;
1937   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1938   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
1939 
1940   PetscFunctionBegin;
1941   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1942   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1943     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1944   } else {
1945     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1946     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1947   }
1948   PetscFunctionReturn(0);
1949 }
1950 
1951 #undef __FUNCT__
1952 #define __FUNCT__ "MatSetUp_MPIBAIJ"
1953 PetscErrorCode MatSetUp_MPIBAIJ(Mat A)
1954 {
1955   PetscErrorCode ierr;
1956 
1957   PetscFunctionBegin;
1958   ierr =  MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1959   PetscFunctionReturn(0);
1960 }
1961 
1962 #undef __FUNCT__
1963 #define __FUNCT__ "MatAXPY_MPIBAIJ"
1964 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1965 {
1966   PetscErrorCode ierr;
1967   Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ*)X->data,*yy=(Mat_MPIBAIJ*)Y->data;
1968   PetscBLASInt   bnz,one=1;
1969   Mat_SeqBAIJ    *x,*y;
1970 
1971   PetscFunctionBegin;
1972   if (str == SAME_NONZERO_PATTERN) {
1973     PetscScalar alpha = a;
1974     x    = (Mat_SeqBAIJ*)xx->A->data;
1975     y    = (Mat_SeqBAIJ*)yy->A->data;
1976     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
1977     PetscStackCall("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
1978     x    = (Mat_SeqBAIJ*)xx->B->data;
1979     y    = (Mat_SeqBAIJ*)yy->B->data;
1980     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
1981     PetscStackCall("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
1982   } else {
1983     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1984   }
1985   PetscFunctionReturn(0);
1986 }
1987 
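/*
   Usage sketch (hedged): when the nonzero patterns match, MatAXPY above reduces to
   two BLAS axpy calls on the stored values of the diagonal and off-diagonal
   blocks; otherwise it falls back to MatAXPY_Basic().  Assuming X and Y are
   assembled MATMPIBAIJ matrices with identical patterns:

     ierr = MatAXPY(Y,2.0,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);   Y <- Y + 2*X
*/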
1988 #undef __FUNCT__
1989 #define __FUNCT__ "MatRealPart_MPIBAIJ"
1990 PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
1991 {
1992   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1993   PetscErrorCode ierr;
1994 
1995   PetscFunctionBegin;
1996   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1997   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1998   PetscFunctionReturn(0);
1999 }
2000 
2001 #undef __FUNCT__
2002 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ"
2003 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
2004 {
2005   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
2006   PetscErrorCode ierr;
2007 
2008   PetscFunctionBegin;
2009   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2010   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2011   PetscFunctionReturn(0);
2012 }
2013 
2014 #undef __FUNCT__
2015 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ"
2016 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
2017 {
2018   PetscErrorCode ierr;
2019   IS             iscol_local;
2020   PetscInt       csize;
2021 
2022   PetscFunctionBegin;
2023   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
2024   if (call == MAT_REUSE_MATRIX) {
2025     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
2026     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2027   } else {
2028     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
2029   }
2030   ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
2031   if (call == MAT_INITIAL_MATRIX) {
2032     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
2033     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
2034   }
2035   PetscFunctionReturn(0);
2036 }
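/*
   Usage sketch (hedged): the routine above gathers iscol onto every process and
   stashes it on the new matrix under the key "ISAllGather" so that a later
   MAT_REUSE_MATRIX call can find it again.  Assuming A is an assembled MATMPIBAIJ
   matrix and the index sets respect the block size:

     PetscInt rstart,rend;
     IS       isrow,iscol;
     Mat      S;

     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     ierr = ISCreateStride(PETSC_COMM_WORLD,rend-rstart,rstart,1,&isrow);CHKERRQ(ierr);
     ierr = ISCreateStride(PETSC_COMM_WORLD,rend-rstart,rstart,1,&iscol);CHKERRQ(ierr);
     ierr = MatGetSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&S);CHKERRQ(ierr);
     ... later, with the same index sets and updated values in A ...
     ierr = MatGetSubMatrix(A,isrow,iscol,MAT_REUSE_MATRIX,&S);CHKERRQ(ierr);
*/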
2037 extern PetscErrorCode MatGetSubMatrices_MPIBAIJ_local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,PetscBool*,Mat*);
2038 #undef __FUNCT__
2039 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ_Private"
2040 /*
2041   Not great since it makes two copies of the submatrix: first a SeqBAIJ
2042   locally, and then the end result by concatenating the local matrices.
2043   Writing it directly would be much like MatGetSubMatrices_MPIBAIJ()
2044 */
2045 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2046 {
2047   PetscErrorCode ierr;
2048   PetscMPIInt    rank,size;
2049   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs;
2050   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol,nrow;
2051   Mat            M,Mreuse;
2052   MatScalar      *vwork,*aa;
2053   MPI_Comm       comm;
2054   IS             isrow_new, iscol_new;
2055   PetscBool      idflag,allrows, allcols;
2056   Mat_SeqBAIJ    *aij;
2057 
2058   PetscFunctionBegin;
2059   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2060   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2061   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2062   /* The compression and expansion should be avoided. It does not point
2063      out errors and might change the indices, hence it is buggy */
2064   ierr = ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new);CHKERRQ(ierr);
2065   ierr = ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new);CHKERRQ(ierr);
2066 
2067   /* Check for special case: each processor gets entire matrix columns */
2068   ierr = ISIdentity(iscol,&idflag);CHKERRQ(ierr);
2069   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
2070   if (idflag && ncol == mat->cmap->N) allcols = PETSC_TRUE;
2071   else allcols = PETSC_FALSE;
2072 
2073   ierr = ISIdentity(isrow,&idflag);CHKERRQ(ierr);
2074   ierr = ISGetLocalSize(isrow,&nrow);CHKERRQ(ierr);
2075   if (idflag && nrow == mat->rmap->N) allrows = PETSC_TRUE;
2076   else allrows = PETSC_FALSE;
2077 
2078   if (call ==  MAT_REUSE_MATRIX) {
2079     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
2080     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2081     ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_REUSE_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr);
2082   } else {
2083     ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_INITIAL_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr);
2084   }
2085   ierr = ISDestroy(&isrow_new);CHKERRQ(ierr);
2086   ierr = ISDestroy(&iscol_new);CHKERRQ(ierr);
2087   /*
2088       m - number of local rows
2089       n - number of columns (same on all processors)
2090       rstart - first row in new global matrix generated
2091   */
2092   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2093   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2094   m    = m/bs;
2095   n    = n/bs;
2096 
2097   if (call == MAT_INITIAL_MATRIX) {
2098     aij = (Mat_SeqBAIJ*)(Mreuse)->data;
2099     ii  = aij->i;
2100     jj  = aij->j;
2101 
2102     /*
2103         Determine the number of non-zeros in the diagonal and off-diagonal
2104         portions of the matrix in order to do correct preallocation
2105     */
2106 
2107     /* first get start and end of "diagonal" columns */
2108     if (csize == PETSC_DECIDE) {
2109       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2110       if (mglobal == n*bs) { /* square matrix */
2111         nlocal = m;
2112       } else {
2113         nlocal = n/size + ((n % size) > rank);
2114       }
2115     } else {
2116       nlocal = csize/bs;
2117     }
2118     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2119     rstart = rend - nlocal;
2120     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2121 
2122     /* next, compute all the lengths */
2123     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2124     olens = dlens + m;
2125     for (i=0; i<m; i++) {
2126       jend = ii[i+1] - ii[i];
2127       olen = 0;
2128       dlen = 0;
2129       for (j=0; j<jend; j++) {
2130         if (*jj < rstart || *jj >= rend) olen++;
2131         else dlen++;
2132         jj++;
2133       }
2134       olens[i] = olen;
2135       dlens[i] = dlen;
2136     }
2137     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
2138     ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr);
2139     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
2140     ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
2141     ierr = PetscFree(dlens);CHKERRQ(ierr);
2142   } else {
2143     PetscInt ml,nl;
2144 
2145     M    = *newmat;
2146     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2147     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2148     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2149     /*
2150          The next two lines are needed so we may call MatSetValuesBlocked_MPIBAIJ() below directly,
2151        rather than the slower MatSetValues().
2152     */
2153     M->was_assembled = PETSC_TRUE;
2154     M->assembled     = PETSC_FALSE;
2155   }
2156   ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2157   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2158   aij  = (Mat_SeqBAIJ*)(Mreuse)->data;
2159   ii   = aij->i;
2160   jj   = aij->j;
2161   aa   = aij->a;
2162   for (i=0; i<m; i++) {
2163     row   = rstart/bs + i;
2164     nz    = ii[i+1] - ii[i];
2165     cwork = jj;     jj += nz;
2166     vwork = aa;     aa += nz*bs*bs;
2167     ierr  = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2168   }
2169 
2170   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2171   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2172   *newmat = M;
2173 
2174   /* save submatrix used in processor for next request */
2175   if (call ==  MAT_INITIAL_MATRIX) {
2176     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2177     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2178   }
2179   PetscFunctionReturn(0);
2180 }
2181 
2182 #undef __FUNCT__
2183 #define __FUNCT__ "MatPermute_MPIBAIJ"
2184 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B)
2185 {
2186   MPI_Comm       comm,pcomm;
2187   PetscInt       first,rlocal_size,clocal_size,nrows;
2188   const PetscInt *rows;
2189   PetscMPIInt    size;
2190   IS             crowp,growp,irowp,lrowp,lcolp;
2191   PetscErrorCode ierr;
2192 
2193   PetscFunctionBegin;
2194   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2195   /* make a collective version of 'rowp' */
2196   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
2197   if (pcomm==comm) {
2198     crowp = rowp;
2199   } else {
2200     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
2201     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
2202     ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr);
2203     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
2204   }
2205   /* collect the global row permutation and invert it */
2206   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
2207   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
2208   if (pcomm!=comm) {
2209     ierr = ISDestroy(&crowp);CHKERRQ(ierr);
2210   }
2211   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
2212   ierr = ISDestroy(&growp);CHKERRQ(ierr);
2213   /* get the local target indices */
2214   ierr = MatGetOwnershipRange(A,&first,NULL);CHKERRQ(ierr);
2215   ierr = MatGetLocalSize(A,&rlocal_size,&clocal_size);CHKERRQ(ierr);
2216   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
2217   ierr = ISCreateGeneral(MPI_COMM_SELF,rlocal_size,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr);
2218   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
2219   ierr = ISDestroy(&irowp);CHKERRQ(ierr);
2220   /* the column permutation is much easier;
2221      make a local (gathered) version of 'colp' */
2222   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
2223   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
2224   if (size==1) {
2225     lcolp = colp;
2226   } else {
2227     ierr = ISAllGather(colp,&lcolp);CHKERRQ(ierr);
2228   }
2229   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
2230   /* now we just get the submatrix */
2231   ierr = MatGetSubMatrix_MPIBAIJ_Private(A,lrowp,lcolp,clocal_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
2232   if (size>1) {
2233     ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
2234   }
2235   /* clean up */
2236   ierr = ISDestroy(&lrowp);CHKERRQ(ierr);
2237   PetscFunctionReturn(0);
2238 }
2239 
2240 #undef __FUNCT__
2241 #define __FUNCT__ "MatGetGhosts_MPIBAIJ"
2242 PetscErrorCode  MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
2243 {
2244   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data;
2245   Mat_SeqBAIJ *B    = (Mat_SeqBAIJ*)baij->B->data;
2246 
2247   PetscFunctionBegin;
2248   if (nghosts) *nghosts = B->nbs;
2249   if (ghosts) *ghosts = baij->garray;
2250   PetscFunctionReturn(0);
2251 }
2252 
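/*
   Usage sketch (hedged): MatGetGhosts above exposes the block column map garray of
   the off-diagonal part, which is exactly the ghost information needed to build a
   compatible ghosted vector.  Assuming A is an assembled MATMPIBAIJ matrix with
   block size bs:

     PetscInt       nghost,m,n;
     const PetscInt *ghosts;
     Vec            vg;

     ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
     ierr = MatGetGhosts(A,&nghost,&ghosts);CHKERRQ(ierr);
     ierr = VecCreateGhostBlock(PETSC_COMM_WORLD,bs,n,PETSC_DECIDE,nghost,ghosts,&vg);CHKERRQ(ierr);
     ierr = VecDestroy(&vg);CHKERRQ(ierr);
*/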
2253 extern PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat);
2254 
2255 #undef __FUNCT__
2256 #define __FUNCT__ "MatFDColoringCreate_MPIBAIJ"
2257 /*
2258     This routine is almost identical to MatFDColoringCreate_MPIAIJ()!
2259 */
2260 PetscErrorCode MatFDColoringCreate_MPIBAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c)
2261 {
2262   Mat_MPIBAIJ            *baij = (Mat_MPIBAIJ*)mat->data;
2263   PetscErrorCode         ierr;
2264   PetscMPIInt            size,*ncolsonproc,*disp,nn;
2265   PetscInt               bs,i,n,nrows,j,k,m,ncols,col;
2266   const PetscInt         *is,*rows = 0,*A_ci,*A_cj,*B_ci,*B_cj,*ltog;
2267   PetscInt               nis = iscoloring->n,nctot,*cols;
2268   PetscInt               *rowhit,M,cstart,cend,colb;
2269   PetscInt               *columnsforrow,l;
2270   IS                     *isa;
2271   PetscBool              done,flg;
2272   ISLocalToGlobalMapping map = mat->cmap->bmapping;
2273   PetscInt               ctype=c->ctype;
2274 
2275   PetscFunctionBegin;
2276   if (!mat->assembled) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled first; MatAssemblyBegin/End();");
2277   if (ctype == IS_COLORING_GHOSTED && !map) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_INCOMP,"When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMappingBlock");
2278 
2279   if (map) {ierr = ISLocalToGlobalMappingGetIndices(map,&ltog);CHKERRQ(ierr);}
2280   else     ltog = NULL;
2281   ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr);
2282   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2283 
2284   M         = mat->rmap->n/bs;
2285   cstart    = mat->cmap->rstart/bs;
2286   cend      = mat->cmap->rend/bs;
2287   c->M      = mat->rmap->N/bs;         /* set the global rows and columns and local rows */
2288   c->N      = mat->cmap->N/bs;
2289   c->m      = mat->rmap->n/bs;
2290   c->rstart = mat->rmap->rstart/bs;
2291 
2292   c->ncolors = nis;
2293   ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr);
2294   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr);
2295   ierr       = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr);
2296   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr);
2297   ierr       = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr);
2298   ierr       = PetscLogObjectMemory(c,5*nis*sizeof(PetscInt));CHKERRQ(ierr);
2299 
2300   /* Allow access to data structures of local part of matrix */
2301   if (!baij->colmap) {
2302     ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
2303   }
2304   ierr = MatGetColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr);
2305   ierr = MatGetColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr);
2306 
2307   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr);
2308   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr);
2309 
2310   for (i=0; i<nis; i++) {
2311     ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr);
2312     ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr);
2313 
2314     c->ncolumns[i] = n;
2315     if (n) {
2316       ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr);
2317       ierr = PetscLogObjectMemory(c,n*sizeof(PetscInt));CHKERRQ(ierr);
2318       ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr);
2319     } else {
2320       c->columns[i] = 0;
2321     }
2322 
2323     if (ctype == IS_COLORING_GLOBAL) {
2324       /* Determine the total (parallel) number of columns of this color */
2325       ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
2326       ierr = PetscMalloc2(size,PetscMPIInt,&ncolsonproc,size,PetscMPIInt,&disp);CHKERRQ(ierr);
2327 
2328       ierr  = PetscMPIIntCast(n,&nn);CHKERRQ(ierr);
2329       ierr  = MPI_Allgather(&nn,1,MPI_INT,ncolsonproc,1,MPI_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
2330       nctot = 0; for (j=0; j<size; j++) nctot += ncolsonproc[j];
2331       if (!nctot) {
2332         ierr = PetscInfo(mat,"Coloring of matrix has some unneeded colors with no corresponding rows\n");CHKERRQ(ierr);
2333       }
2334 
2335       disp[0] = 0;
2336       for (j=1; j<size; j++) {
2337         disp[j] = disp[j-1] + ncolsonproc[j-1];
2338       }
2339 
2340       /* Get complete list of columns for color on each processor */
2341       ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2342       ierr = MPI_Allgatherv((void*)is,n,MPIU_INT,cols,ncolsonproc,disp,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
2343       ierr = PetscFree2(ncolsonproc,disp);CHKERRQ(ierr);
2344     } else if (ctype == IS_COLORING_GHOSTED) {
2345       /* Determine local number of columns of this color on this process, including ghost points */
2346       nctot = n;
2347       ierr  = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2348       ierr  = PetscMemcpy(cols,is,n*sizeof(PetscInt));CHKERRQ(ierr);
2349     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not provided for this MatFDColoring type");
2350 
2351     /*
2352        Mark all rows affected by these columns
2353     */
2354     /* Temporary option to allow for debugging/testing */
2355     flg  = PETSC_FALSE;
2356     ierr = PetscOptionsGetBool(NULL,"-matfdcoloring_slow",&flg,NULL);CHKERRQ(ierr);
2357     if (!flg) { /*-----------------------------------------------------------------------------*/
2358       /* crude, fast version */
2359       ierr = PetscMemzero(rowhit,M*sizeof(PetscInt));CHKERRQ(ierr);
2360       /* loop over columns*/
2361       for (j=0; j<nctot; j++) {
2362         if (ctype == IS_COLORING_GHOSTED) {
2363           col = ltog[cols[j]];
2364         } else {
2365           col = cols[j];
2366         }
2367         if (col >= cstart && col < cend) {
2368           /* column is in diagonal block of matrix */
2369           rows = A_cj + A_ci[col-cstart];
2370           m    = A_ci[col-cstart+1] - A_ci[col-cstart];
2371         } else {
2372 #if defined(PETSC_USE_CTABLE)
2373           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2374           colb--;
2375 #else
2376           colb = baij->colmap[col] - 1;
2377 #endif
2378           if (colb == -1) {
2379             m = 0;
2380           } else {
2381             colb = colb/bs;
2382             rows = B_cj + B_ci[colb];
2383             m    = B_ci[colb+1] - B_ci[colb];
2384           }
2385         }
2386         /* loop over columns marking them in rowhit */
2387         for (k=0; k<m; k++) {
2388           rowhit[*rows++] = col + 1;
2389         }
2390       }
2391 
2392       /* count the number of hits */
2393       nrows = 0;
2394       for (j=0; j<M; j++) {
2395         if (rowhit[j]) nrows++;
2396       }
2397       c->nrows[i] = nrows;
2398       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
2399       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
2400       ierr        = PetscLogObjectMemory(c,2*(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr);
2401       nrows       = 0;
2402       for (j=0; j<M; j++) {
2403         if (rowhit[j]) {
2404           c->rows[i][nrows]          = j;
2405           c->columnsforrow[i][nrows] = rowhit[j] - 1;
2406           nrows++;
2407         }
2408       }
2409     } else { /*-------------------------------------------------------------------------------*/
2410       /* slow version, using rowhit as a linked list */
2411       PetscInt currentcol,fm,mfm;
2412       rowhit[M] = M;
2413       nrows     = 0;
2414       /* loop over columns*/
2415       for (j=0; j<nctot; j++) {
2416         if (ctype == IS_COLORING_GHOSTED) {
2417           col = ltog[cols[j]];
2418         } else {
2419           col = cols[j];
2420         }
2421         if (col >= cstart && col < cend) {
2422           /* column is in diagonal block of matrix */
2423           rows = A_cj + A_ci[col-cstart];
2424           m    = A_ci[col-cstart+1] - A_ci[col-cstart];
2425         } else {
2426 #if defined(PETSC_USE_CTABLE)
2427           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2428           colb--;
2429 #else
2430           colb = baij->colmap[col] - 1;
2431 #endif
2432           if (colb == -1) {
2433             m = 0;
2434           } else {
2435             colb = colb/bs;
2436             rows = B_cj + B_ci[colb];
2437             m    = B_ci[colb+1] - B_ci[colb];
2438           }
2439         }
2440 
2441         /* loop over columns marking them in rowhit */
2442         fm = M;    /* fm points to first entry in linked list */
2443         for (k=0; k<m; k++) {
2444           currentcol = *rows++;
2445           /* is it already in the list? */
2446           do {
2447             mfm = fm;
2448             fm  = rowhit[fm];
2449           } while (fm < currentcol);
2450           /* not in list so add it */
2451           if (fm != currentcol) {
2452             nrows++;
2453             columnsforrow[currentcol] = col;
2454             /* next three lines insert new entry into linked list */
2455             rowhit[mfm]        = currentcol;
2456             rowhit[currentcol] = fm;
2457             fm                 = currentcol;
2458             /* fm points to present position in list since we know the columns are sorted */
2459           } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid coloring of matrix detected");
2460         }
2461       }
2462       c->nrows[i] = nrows;
2463       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
2464       ierr        = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
2465       ierr        = PetscLogObjectMemory(c,(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr);
2466       /* now store the linked list of rows into c->rows[i] */
2467       nrows = 0;
2468       fm    = rowhit[M];
2469       do {
2470         c->rows[i][nrows]            = fm;
2471         c->columnsforrow[i][nrows++] = columnsforrow[fm];
2472         fm                           = rowhit[fm];
2473       } while (fm < M);
2474     } /* ---------------------------------------------------------------------------------------*/
2475     ierr = PetscFree(cols);CHKERRQ(ierr);
2476   }
2477 
2478   /* Optimize by adding the vscale, and scaleforrow[][] fields */
2479   /*
2480        vscale will contain the "diagonal" on-processor scalings followed by the off-processor ones
2481   */
2482   if (ctype == IS_COLORING_GLOBAL) {
2483     PetscInt *garray;
2484     ierr = PetscMalloc(baij->B->cmap->n*sizeof(PetscInt),&garray);CHKERRQ(ierr);
2485     for (i=0; i<baij->B->cmap->n/bs; i++) {
2486       for (j=0; j<bs; j++) {
2487         garray[i*bs+j] = bs*baij->garray[i]+j;
2488       }
2489     }
2490     ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),baij->A->rmap->n,PETSC_DETERMINE,baij->B->cmap->n,garray,&c->vscale);CHKERRQ(ierr);
2491     ierr = PetscFree(garray);CHKERRQ(ierr);
2492     CHKMEMQ;
2493     ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
2494     for (k=0; k<c->ncolors; k++) {
2495       ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
2496       for (l=0; l<c->nrows[k]; l++) {
2497         col = c->columnsforrow[k][l];
2498         if (col >= cstart && col < cend) {
2499           /* column is in diagonal block of matrix */
2500           colb = col - cstart;
2501         } else {
2502           /* column  is in "off-processor" part */
2503 #if defined(PETSC_USE_CTABLE)
2504           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2505           colb--;
2506 #else
2507           colb = baij->colmap[col] - 1;
2508 #endif
2509           colb  = colb/bs;
2510           colb += cend - cstart;
2511         }
2512         c->vscaleforrow[k][l] = colb;
2513       }
2514     }
2515   } else if (ctype == IS_COLORING_GHOSTED) {
2516     /* Get gtol mapping */
2517     PetscInt N = mat->cmap->N,nlocal,*gtol;
2518     ierr = PetscMalloc((N+1)*sizeof(PetscInt),&gtol);CHKERRQ(ierr);
2519     for (i=0; i<N; i++) gtol[i] = -1;
2520     ierr = ISLocalToGlobalMappingGetSize(map,&nlocal);CHKERRQ(ierr);
2521     for (i=0; i<nlocal; i++) gtol[ltog[i]] = i;
2522 
2523     c->vscale = 0; /* will be created in MatFDColoringApply() */
2524     ierr      = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
2525     for (k=0; k<c->ncolors; k++) {
2526       ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
2527       for (l=0; l<c->nrows[k]; l++) {
2528         col = c->columnsforrow[k][l];      /* global column index */
2529 
2530         c->vscaleforrow[k][l] = gtol[col]; /* local column index */
2531       }
2532     }
2533     ierr = PetscFree(gtol);CHKERRQ(ierr);
2534   }
2535   ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr);
2536 
2537   ierr = PetscFree(rowhit);CHKERRQ(ierr);
2538   ierr = PetscFree(columnsforrow);CHKERRQ(ierr);
2539   ierr = MatRestoreColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr);
2540   ierr = MatRestoreColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr);
2541   if (map) {ierr = ISLocalToGlobalMappingRestoreIndices(map,&ltog);CHKERRQ(ierr);}
2542   CHKMEMQ;
2543   PetscFunctionReturn(0);
2544 }
2545 
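/*
   Usage sketch (hedged): this routine is normally reached through the
   type-dispatched MatFDColoringCreate(); a caller first obtains a coloring and
   then builds the finite-difference coloring object from it:

     ISColoring    iscoloring;
     MatFDColoring fdcoloring;

     ierr = MatGetColoring(A,MATCOLORINGSL,&iscoloring);CHKERRQ(ierr);
     ierr = MatFDColoringCreate(A,iscoloring,&fdcoloring);CHKERRQ(ierr);
     ierr = ISColoringDestroy(&iscoloring);CHKERRQ(ierr);
*/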
2546 #undef __FUNCT__
2547 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIBAIJ"
2548 PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat)
2549 {
2550   Mat            B;
2551   Mat_MPIBAIJ    *a  = (Mat_MPIBAIJ*)A->data;
2552   Mat_SeqBAIJ    *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data;
2553   Mat_SeqAIJ     *b;
2554   PetscErrorCode ierr;
2555   PetscMPIInt    size,rank,*recvcounts = 0,*displs = 0;
2556   PetscInt       sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs;
2557   PetscInt       m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf;
2558 
2559   PetscFunctionBegin;
2560   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
2561   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
2562 
2563   /* ----------------------------------------------------------------
2564      Tell every processor the number of nonzeros per row
2565   */
2566   ierr = PetscMalloc((A->rmap->N/bs)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
2567   for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) {
2568     lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs];
2569   }
2570   sendcount = A->rmap->rend/bs - A->rmap->rstart/bs;
2571   ierr      = PetscMalloc(2*size*sizeof(PetscMPIInt),&recvcounts);CHKERRQ(ierr);
2572   displs    = recvcounts + size;
2573   for (i=0; i<size; i++) {
2574     recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs;
2575     displs[i]     = A->rmap->range[i]/bs;
2576   }
2577 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2578   ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2579 #else
2580   ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2581 #endif
2582   /* ---------------------------------------------------------------
2583      Create the sequential matrix of the same type as the local block diagonal
2584   */
2585   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
2586   ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2587   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2588   ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr);
2589   b    = (Mat_SeqAIJ*)B->data;
2590 
2591   /*--------------------------------------------------------------------
2592     Copy my part of matrix column indices over
2593   */
2594   sendcount  = ad->nz + bd->nz;
2595   jsendbuf   = b->j + b->i[rstarts[rank]/bs];
2596   a_jsendbuf = ad->j;
2597   b_jsendbuf = bd->j;
2598   n          = A->rmap->rend/bs - A->rmap->rstart/bs;
2599   cnt        = 0;
2600   for (i=0; i<n; i++) {
2601 
2602     /* put in lower diagonal portion */
2603     m = bd->i[i+1] - bd->i[i];
2604     while (m > 0) {
2605       /* is it above diagonal (in bd (compressed) numbering) */
2606       if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break;
2607       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2608       m--;
2609     }
2610 
2611     /* put in diagonal portion */
2612     for (j=ad->i[i]; j<ad->i[i+1]; j++) {
2613       jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++;
2614     }
2615 
2616     /* put in upper diagonal portion */
2617     while (m-- > 0) {
2618       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2619     }
2620   }
2621   if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt);
2622 
2623   /*--------------------------------------------------------------------
2624     Gather all column indices to all processors
2625   */
2626   for (i=0; i<size; i++) {
2627     recvcounts[i] = 0;
2628     for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) {
2629       recvcounts[i] += lens[j];
2630     }
2631   }
2632   displs[0] = 0;
2633   for (i=1; i<size; i++) {
2634     displs[i] = displs[i-1] + recvcounts[i-1];
2635   }
2636 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2637   ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2638 #else
2639   ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2640 #endif
2641   /*--------------------------------------------------------------------
2642     Assemble the matrix into usable form (note numerical values not yet set)
2643   */
2644   /* set the b->ilen (length of each row) values */
2645   ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr);
2646   /* set the b->i indices */
2647   b->i[0] = 0;
2648   for (i=1; i<=A->rmap->N/bs; i++) {
2649     b->i[i] = b->i[i-1] + lens[i-1];
2650   }
2651   ierr = PetscFree(lens);CHKERRQ(ierr);
2652   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2653   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2654   ierr = PetscFree(recvcounts);CHKERRQ(ierr);
2655 
2656   if (A->symmetric) {
2657     ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2658   } else if (A->hermitian) {
2659     ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr);
2660   } else if (A->structurally_symmetric) {
2661     ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2662   }
2663   *newmat = B;
2664   PetscFunctionReturn(0);
2665 }
2666 
2667 #undef __FUNCT__
2668 #define __FUNCT__ "MatSOR_MPIBAIJ"
2669 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2670 {
2671   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
2672   PetscErrorCode ierr;
2673   Vec            bb1 = 0;
2674 
2675   PetscFunctionBegin;
2676   if (flag == SOR_APPLY_UPPER) {
2677     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2678     PetscFunctionReturn(0);
2679   }
2680 
2681   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) { /* a work vector is needed unless a single sweep starts from a zero initial guess */
2682     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2683   }
2684 
2685   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2686     if (flag & SOR_ZERO_INITIAL_GUESS) {
2687       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2688       its--;
2689     }
2690 
2691     while (its--) {
2692       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2693       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2694 
2695       /* update rhs: bb1 = bb - B*x */
2696       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2697       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2698 
2699       /* local sweep */
2700       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2701     }
2702   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
2703     if (flag & SOR_ZERO_INITIAL_GUESS) {
2704       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2705       its--;
2706     }
2707     while (its--) {
2708       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2709       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2710 
2711       /* update rhs: bb1 = bb - B*x */
2712       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2713       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2714 
2715       /* local sweep */
2716       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2717     }
2718   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
2719     if (flag & SOR_ZERO_INITIAL_GUESS) {
2720       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2721       its--;
2722     }
2723     while (its--) {
2724       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2725       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2726 
2727       /* update rhs: bb1 = bb - B*x */
2728       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2729       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2730 
2731       /* local sweep */
2732       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2733     }
2734   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel version of SOR requested is not supported");
2735 
2736   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2737   PetscFunctionReturn(0);
2738 }
2739 
2740 extern PetscErrorCode  MatFDColoringApply_BAIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2741 
2742 #undef __FUNCT__
2743 #define __FUNCT__ "MatInvertBlockDiagonal_MPIBAIJ"
2744 PetscErrorCode  MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values)
2745 {
2746   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*) A->data;
2747   PetscErrorCode ierr;
2748 
2749   PetscFunctionBegin;
2750   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2751   PetscFunctionReturn(0);
2752 }
2753 
2754 
2755 /* -------------------------------------------------------------------*/
2756 static struct _MatOps MatOps_Values = {
2757        MatSetValues_MPIBAIJ,
2758        MatGetRow_MPIBAIJ,
2759        MatRestoreRow_MPIBAIJ,
2760        MatMult_MPIBAIJ,
2761 /* 4*/ MatMultAdd_MPIBAIJ,
2762        MatMultTranspose_MPIBAIJ,
2763        MatMultTransposeAdd_MPIBAIJ,
2764        0,
2765        0,
2766        0,
2767 /*10*/ 0,
2768        0,
2769        0,
2770        MatSOR_MPIBAIJ,
2771        MatTranspose_MPIBAIJ,
2772 /*15*/ MatGetInfo_MPIBAIJ,
2773        MatEqual_MPIBAIJ,
2774        MatGetDiagonal_MPIBAIJ,
2775        MatDiagonalScale_MPIBAIJ,
2776        MatNorm_MPIBAIJ,
2777 /*20*/ MatAssemblyBegin_MPIBAIJ,
2778        MatAssemblyEnd_MPIBAIJ,
2779        MatSetOption_MPIBAIJ,
2780        MatZeroEntries_MPIBAIJ,
2781 /*24*/ MatZeroRows_MPIBAIJ,
2782        0,
2783        0,
2784        0,
2785        0,
2786 /*29*/ MatSetUp_MPIBAIJ,
2787        0,
2788        0,
2789        0,
2790        0,
2791 /*34*/ MatDuplicate_MPIBAIJ,
2792        0,
2793        0,
2794        0,
2795        0,
2796 /*39*/ MatAXPY_MPIBAIJ,
2797        MatGetSubMatrices_MPIBAIJ,
2798        MatIncreaseOverlap_MPIBAIJ,
2799        MatGetValues_MPIBAIJ,
2800        MatCopy_MPIBAIJ,
2801 /*44*/ 0,
2802        MatScale_MPIBAIJ,
2803        0,
2804        0,
2805        0,
2806 /*49*/ 0,
2807        0,
2808        0,
2809        0,
2810        0,
2811 /*54*/ MatFDColoringCreate_MPIBAIJ,
2812        0,
2813        MatSetUnfactored_MPIBAIJ,
2814        MatPermute_MPIBAIJ,
2815        MatSetValuesBlocked_MPIBAIJ,
2816 /*59*/ MatGetSubMatrix_MPIBAIJ,
2817        MatDestroy_MPIBAIJ,
2818        MatView_MPIBAIJ,
2819        0,
2820        0,
2821 /*64*/ 0,
2822        0,
2823        0,
2824        0,
2825        0,
2826 /*69*/ MatGetRowMaxAbs_MPIBAIJ,
2827        0,
2828        0,
2829        0,
2830        0,
2831 /*74*/ 0,
2832        MatFDColoringApply_BAIJ,
2833        0,
2834        0,
2835        0,
2836 /*79*/ 0,
2837        0,
2838        0,
2839        0,
2840        MatLoad_MPIBAIJ,
2841 /*84*/ 0,
2842        0,
2843        0,
2844        0,
2845        0,
2846 /*89*/ 0,
2847        0,
2848        0,
2849        0,
2850        0,
2851 /*94*/ 0,
2852        0,
2853        0,
2854        0,
2855        0,
2856 /*99*/ 0,
2857        0,
2858        0,
2859        0,
2860        0,
2861 /*104*/0,
2862        MatRealPart_MPIBAIJ,
2863        MatImaginaryPart_MPIBAIJ,
2864        0,
2865        0,
2866 /*109*/0,
2867        0,
2868        0,
2869        0,
2870        0,
2871 /*114*/MatGetSeqNonzeroStructure_MPIBAIJ,
2872        0,
2873        MatGetGhosts_MPIBAIJ,
2874        0,
2875        0,
2876 /*119*/0,
2877        0,
2878        0,
2879        0,
2880        0,
2881 /*124*/0,
2882        0,
2883        MatInvertBlockDiagonal_MPIBAIJ
2884 };
2885 
2886 EXTERN_C_BEGIN
2887 #undef __FUNCT__
2888 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
2889 PetscErrorCode  MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a)
2890 {
2891   PetscFunctionBegin;
2892   *a = ((Mat_MPIBAIJ*)A->data)->A;
2893   PetscFunctionReturn(0);
2894 }
2895 EXTERN_C_END
2896 
2897 EXTERN_C_BEGIN
2898 extern PetscErrorCode  MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*);
2899 EXTERN_C_END
2900 
2901 EXTERN_C_BEGIN
2902 #undef __FUNCT__
2903 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ"
2904 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2905 {
2906   PetscInt       m,rstart,cstart,cend;
2907   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
2908   const PetscInt *JJ    =0;
2909   PetscScalar    *values=0;
2910   PetscErrorCode ierr;
2911 
2912   PetscFunctionBegin;
2913   ierr   = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2914   ierr   = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2915   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2916   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2917   ierr   = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2918   m      = B->rmap->n/bs;
2919   rstart = B->rmap->rstart/bs;
2920   cstart = B->cmap->rstart/bs;
2921   cend   = B->cmap->rend/bs;
2922 
2923   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
2924   ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
2925   for (i=0; i<m; i++) {
2926     nz = ii[i+1] - ii[i];
2927     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
2928     nz_max = PetscMax(nz_max,nz);
2929     JJ     = jj + ii[i];
2930     for (j=0; j<nz; j++) {
2931       if (*JJ >= cstart) break;
2932       JJ++;
2933     }
2934     d = 0;
2935     for (; j<nz; j++) {
2936       if (*JJ++ >= cend) break;
2937       d++;
2938     }
2939     d_nnz[i] = d;
2940     o_nnz[i] = nz - d;
2941   }
2942   ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2943   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
2944 
2945   values = (PetscScalar*)V;
2946   if (!values) {
2947     ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2948     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2949   }
2950   for (i=0; i<m; i++) {
2951     PetscInt          row    = i + rstart;
2952     PetscInt          ncols  = ii[i+1] - ii[i];
2953     const PetscInt    *icols = jj + ii[i];
2954     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2955     ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2956   }
2957 
2958   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2959   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2960   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2961   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2962   PetscFunctionReturn(0);
2963 }
2964 EXTERN_C_END
2965 
2966 #undef __FUNCT__
2967 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR"
2968 /*@C
2969    MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format
2970    (block compressed row).
2971 
2972    Collective on MPI_Comm
2973 
2974    Input Parameters:
2975 +  A - the matrix
2976 .  bs - the block size
2977 .  i - the indices into j for the start of each local block row (indices start with zero)
2978 .  j - the block column indices for each local block row (indices start with zero); these must be sorted for each row
2979 -  v - optional values in the matrix
2980 
2981    Level: developer
2982 
2983 .keywords: matrix, aij, compressed row, sparse, parallel
2984 
2985 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateBAIJ(), MATMPIBAIJ
2986 @*/
2987 PetscErrorCode  MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2988 {
2989   PetscErrorCode ierr;
2990 
2991   PetscFunctionBegin;
2992   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2993   PetscValidType(B,1);
2994   PetscValidLogicalCollectiveInt(B,bs,2);
2995   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
2996   PetscFunctionReturn(0);
2997 }
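/*
   Example (a minimal, illustrative sketch, not taken from the PETSc examples): as the
   implementation above suggests, i, j and v are given per block row, the column indices
   are block column indices, and v supplies bs*bs scalars per block.  The names ii, jj
   and vv below are hypothetical.

     Mat            A;
     PetscErrorCode ierr;
     PetscInt       bs    = 2;
     PetscInt       ii[3] = {0,2,3};       two local block rows: row 0 has blocks in block columns 0 and 1, row 1 has one block in column 1
     PetscInt       jj[3] = {0,1,1};
     PetscScalar    vv[3*2*2];             bs*bs scalars per block, filled by the caller

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,2*bs,2*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
     ierr = MatMPIBAIJSetPreallocationCSR(A,bs,ii,jj,vv);CHKERRQ(ierr);
*/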
2998 
2999 EXTERN_C_BEGIN
3000 #undef __FUNCT__
3001 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
3002 PetscErrorCode  MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
3003 {
3004   Mat_MPIBAIJ    *b;
3005   PetscErrorCode ierr;
3006   PetscInt       i;
3007   PetscBool      d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE;
3008 
3009   PetscFunctionBegin;
3010   if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE;
3011   if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE;
3012 
3013   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
3014   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
3015   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
3016   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
3017 
3018   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
3019   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
3020   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3021   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3022   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
3023 
3024   if (d_nnz) {
3025     for (i=0; i<B->rmap->n/bs; i++) {
3026       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
3027     }
3028   }
3029   if (o_nnz) {
3030     for (i=0; i<B->rmap->n/bs; i++) {
3031       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
3032     }
3033   }
3034 
3035   b      = (Mat_MPIBAIJ*)B->data;
3036   b->bs2 = bs*bs;
3037   b->mbs = B->rmap->n/bs;
3038   b->nbs = B->cmap->n/bs;
3039   b->Mbs = B->rmap->N/bs;
3040   b->Nbs = B->cmap->N/bs;
3041 
3042   for (i=0; i<=b->size; i++) {
3043     b->rangebs[i] = B->rmap->range[i]/bs;
3044   }
3045   b->rstartbs = B->rmap->rstart/bs;
3046   b->rendbs   = B->rmap->rend/bs;
3047   b->cstartbs = B->cmap->rstart/bs;
3048   b->cendbs   = B->cmap->rend/bs;
3049 
3050   if (!B->preallocated) {
3051     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3052     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3053     ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
3054     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3055     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3056     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3057     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
3058     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3059     ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr);
3060   }
3061 
3062   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3063   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
3064   /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). */
3065   if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3066   if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3067   B->preallocated = PETSC_TRUE;
3068   PetscFunctionReturn(0);
3069 }
3070 EXTERN_C_END
3071 
3072 EXTERN_C_BEGIN
3073 extern PetscErrorCode  MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
3074 extern PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
3075 EXTERN_C_END
3076 
3077 
3078 EXTERN_C_BEGIN
3079 #undef __FUNCT__
3080 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj"
3081 PetscErrorCode  MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype,MatReuse reuse,Mat *adj)
3082 {
3083   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
3084   PetscErrorCode ierr;
3085   Mat_SeqBAIJ    *d  = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data;
3086   PetscInt       M   = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs;
3087   const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;
3088 
3089   PetscFunctionBegin;
3090   ierr  = PetscMalloc((M+1)*sizeof(PetscInt),&ii);CHKERRQ(ierr);
3091   ii[0] = 0;
3092   CHKMEMQ;
3093   for (i=0; i<M; i++) {
3094     if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]);
3095     if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]);
3096     ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i];
3097     /* remove one from the count if the row has a diagonal entry */
3098     for (j=id[i]; j<id[i+1]; j++) {
3099       if (jd[j] == i) {ii[i+1]--;break;}
3100     }
3101     CHKMEMQ;
3102   }
3103   ierr = PetscMalloc(ii[M]*sizeof(PetscInt),&jj);CHKERRQ(ierr);
3104   cnt  = 0;
3105   for (i=0; i<M; i++) {
3106     for (j=io[i]; j<io[i+1]; j++) {
3107       if (garray[jo[j]] > rstart) break;
3108       jj[cnt++] = garray[jo[j]];
3109       CHKMEMQ;
3110     }
3111     for (k=id[i]; k<id[i+1]; k++) {
3112       if (jd[k] != i) {
3113         jj[cnt++] = rstart + jd[k];
3114         CHKMEMQ;
3115       }
3116     }
3117     for (; j<io[i+1]; j++) {
3118       jj[cnt++] = garray[jo[j]];
3119       CHKMEMQ;
3120     }
3121   }
3122   ierr = MatCreateMPIAdj(PetscObjectComm((PetscObject)B),M,B->cmap->N/B->rmap->bs,ii,jj,NULL,adj);CHKERRQ(ierr);
3123   PetscFunctionReturn(0);
3124 }
3125 EXTERN_C_END
3126 
3127 #include <../src/mat/impls/aij/mpi/mpiaij.h>
3128 EXTERN_C_BEGIN
3129 PetscErrorCode  MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,MatReuse,Mat*);
3130 EXTERN_C_END
3131 
3132 EXTERN_C_BEGIN
3133 #undef __FUNCT__
3134 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAIJ"
3135 PetscErrorCode  MatConvert_MPIBAIJ_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
3136 {
3137   PetscErrorCode ierr;
3138   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
3139   Mat            B;
3140   Mat_MPIAIJ     *b;
3141 
3142   PetscFunctionBegin;
3143   if (!A->assembled) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Matrix must be assembled");
3144 
3145   ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
3146   ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
3147   ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
3148   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
3149   ierr = MatMPIAIJSetPreallocation(B,0,NULL,0,NULL);CHKERRQ(ierr);
3150   b    = (Mat_MPIAIJ*) B->data;
3151 
3152   ierr = MatDestroy(&b->A);CHKERRQ(ierr);
3153   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
3154   ierr = MatDisAssemble_MPIBAIJ(A);CHKERRQ(ierr);
3155   ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr);
3156   ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr);
3157   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3158   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3159   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3160   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3161   if (reuse == MAT_REUSE_MATRIX) {
3162     ierr = MatHeaderReplace(A,B);CHKERRQ(ierr);
3163   } else {
3164    *newmat = B;
3165   }
3166   PetscFunctionReturn(0);
3167 }
3168 EXTERN_C_END
3169 
3170 EXTERN_C_BEGIN
3171 #if defined(PETSC_HAVE_MUMPS)
3172 extern PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*);
3173 #endif
3174 EXTERN_C_END
3175 
3176 /*MC
3177    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
3178 
3179    Options Database Keys:
3180 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
3181 . -mat_block_size <bs> - set the blocksize used to store the matrix
3182 - -mat_use_hash_table <fact> - use a hash table to save memory when constructing the matrix; <fact> is the hash table size factor
3183 
3184   Level: beginner
3185 
3186 .seealso: MatCreateBAIJ()
3187 M*/
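/*
   Example (a minimal, illustrative sketch, not taken from the PETSc examples): selecting this
   type from the options database; run with -mat_type mpibaij -mat_block_size 3.  The sizes are
   hypothetical and the matrix must still be preallocated and assembled afterwards.

     Mat            A;
     PetscErrorCode ierr;
     PetscInt       M = 12, N = 12;

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);
*/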
3188 
3189 EXTERN_C_BEGIN
3190 extern PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,MatType,MatReuse,Mat*);
3191 EXTERN_C_END
3192 
3193 EXTERN_C_BEGIN
3194 #undef __FUNCT__
3195 #define __FUNCT__ "MatCreate_MPIBAIJ"
3196 PetscErrorCode  MatCreate_MPIBAIJ(Mat B)
3197 {
3198   Mat_MPIBAIJ    *b;
3199   PetscErrorCode ierr;
3200   PetscBool      flg;
3201 
3202   PetscFunctionBegin;
3203   ierr    = PetscNewLog(B,Mat_MPIBAIJ,&b);CHKERRQ(ierr);
3204   B->data = (void*)b;
3205 
3206   ierr         = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3207   B->assembled = PETSC_FALSE;
3208 
3209   B->insertmode = NOT_SET_VALUES;
3210   ierr          = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
3211   ierr          = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr);
3212 
3213   /* build local table of row and column ownerships */
3214   ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);
3215 
3216   /* build cache for off array entries formed */
3217   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
3218 
3219   b->donotstash  = PETSC_FALSE;
3220   b->colmap      = NULL;
3221   b->garray      = NULL;
3222   b->roworiented = PETSC_TRUE;
3223 
3224   /* stuff used in block assembly */
3225   b->barray = 0;
3226 
3227   /* stuff used for matrix vector multiply */
3228   b->lvec  = 0;
3229   b->Mvctx = 0;
3230 
3231   /* stuff for MatGetRow() */
3232   b->rowindices   = 0;
3233   b->rowvalues    = 0;
3234   b->getrowactive = PETSC_FALSE;
3235 
3236   /* hash table stuff */
3237   b->ht           = 0;
3238   b->hd           = 0;
3239   b->ht_size      = 0;
3240   b->ht_flag      = PETSC_FALSE;
3241   b->ht_fact      = 0;
3242   b->ht_total_ct  = 0;
3243   b->ht_insert_ct = 0;
3244 
3245   /* stuff for MatGetSubMatrices_MPIBAIJ_local() */
3246   b->ijonly = PETSC_FALSE;
3247 
3248   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr);
3249   ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,NULL);CHKERRQ(ierr);
3250   if (flg) {
3251     PetscReal fact = 1.39;
3252     ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
3253     ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
3254     if (fact <= 1.0) fact = 1.39;
3255     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
3256     ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
3257   }
3258   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3259 
3260 #if defined(PETSC_HAVE_MUMPS)
3261   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps",MatGetFactor_baij_mumps);CHKERRQ(ierr);
3262 #endif
3263   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",
3264                                            "MatConvert_MPIBAIJ_MPIAdj",
3265                                            MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr);
3266   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",
3267                                            "MatConvert_MPIBAIJ_MPIAIJ",
3268                                            MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr);
3269   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C",
3270                                            "MatConvert_MPIBAIJ_MPISBAIJ",
3271                                            MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr);
3272   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3273                                            "MatStoreValues_MPIBAIJ",
3274                                            MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
3275   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3276                                            "MatRetrieveValues_MPIBAIJ",
3277                                            MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
3278   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
3279                                            "MatGetDiagonalBlock_MPIBAIJ",
3280                                            MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
3281   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C",
3282                                            "MatMPIBAIJSetPreallocation_MPIBAIJ",
3283                                            MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
3284   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",
3285                                            "MatMPIBAIJSetPreallocationCSR_MPIBAIJ",
3286                                            MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
3287   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
3288                                            "MatDiagonalScaleLocal_MPIBAIJ",
3289                                            MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
3290   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C",
3291                                            "MatSetHashTableFactor_MPIBAIJ",
3292                                            MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
3293   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpibstrm_C",
3294                                            "MatConvert_MPIBAIJ_MPIBSTRM",
3295                                            MatConvert_MPIBAIJ_MPIBSTRM);CHKERRQ(ierr);
3296   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);
3297   PetscFunctionReturn(0);
3298 }
3299 EXTERN_C_END
3300 
3301 /*MC
3302    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
3303 
3304    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
3305    and MATMPIBAIJ otherwise.
3306 
3307    Options Database Keys:
3308 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
3309 
3310   Level: beginner
3311 
3312 .seealso: MatCreateBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3313 M*/
3314 
3315 #undef __FUNCT__
3316 #define __FUNCT__ "MatMPIBAIJSetPreallocation"
3317 /*@C
3318    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
3319    (block compressed row).  For good matrix assembly performance
3320    the user should preallocate the matrix storage by setting the parameters
3321    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3322    performance can be increased by more than a factor of 50.
3323 
3324    Collective on Mat
3325 
3326    Input Parameters:
3327 +  A - the matrix
3328 .  bs   - size of block
3329 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
3330            submatrix  (same for all local rows)
3331 .  d_nnz - array containing the number of block nonzeros in the various block rows
3332            in the diagonal portion of the local submatrix (possibly different for each block
3333            row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry and
3334            set it even if it is zero.
3335 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
3336            submatrix (same for all local rows).
3337 .  o_nnz - array containing the number of block nonzeros in the various block rows of the
3338            off-diagonal portion of the local submatrix (possibly different for
3339            each block row) or NULL.
3340 
3341    If the *_nnz parameter is given then the *_nz parameter is ignored
3342 
3343    Options Database Keys:
3344 +   -mat_block_size - size of the blocks to use
3345 -   -mat_use_hash_table <fact> - use a hash table to save memory when constructing the matrix; <fact> is the hash table size factor
3346 
3347    Notes:
3348    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
3349    then it must be used on all processors that share the object for that argument.
3350 
3351    Storage Information:
3352    For a square global matrix we define each processor's diagonal portion
3353    to be its local rows and the corresponding columns (a square submatrix);
3354    each processor's off-diagonal portion encompasses the remainder of the
3355    local matrix (a rectangular submatrix).
3356 
3357    The user can specify preallocated storage for the diagonal part of
3358    the local submatrix with either d_nz or d_nnz (not both).  Set
3359    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3360    memory allocation.  Likewise, specify preallocated storage for the
3361    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3362 
3363    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3364    the figure below we depict these three local rows and all columns (0-11).
3365 
3366 .vb
3367            0 1 2 3 4 5 6 7 8 9 10 11
3368           -------------------
3369    row 3  |  o o o d d d o o o o o o
3370    row 4  |  o o o d d d o o o o o o
3371    row 5  |  o o o d d d o o o o o o
3372           -------------------
3373 .ve
3374 
3375    Thus, any entries in the d locations are stored in the d (diagonal)
3376    submatrix, and any entries in the o locations are stored in the
3377    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3378    stored simply in the MATSEQBAIJ format for compressed row storage.
3379 
3380    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3381    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3382    In general, for PDE problems in which most nonzeros are near the diagonal,
3383    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3384    or you will get TERRIBLE performance; see the users' manual chapter on
3385    matrices.
3386 
3387    You can call MatGetInfo() to get information on how effective the preallocation was;
3388    for example, the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
3389    You can also run with the option -info and look for messages with the string
3390    malloc in them to see if additional memory allocation was needed.
3391 
3392    Level: intermediate
3393 
3394 .keywords: matrix, block, aij, compressed row, sparse, parallel
3395 
3396 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocationCSR(), PetscSplitOwnership()
3397 @*/
3398 PetscErrorCode  MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3399 {
3400   PetscErrorCode ierr;
3401 
3402   PetscFunctionBegin;
3403   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3404   PetscValidType(B,1);
3405   PetscValidLogicalCollectiveInt(B,bs,2);
3406   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3407   PetscFunctionReturn(0);
3408 }
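/*
   Example (a minimal, illustrative sketch, not taken from the PETSc examples): a matrix with
   block size 3 and 10 local block rows, preallocating 5 diagonal and 2 off-diagonal nonzero
   blocks per block row.  The numbers are hypothetical; pass d_nnz/o_nnz arrays instead for
   per-row counts.

     Mat            A;
     PetscErrorCode ierr;
     PetscInt       bs = 3, mlocal = 10*bs;

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,mlocal,mlocal,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
     ierr = MatMPIBAIJSetPreallocation(A,bs,5,NULL,2,NULL);CHKERRQ(ierr);
*/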
3409 
3410 #undef __FUNCT__
3411 #define __FUNCT__ "MatCreateBAIJ"
3412 /*@C
3413    MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format
3414    (block compressed row).  For good matrix assembly performance
3415    the user should preallocate the matrix storage by setting the parameters
3416    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3417    performance can be increased by more than a factor of 50.
3418 
3419    Collective on MPI_Comm
3420 
3421    Input Parameters:
3422 +  comm - MPI communicator
3423 .  bs   - size of block
3424 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3425            This value should be the same as the local size used in creating the
3426            y vector for the matrix-vector product y = Ax.
3427 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
3428            This value should be the same as the local size used in creating the
3429            x vector for the matrix-vector product y = Ax.
3430 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3431 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3432 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
3433            submatrix  (same for all local rows)
3434 .  d_nnz - array containing the number of nonzero blocks in the various block rows
3435            in the diagonal portion of the local submatrix (possibly different for each block
3436            row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry
3437            and set it even if it is zero.
3438 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3439            submatrix (same for all local rows).
3440 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
3441            off-diagonal portion of the local submatrix (possibly different for
3442            each block row) or NULL.
3443 
3444    Output Parameter:
3445 .  A - the matrix
3446 
3447    Options Database Keys:
3448 +   -mat_block_size - size of the blocks to use
3449 -   -mat_use_hash_table <fact> - use a hash table to save memory when constructing the matrix; <fact> is the hash table size factor
3450 
3451    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3452    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3453    [MatXXXXSetPreallocation() is, for example, MatMPIBAIJSetPreallocation()]
3454 
3455    Notes:
3456    If the *_nnz parameter is given then the *_nz parameter is ignored
3457 
3458    A nonzero block is any block that has 1 or more nonzeros in it
3459 
3460    The user MUST specify either the local or global matrix dimensions
3461    (possibly both).
3462 
3463    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
3464    then it must be used on all processors that share the object for that argument.
3465 
3466    Storage Information:
3467    For a square global matrix we define each processor's diagonal portion
3468    to be its local rows and the corresponding columns (a square submatrix);
3469    each processor's off-diagonal portion encompasses the remainder of the
3470    local matrix (a rectangular submatrix).
3471 
3472    The user can specify preallocated storage for the diagonal part of
3473    the local submatrix with either d_nz or d_nnz (not both).  Set
3474    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3475    memory allocation.  Likewise, specify preallocated storage for the
3476    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3477 
3478    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3479    the figure below we depict these three local rows and all columns (0-11).
3480 
3481 .vb
3482            0 1 2 3 4 5 6 7 8 9 10 11
3483           -------------------
3484    row 3  |  o o o d d d o o o o o o
3485    row 4  |  o o o d d d o o o o o o
3486    row 5  |  o o o d d d o o o o o o
3487           -------------------
3488 .ve
3489 
3490    Thus, any entries in the d locations are stored in the d (diagonal)
3491    submatrix, and any entries in the o locations are stored in the
3492    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3493    stored simply in the MATSEQBAIJ format for compressed row storage.
3494 
3495    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3496    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3497    In general, for PDE problems in which most nonzeros are near the diagonal,
3498    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3499    or you will get TERRIBLE performance; see the users' manual chapter on
3500    matrices.
3501 
3502    Level: intermediate
3503 
3504 .keywords: matrix, block, aij, compressed row, sparse, parallel
3505 
3506 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3507 @*/
3508 PetscErrorCode  MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3509 {
3510   PetscErrorCode ierr;
3511   PetscMPIInt    size;
3512 
3513   PetscFunctionBegin;
3514   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3515   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3516   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3517   if (size > 1) {
3518     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
3519     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3520   } else {
3521     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3522     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3523   }
3524   PetscFunctionReturn(0);
3525 }
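/*
   Example (a minimal, illustrative sketch, not taken from the PETSc examples): the direct
   creation route.  The sizes and block counts are hypothetical; the MatCreate()/MatSetType()/
   MatMPIBAIJSetPreallocation() sequence recommended above is the usual alternative.

     Mat            A;
     PetscErrorCode ierr;
     PetscInt       bs = 2, mlocal = 8*bs;

     ierr = MatCreateBAIJ(PETSC_COMM_WORLD,bs,mlocal,mlocal,PETSC_DETERMINE,PETSC_DETERMINE,
                          5,NULL,2,NULL,&A);CHKERRQ(ierr);
*/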
3526 
3527 #undef __FUNCT__
3528 #define __FUNCT__ "MatDuplicate_MPIBAIJ"
3529 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3530 {
3531   Mat            mat;
3532   Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
3533   PetscErrorCode ierr;
3534   PetscInt       len=0;
3535 
3536   PetscFunctionBegin;
3537   *newmat = 0;
3538   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
3539   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3540   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3541   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3542 
3543   mat->factortype   = matin->factortype;
3544   mat->preallocated = PETSC_TRUE;
3545   mat->assembled    = PETSC_TRUE;
3546   mat->insertmode   = NOT_SET_VALUES;
3547 
3548   a             = (Mat_MPIBAIJ*)mat->data;
3549   mat->rmap->bs = matin->rmap->bs;
3550   a->bs2        = oldmat->bs2;
3551   a->mbs        = oldmat->mbs;
3552   a->nbs        = oldmat->nbs;
3553   a->Mbs        = oldmat->Mbs;
3554   a->Nbs        = oldmat->Nbs;
3555 
3556   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3557   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3558 
3559   a->size         = oldmat->size;
3560   a->rank         = oldmat->rank;
3561   a->donotstash   = oldmat->donotstash;
3562   a->roworiented  = oldmat->roworiented;
3563   a->rowindices   = 0;
3564   a->rowvalues    = 0;
3565   a->getrowactive = PETSC_FALSE;
3566   a->barray       = 0;
3567   a->rstartbs     = oldmat->rstartbs;
3568   a->rendbs       = oldmat->rendbs;
3569   a->cstartbs     = oldmat->cstartbs;
3570   a->cendbs       = oldmat->cendbs;
3571 
3572   /* hash table stuff */
3573   a->ht           = 0;
3574   a->hd           = 0;
3575   a->ht_size      = 0;
3576   a->ht_flag      = oldmat->ht_flag;
3577   a->ht_fact      = oldmat->ht_fact;
3578   a->ht_total_ct  = 0;
3579   a->ht_insert_ct = 0;
3580 
3581   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
3582   if (oldmat->colmap) {
3583 #if defined(PETSC_USE_CTABLE)
3584     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3585 #else
3586     ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3587     ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3588     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3589 #endif
3590   } else a->colmap = 0;
3591 
3592   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
3593     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3594     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3595     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
3596   } else a->garray = 0;
3597 
3598   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
3599   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3600   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3601   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3602   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3603 
3604   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3605   ierr    = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3606   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3607   ierr    = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3608   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3609   *newmat = mat;
3610   PetscFunctionReturn(0);
3611 }
3612 
3613 #undef __FUNCT__
3614 #define __FUNCT__ "MatLoad_MPIBAIJ"
3615 PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer)
3616 {
3617   PetscErrorCode ierr;
3618   int            fd;
3619   PetscInt       i,nz,j,rstart,rend;
3620   PetscScalar    *vals,*buf;
3621   MPI_Comm       comm;
3622   MPI_Status     status;
3623   PetscMPIInt    rank,size,maxnz;
3624   PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
3625   PetscInt       *locrowlens = NULL,*procsnz = NULL,*browners = NULL;
3626   PetscInt       jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax;
3627   PetscMPIInt    tag    = ((PetscObject)viewer)->tag;
3628   PetscInt       *dlens = NULL,*odlens = NULL,*mask = NULL,*masked1 = NULL,*masked2 = NULL,rowcount,odcount;
3629   PetscInt       dcount,kmax,k,nzcount,tmp,mend,sizesset=1,grows,gcols;
3630 
3631   PetscFunctionBegin;
3632   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3633   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr);
3634   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3635   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3636 
3637   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3638   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3639   if (!rank) {
3640     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3641     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
3642     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3643   }
3644 
3645   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
3646 
3647   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3648   M    = header[1]; N = header[2];
3649 
3650   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3651   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
3652   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
3653 
3654   /* If global sizes are set, check if they are consistent with that given in the file */
3655   if (sizesset) {
3656     ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
3657   }
3658   if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: Matrix in file has (%d) and input matrix has (%d)",M,grows);
3659   if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3660 
3661   if (M != N) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Can only do square matrices");
3662 
3663   /*
3664      This code adds extra rows to make sure the number of rows is
3665      divisible by the blocksize
3666   */
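  /* For example (illustrative numbers only): with M = 10 and bs = 4, Mbs = 10/4 = 2 and
     extra_rows = 4 - 10 + 8 = 2, so two padding rows are added and Mbs becomes 3 (12 rows total) */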
3667   Mbs        = M/bs;
3668   extra_rows = bs - M + bs*Mbs;
3669   if (extra_rows == bs) extra_rows = 0;
3670   else                  Mbs++;
3671   if (extra_rows && !rank) {
3672     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3673   }
3674 
3675   /* determine ownership of all rows */
3676   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
3677     mbs = Mbs/size + ((Mbs % size) > rank);
3678     m   = mbs*bs;
3679   } else { /* User set */
3680     m   = newmat->rmap->n;
3681     mbs = m/bs;
3682   }
3683   ierr = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr);
3684   ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3685 
3686   /* process 0 needs enough room for process with most rows */
3687   if (!rank) {
3688     mmax = rowners[1];
3689     for (i=2; i<=size; i++) {
3690       mmax = PetscMax(mmax,rowners[i]);
3691     }
3692     mmax*=bs;
3693   } else mmax = m;
3694 
3695   rowners[0] = 0;
3696   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
3697   for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
3698   rstart = rowners[rank];
3699   rend   = rowners[rank+1];
3700 
3701   /* distribute row lengths to all processors */
3702   ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr);
3703   if (!rank) {
3704     mend = m;
3705     if (size == 1) mend = mend - extra_rows;
3706     ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr);
3707     for (j=mend; j<m; j++) locrowlens[j] = 1;
3708     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3709     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3710     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3711     for (j=0; j<m; j++) {
3712       procsnz[0] += locrowlens[j];
3713     }
3714     for (i=1; i<size; i++) {
3715       mend = browners[i+1] - browners[i];
3716       if (i == size-1) mend = mend - extra_rows;
3717       ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr);
3718       for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
3719       /* calculate the number of nonzeros on each processor */
3720       for (j=0; j<browners[i+1]-browners[i]; j++) {
3721         procsnz[i] += rowlengths[j];
3722       }
3723       ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3724     }
3725     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3726   } else {
3727     ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3728   }
3729 
3730   if (!rank) {
3731     /* determine max buffer needed and allocate it */
3732     maxnz = procsnz[0];
3733     for (i=1; i<size; i++) {
3734       maxnz = PetscMax(maxnz,procsnz[i]);
3735     }
3736     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3737 
3738     /* read in my part of the matrix column indices  */
3739     nz     = procsnz[0];
3740     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
3741     mycols = ibuf;
3742     if (size == 1) nz -= extra_rows;
3743     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3744     if (size == 1) {
3745       for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i;
3746     }
3747 
3748     /* read in the parts for the other processors (except the last) and ship them off */
3749     for (i=1; i<size-1; i++) {
3750       nz   = procsnz[i];
3751       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3752       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3753     }
3754     /* read in the stuff for the last proc */
3755     if (size != 1) {
3756       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
3757       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3758       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
3759       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
3760     }
3761     ierr = PetscFree(cols);CHKERRQ(ierr);
3762   } else {
3763     /* determine buffer space needed for message */
3764     nz = 0;
3765     for (i=0; i<m; i++) {
3766       nz += locrowlens[i];
3767     }
3768     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
3769     mycols = ibuf;
3770     /* receive message of column indices*/
3771     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3772     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
3773     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
3774   }
3775 
3776   /* loop over local rows, determining number of off diagonal entries */
3777   ierr     = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
3778   ierr     = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
3779   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3780   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3781   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3782   rowcount = 0; nzcount = 0;
3783   for (i=0; i<mbs; i++) {
3784     dcount  = 0;
3785     odcount = 0;
3786     for (j=0; j<bs; j++) {
3787       kmax = locrowlens[rowcount];
3788       for (k=0; k<kmax; k++) {
3789         tmp = mycols[nzcount++]/bs;
3790         if (!mask[tmp]) {
3791           mask[tmp] = 1;
3792           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
3793           else masked1[dcount++] = tmp;
3794         }
3795       }
3796       rowcount++;
3797     }
3798 
3799     dlens[i]  = dcount;
3800     odlens[i] = odcount;
3801 
3802     /* zero out the mask elements we set */
3803     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
3804     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
3805   }
3806 
3807 
3808   if (!sizesset) {
3809     ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3810   }
3811   ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
3812 
3813   if (!rank) {
3814     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
3815     /* read in my part of the matrix numerical values  */
3816     nz     = procsnz[0];
3817     vals   = buf;
3818     mycols = ibuf;
3819     if (size == 1) nz -= extra_rows;
3820     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3821     if (size == 1) {
3822       for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0;
3823     }
3824 
3825     /* insert into matrix */
3826     jj = rstart*bs;
3827     for (i=0; i<m; i++) {
3828       ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3829       mycols += locrowlens[i];
3830       vals   += locrowlens[i];
3831       jj++;
3832     }
3833     /* read in other processors (except the last one) and ship out */
3834     for (i=1; i<size-1; i++) {
3835       nz   = procsnz[i];
3836       vals = buf;
3837       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3838       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3839     }
3840     /* the last proc */
3841     if (size != 1) {
3842       nz   = procsnz[i] - extra_rows;
3843       vals = buf;
3844       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3845       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
3846       ierr = MPIULong_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3847     }
3848     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3849   } else {
3850     /* receive numeric values */
3851     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
3852 
3853     /* receive message of values*/
3854     vals   = buf;
3855     mycols = ibuf;
3856     ierr   = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3857 
3858     /* insert into matrix */
3859     jj = rstart*bs;
3860     for (i=0; i<m; i++) {
3861       ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3862       mycols += locrowlens[i];
3863       vals   += locrowlens[i];
3864       jj++;
3865     }
3866   }
3867   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
3868   ierr = PetscFree(buf);CHKERRQ(ierr);
3869   ierr = PetscFree(ibuf);CHKERRQ(ierr);
3870   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
3871   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
3872   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
3873   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3874   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3875   PetscFunctionReturn(0);
3876 }
3877 
3878 #undef __FUNCT__
3879 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
3880 /*@
3881    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
3882 
3883    Input Parameters:
3884 +  mat  - the matrix
3885 -  fact - the hash table size factor
3886 
3887    Not Collective, each process can use a different factor
3888 
3889    Level: advanced
3890 
3891   Notes:
3892    This can also be set by the command line option: -mat_use_hash_table <fact>
3893 
3894 .keywords: matrix, hashtable, factor, HT
3895 
3896 .seealso: MatSetOption()
3897 @*/
3898 PetscErrorCode  MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
3899 {
3900   PetscErrorCode ierr;
3901 
3902   PetscFunctionBegin;
3903   ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr);
3904   PetscFunctionReturn(0);
3905 }
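/*
   Example (a minimal, illustrative sketch, not taken from the PETSc examples): turning on the
   hash table and then overriding its size factor for an existing MPIBAIJ matrix A, before the
   values are inserted.  The value 1.6 is hypothetical.

     ierr = MatSetOption(A,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
     ierr = MatMPIBAIJSetHashTableFactor(A,1.6);CHKERRQ(ierr);
*/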
3906 
3907 EXTERN_C_BEGIN
3908 #undef __FUNCT__
3909 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
3910 PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
3911 {
3912   Mat_MPIBAIJ *baij;
3913 
3914   PetscFunctionBegin;
3915   baij          = (Mat_MPIBAIJ*)mat->data;
3916   baij->ht_fact = fact;
3917   PetscFunctionReturn(0);
3918 }
3919 EXTERN_C_END
3920 
3921 #undef __FUNCT__
3922 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
3923 PetscErrorCode  MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
3924 {
3925   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
3926 
3927   PetscFunctionBegin;
3928   *Ad     = a->A;
3929   *Ao     = a->B;
3930   *colmap = a->garray;
3931   PetscFunctionReturn(0);
3932 }
3933 
3934 /*
3935     Special version for direct calls from Fortran (to eliminate two function call overheads)
3936 */
3937 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3938 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3939 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3940 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3941 #endif
3942 
3943 #undef __FUNCT__
3944 #define __FUNCT__ "matmpibaijsetvaluesblocked"
3945 /*@C
3946   MatMPIBAIJSetValuesBlocked - Direct Fortran call used in place of a call to MatSetValuesBlocked()
3947 
3948   Collective on Mat
3949 
3950   Input Parameters:
3951 + mat - the matrix
3952 . min - number of input rows
3953 . im - input rows
3954 . nin - number of input columns
3955 . in - input columns
3956 . v - numerical values input
3957 - addvin - INSERT_VALUES or ADD_VALUES
3958 
3959   Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse.
3960 
3961   Level: advanced
3962 
3963 .seealso:   MatSetValuesBlocked()
3964 @*/
3965 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
3966 {
3967   /* convert input arguments to C version */
3968   Mat        mat  = *matin;
3969   PetscInt   m    = *min, n = *nin;
3970   InsertMode addv = *addvin;
3971 
3972   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
3973   const MatScalar *value;
3974   MatScalar       *barray     = baij->barray;
3975   PetscBool       roworiented = baij->roworiented;
3976   PetscErrorCode  ierr;
3977   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
3978   PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
3979   PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
3980 
3981   PetscFunctionBegin;
3982   /* tasks normally handled by MatSetValuesBlocked() */
3983   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
3984 #if defined(PETSC_USE_DEBUG)
3985   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
3986   if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
3987 #endif
3988   if (mat->assembled) {
3989     mat->was_assembled = PETSC_TRUE;
3990     mat->assembled     = PETSC_FALSE;
3991   }
3992   ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
3993 
3994 
3995   if (!barray) {
3996     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
3997     baij->barray = barray;
3998   }
3999 
4000   if (roworiented) stepval = (n-1)*bs;
4001   else stepval = (m-1)*bs;
4002 
4003   for (i=0; i<m; i++) {
4004     if (im[i] < 0) continue;
4005 #if defined(PETSC_USE_DEBUG)
4006     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
4007 #endif
4008     if (im[i] >= rstart && im[i] < rend) {
4009       row = im[i] - rstart;
4010       for (j=0; j<n; j++) {
4011         /* If NumCol = 1 then a copy is not required */
4012         if ((roworiented) && (n == 1)) {
4013           barray = (MatScalar*)v + i*bs2;
4014         } else if ((!roworiented) && (m == 1)) {
4015           barray = (MatScalar*)v + j*bs2;
4016         } else { /* Here a copy is required */
4017           if (roworiented) {
4018             value = v + i*(stepval+bs)*bs + j*bs;
4019           } else {
4020             value = v + j*(stepval+bs)*bs + i*bs;
4021           }
4022           for (ii=0; ii<bs; ii++,value+=stepval) {
4023             for (jj=0; jj<bs; jj++) {
4024               *barray++ = *value++;
4025             }
4026           }
4027           barray -=bs2;
4028         }
4029 
4030         if (in[j] >= cstart && in[j] < cend) {
4031           col  = in[j] - cstart;
4032           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
4033         } else if (in[j] < 0) continue;
4034 #if defined(PETSC_USE_DEBUG)
4035         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
4036 #endif
4037         else {
4038           if (mat->was_assembled) {
4039             if (!baij->colmap) {
4040               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
4041             }
4042 
4043 #if defined(PETSC_USE_DEBUG)
4044 #if defined(PETSC_USE_CTABLE)
4045             { PetscInt data;
4046               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
4047               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
4048             }
4049 #else
4050             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
4051 #endif
4052 #endif
4053 #if defined(PETSC_USE_CTABLE)
4054             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
4055             col  = (col - 1)/bs;
4056 #else
4057             col = (baij->colmap[in[j]] - 1)/bs;
4058 #endif
4059             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
4060               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
4061               col  =  in[j];
4062             }
4063           } else col = in[j];
4064           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
4065         }
4066       }
4067     } else {
4068       if (!baij->donotstash) {
4069         if (roworiented) {
4070           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
4071         } else {
4072           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
4073         }
4074       }
4075     }
4076   }
4077 
4078   /* task normally handled by MatSetValuesBlocked() */
4079   ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
4080   PetscFunctionReturn(0);
4081 }
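/*
   For reference (a sketch, not used by the Fortran stub above): the generic path that
   this direct binding short-circuits is simply

      ierr = MatSetValuesBlocked(mat,m,im,n,in,v,addv);CHKERRQ(ierr);

   which dispatches through mat->ops->setvaluesblocked and hence incurs the extra
   function-call overhead mentioned in the comment before this routine.
*/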
4082 
4083 #undef __FUNCT__
4084 #define __FUNCT__ "MatCreateMPIBAIJWithArrays"
4085 /*@
4086      MatCreateMPIBAIJWithArrays - creates an MPI BAIJ matrix using arrays that contain the local rows
4087          in standard CSR format.
4088 
4089    Collective on MPI_Comm
4090 
4091    Input Parameters:
4092 +  comm - MPI communicator
4093 .  bs - the block size (only a block size of 1 is supported)
4094 .  m - number of local rows (Cannot be PETSC_DECIDE)
4095 .  n - This value should be the same as the local size used in creating the
4096        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
4097        it calculated if N is given). For square matrices n is almost always m.
4098 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4099 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4100 .   i - row indices
4101 .   j - column indices
4102 -   a - matrix values
4103 
4104    Output Parameter:
4105 .   mat - the matrix
4106 
4107    Level: intermediate
4108 
4109    Notes:
4110        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4111      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4112      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4113 
4114        The i and j indices are 0 based, and the entries of i are offsets into the local j (and a) arrays.
4115 
4116 .keywords: matrix, baij, compressed row, sparse, parallel
4117 
4118 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4119           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4120 @*/
4121 PetscErrorCode  MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4122 {
4123   PetscErrorCode ierr;
4124 
4125   PetscFunctionBegin;
4126   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4127   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4128   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4129   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4130   ierr = MatSetType(*mat,MATMPIBAIJ);CHKERRQ(ierr);
4131   ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
4132   PetscFunctionReturn(0);
4133 }
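/*
   A minimal usage sketch (values are illustrative assumptions; single MPI rank,
   bs = 1 as required above).  The process passes its two local rows of a 2 x 2
   matrix in CSR form: i[] holds 0-based offsets into j[] and a[], so row 0 has
   the entries (0,2.0) and (1,-1.0), and row 1 has the entry (1,3.0).

      Mat         C;
      PetscInt    i[3] = {0,2,3};
      PetscInt    j[3] = {0,1,1};
      PetscScalar a[3] = {2.0,-1.0,3.0};
      ierr = MatCreateMPIBAIJWithArrays(PETSC_COMM_WORLD,1,2,2,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,&C);CHKERRQ(ierr);
*/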
4134