xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision c4762a1b19cd2af06abeed90e8f9d34fb975dd94)
1 #include <../src/mat/impls/baij/mpi/mpibaij.h>   /*I  "petscmat.h"  I*/
2 
3 #include <petsc/private/hashseti.h>
4 #include <petscblaslapack.h>
5 #include <petscsf.h>
6 
7 #if defined(PETSC_HAVE_HYPRE)
8 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
9 #endif
10 
11 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
12 {
13   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
14   PetscErrorCode ierr;
15   PetscInt       i,*idxb = 0;
16   PetscScalar    *va,*vb;
17   Vec            vtmp;
18 
19   PetscFunctionBegin;
20   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
21   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
22   if (idx) {
23     for (i=0; i<A->rmap->n; i++) {
24       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
25     }
26   }
27 
28   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
29   if (idx) {ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);}
30   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
31   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
32 
33   for (i=0; i<A->rmap->n; i++) {
34     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
35       va[i] = vb[i];
36       if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);
37     }
38   }
39 
40   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
41   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
42   ierr = PetscFree(idxb);CHKERRQ(ierr);
43   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
44   PetscFunctionReturn(0);
45 }
46 
47 PetscErrorCode  MatStoreValues_MPIBAIJ(Mat mat)
48 {
49   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
50   PetscErrorCode ierr;
51 
52   PetscFunctionBegin;
53   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
54   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
55   PetscFunctionReturn(0);
56 }
57 
58 PetscErrorCode  MatRetrieveValues_MPIBAIJ(Mat mat)
59 {
60   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
61   PetscErrorCode ierr;
62 
63   PetscFunctionBegin;
64   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
65   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
66   PetscFunctionReturn(0);
67 }
68 
69 /*
70      Local utility routine that creates a mapping from the global column
71    number to the local number in the off-diagonal part of the local
72    storage of the matrix.  This is done in a non scalable way since the
73    length of colmap equals the global matrix length.
74 */
PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *B    = (Mat_SeqBAIJ*)baij->B->data; /* off-diagonal block */
  PetscErrorCode ierr;
  PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;     /* nbs: number of block columns present in B */

  PetscFunctionBegin;
#if defined(PETSC_USE_CTABLE)
  /* Sparse variant: hash table keyed by global block column + 1; the stored
     value is the first local point column of that block + 1 (0 means absent) */
  ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
  for (i=0; i<nbs; i++) {
    ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  /* Dense variant: one entry per global block column (zeroed by PetscCalloc1,
     so 0 marks "block not present"); entry-1 is the first local point column */
  ierr = PetscCalloc1(baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}
95 
/* Inserts or adds one point value at local point indices (row,col) of the
   diagonal block A; (orow,ocol) are the matching global indices, used only in
   error messages.  Relies on the macro-scope work variables declared by the
   caller (rp,ap,bap,brow,bcol,ridx,cidx,low,high,t,_i,N,nrow,rmax,ierr). */
#define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv,orow,ocol)       \
  { \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    /* binary search narrows to a window of <= 3 block columns ... */ \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    /* ... then a linear scan locates the block column, if present */ \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        /* block exists: update entry (ridx,cidx); blocks are stored column-major */ \
        bap = ap +  bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value;  \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; /* new nonzeros silently dropped */ \
    if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    ierr = PetscArraymove(rp+_i+1,rp+_i,N-_i+1);CHKERRQ(ierr);\
    ierr = PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1));CHKERRQ(ierr); \
    ierr = PetscArrayzero(ap+bs2*_i,bs2);CHKERRQ(ierr);  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
a_noinsert:; \
    ailen[brow] = nrow; \
  }
131 
/* Same as MatSetValues_SeqBAIJ_A_Private() but operating on the off-diagonal
   block B (b*, bi, bj, ba, bimax, bilen work variables).  (orow,ocol) are the
   global indices, used only in error messages. */
#define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv,orow,ocol)       \
  { \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    /* binary search narrows to a window of <= 3 block columns ... */ \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    /* ... then a linear scan locates the block column, if present */ \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        /* block exists: update entry (ridx,cidx); blocks are stored column-major */ \
        bap = ap +  bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value;  \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; /* new nonzeros silently dropped */ \
    if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column  (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    ierr = PetscArraymove(rp+_i+1,rp+_i,N-_i+1);CHKERRQ(ierr);\
    ierr = PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1));CHKERRQ(ierr);\
    ierr = PetscArrayzero(ap+bs2*_i,bs2);CHKERRQ(ierr);  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
b_noinsert:; \
    bilen[brow] = nrow; \
  }
167 
/*
   Point-wise (non-blocked) MatSetValues() for MPIBAIJ.  Entries in locally
   owned rows go directly into the diagonal (A) or off-diagonal (B) sequential
   blocks via the MatSetValues_SeqBAIJ_{A,B}_Private() macros; entries in
   off-process rows are stashed and communicated at assembly time.
   im[]/in[] are global POINT (not block) indices; negative indices are
   ignored.  v is row- or column-oriented per baij->roworiented.
*/
PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  MatScalar      value;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;  /* ownership ranges in point indices */
  PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat         A     = baij->A;
  Mat_SeqBAIJ *a    = (Mat_SeqBAIJ*)(A)->data;
  PetscInt    *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar   *aa   =a->a;

  Mat         B     = baij->B;
  Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
  PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar   *ba   =b->a;

  PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar *ap,*bap;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue; /* negative row indices are ignored by convention */
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) { /* locally owned row */
      row = im[i] - rstart_orig;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig) {
          /* column in the diagonal block */
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv,im[i],in[j]);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else { /* column in the off-diagonal block */
          if (mat->was_assembled) {
            /* B uses compressed column numbering once assembled: translate the
               global column via colmap (built lazily) */
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
              /* block column not yet present: fall back to uncompressed
                 (disassembled) numbering so it can be inserted */
              ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B    = baij->B;
              b    = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba   =b->a;
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
            else col += in[j]%bs; /* add the point offset within the block */
          } else col = in[j];
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv,im[i],in[j]);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else { /* off-process row: stash for assembly-time communication */
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
254 
255 PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
256 {
257   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
258   PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
259   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
260   PetscErrorCode    ierr;
261   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
262   PetscBool         roworiented=a->roworiented;
263   const PetscScalar *value     = v;
264   MatScalar         *ap,*aa = a->a,*bap;
265 
266   PetscFunctionBegin;
267   rp   = aj + ai[row];
268   ap   = aa + bs2*ai[row];
269   rmax = imax[row];
270   nrow = ailen[row];
271   value = v;
272   low = 0;
273   high = nrow;
274   while (high-low > 7) {
275     t = (low+high)/2;
276     if (rp[t] > col) high = t;
277     else             low  = t;
278   }
279   for (i=low; i<high; i++) {
280     if (rp[i] > col) break;
281     if (rp[i] == col) {
282       bap = ap +  bs2*i;
283       if (roworiented) {
284         if (is == ADD_VALUES) {
285           for (ii=0; ii<bs; ii++) {
286             for (jj=ii; jj<bs2; jj+=bs) {
287               bap[jj] += *value++;
288             }
289           }
290         } else {
291           for (ii=0; ii<bs; ii++) {
292             for (jj=ii; jj<bs2; jj+=bs) {
293               bap[jj] = *value++;
294             }
295           }
296         }
297       } else {
298         if (is == ADD_VALUES) {
299           for (ii=0; ii<bs; ii++,value+=bs) {
300             for (jj=0; jj<bs; jj++) {
301               bap[jj] += value[jj];
302             }
303             bap += bs;
304           }
305         } else {
306           for (ii=0; ii<bs; ii++,value+=bs) {
307             for (jj=0; jj<bs; jj++) {
308               bap[jj]  = value[jj];
309             }
310             bap += bs;
311           }
312         }
313       }
314       goto noinsert2;
315     }
316   }
317   if (nonew == 1) goto noinsert2;
318   if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%D, %D) in the matrix", orow, ocol);
319   MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
320   N = nrow++ - 1; high++;
321   /* shift up all the later entries in this row */
322   ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr);
323   ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr);
324   rp[i] = col;
325   bap   = ap +  bs2*i;
326   if (roworiented) {
327     for (ii=0; ii<bs; ii++) {
328       for (jj=ii; jj<bs2; jj+=bs) {
329         bap[jj] = *value++;
330       }
331     }
332   } else {
333     for (ii=0; ii<bs; ii++) {
334       for (jj=0; jj<bs; jj++) {
335         *bap++ = *value++;
336       }
337     }
338   }
339   noinsert2:;
340   ailen[row] = nrow;
341   PetscFunctionReturn(0);
342 }
343 
344 /*
345     This routine should be optimized so that the block copy at ** Here a copy is required ** below is not needed
346     by passing additional stride information into the MatSetValuesBlocked_SeqBAIJ_Inlined() routine
347 */
/*
   Blocked MatSetValues() for MPIBAIJ: im[]/in[] are global BLOCK indices and
   v holds m*n blocks of bs*bs values.  Locally owned block rows are routed to
   the diagonal (A) or off-diagonal (B) sequential blocks; off-process block
   rows are stashed for assembly-time communication.
*/
PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  const PetscScalar *value;
  MatScalar         *barray     = baij->barray;
  PetscBool         roworiented = baij->roworiented;
  PetscErrorCode    ierr;
  PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;  /* ownership ranges in BLOCK indices */
  PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
  PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  /* lazily allocate the bs*bs scratch block used when a copy of v's block is needed */
  if (!barray) {
    ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  /* stride between consecutive rows of one block within v */
  if (roworiented) stepval = (n-1)*bs;
  else stepval = (m-1)*bs;

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue; /* negative block rows are ignored by convention */
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) { /* locally owned block row */
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          /* NOTE(review): cast discards const — barray then aliases v and is
             only read by the callee; it must not be written through */
          barray = (MatScalar*)v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + (i*(stepval+bs) + j)*bs;
          } else {
            value = v + (j*(stepval+bs) + i)*bs;
          }
          /* gather the strided block from v into contiguous storage */
          for (ii=0; ii<bs; ii++,value+=bs+stepval) {
            for (jj=0; jj<bs; jj++) barray[jj] = value[jj];
            barray += bs;
          }
          barray -= bs2; /* rewind to the start of the scratch block */
        }

        if (in[j] >= cstart && in[j] < cend) {
          /* block column in the diagonal block */
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %D max %D",in[j],baij->Nbs-1);
#endif
        else { /* block column in the off-diagonal block */
          if (mat->was_assembled) {
            /* once assembled, B uses compressed column numbering: translate via colmap */
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined(PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs; /* colmap stores point columns; convert back to a block column */
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
              /* block column not yet present: revert to uncompressed numbering */
              ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  =  in[j];
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked indexed nonzero block (%D, %D) into matrix",im[i],in[j]);
          } else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
        }
      }
    } else { /* off-process block row: stash for assembly-time communication */
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
444 
/* Multiplicative hashing: HASH_KEY is the golden-ratio conjugate and
   HASH(size,key,tmp) maps key into [0,size) using the fractional part of
   key*HASH_KEY; tmp is a caller-supplied PetscReal scratch variable (the
   macro evaluates to the hash after assigning tmp). */
#define HASH_KEY 0.6180339887
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/*
   Point-wise MatSetValues() for the hash-table fast path: the nonzero block
   pattern is frozen, so each target block is located directly via the
   (blockrow*Nbs+blockcol+1) -> value-pointer hash table built by
   MatCreateHashTable_MPIBAIJ_Private().  im[]/in[] are global POINT indices.
   Only existing blocks can be written; a miss is an error, not an insert.
*/
PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij       = (Mat_MPIBAIJ*)mat->data;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig  =mat->rmap->rend,Nbs=baij->Nbs;
  PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
  PetscReal      tmp;
  MatScalar      **HD = baij->hd,value;
#if defined(PETSC_USE_DEBUG)
  /* NOTE(review): seeding these probe counters from the accumulated totals and
     then doing "+=" at the end doubles the previous totals every call — the
     statistics (debug-only) look over-counted; verify intended semantics */
  PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) { /* locally owned row */
      for (j=0; j<n; j++) {
        col = in[j];
        if (roworiented) value = v[i*n+j];
        else             value = v[i+j*m];
        /* Look up into the Hash Table */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);


        idx = h1;
        /* linear probing from h1, wrapping once around the table */
#if defined(PETSC_USE_DEBUG)
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
#endif
        /* A HASH table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else if (!baij->donotstash) { /* off-process row: stash for assembly */
      if (roworiented) {
        ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
      } else {
        ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct  += total_ct;
  baij->ht_insert_ct += insert_ct;
#endif
  PetscFunctionReturn(0);
}
519 
520 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
521 {
522   Mat_MPIBAIJ       *baij       = (Mat_MPIBAIJ*)mat->data;
523   PetscBool         roworiented = baij->roworiented;
524   PetscErrorCode    ierr;
525   PetscInt          i,j,ii,jj,row,col;
526   PetscInt          rstart=baij->rstartbs;
527   PetscInt          rend  =mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
528   PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
529   PetscReal         tmp;
530   MatScalar         **HD = baij->hd,*baij_a;
531   const PetscScalar *v_t,*value;
532 #if defined(PETSC_USE_DEBUG)
533   PetscInt          total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
534 #endif
535 
536   PetscFunctionBegin;
537   if (roworiented) stepval = (n-1)*bs;
538   else stepval = (m-1)*bs;
539 
540   for (i=0; i<m; i++) {
541 #if defined(PETSC_USE_DEBUG)
542     if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
543     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
544 #endif
545     row = im[i];
546     v_t = v + i*nbs2;
547     if (row >= rstart && row < rend) {
548       for (j=0; j<n; j++) {
549         col = in[j];
550 
551         /* Look up into the Hash Table */
552         key = row*Nbs+col+1;
553         h1  = HASH(size,key,tmp);
554 
555         idx = h1;
556 #if defined(PETSC_USE_DEBUG)
557         total_ct++;
558         insert_ct++;
559         if (HT[idx] != key) {
560           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
561           if (idx == size) {
562             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
563             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
564           }
565         }
566 #else
567         if (HT[idx] != key) {
568           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
569           if (idx == size) {
570             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
571             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
572           }
573         }
574 #endif
575         baij_a = HD[idx];
576         if (roworiented) {
577           /*value = v + i*(stepval+bs)*bs + j*bs;*/
578           /* value = v + (i*(stepval+bs)+j)*bs; */
579           value = v_t;
580           v_t  += bs;
581           if (addv == ADD_VALUES) {
582             for (ii=0; ii<bs; ii++,value+=stepval) {
583               for (jj=ii; jj<bs2; jj+=bs) {
584                 baij_a[jj] += *value++;
585               }
586             }
587           } else {
588             for (ii=0; ii<bs; ii++,value+=stepval) {
589               for (jj=ii; jj<bs2; jj+=bs) {
590                 baij_a[jj] = *value++;
591               }
592             }
593           }
594         } else {
595           value = v + j*(stepval+bs)*bs + i*bs;
596           if (addv == ADD_VALUES) {
597             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
598               for (jj=0; jj<bs; jj++) {
599                 baij_a[jj] += *value++;
600               }
601             }
602           } else {
603             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
604               for (jj=0; jj<bs; jj++) {
605                 baij_a[jj] = *value++;
606               }
607             }
608           }
609         }
610       }
611     } else {
612       if (!baij->donotstash) {
613         if (roworiented) {
614           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
615         } else {
616           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
617         }
618       }
619     }
620   }
621 #if defined(PETSC_USE_DEBUG)
622   baij->ht_total_ct  += total_ct;
623   baij->ht_insert_ct += insert_ct;
624 #endif
625   PetscFunctionReturn(0);
626 }
627 
/*
   Retrieves values at global POINT indices (idxm[] x idxn[]) into v (row-major
   m x n).  Only locally owned rows are supported; entries whose column falls
   in a block not stored locally come back as 0.0.
*/
PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
  PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) { /* locally owned row */
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          /* column in the diagonal block */
          col  = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          /* column in the off-diagonal block: translate through colmap */
          if (!baij->colmap) {
            ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--; /* colmap stores local point column + 1; -1 now means "absent" */
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          /* block not stored locally (or colmap entry stale): value is 0.0;
             short-circuit keeps garray from being indexed with negative data */
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs; /* add the point offset within the block */
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}
668 
/*
   Computes the Frobenius, 1- (max column sum) or infinity- (max row sum) norm
   of a parallel BAIJ matrix.  On one process it delegates to the sequential
   implementation; otherwise each process accumulates over its diagonal (A)
   and off-diagonal (B) blocks and the result is combined with an Allreduce.
*/
PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr =  MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      /* sum of |a_ij|^2 over both blocks, reduced, then sqrt */
      v  = amat->a;
      nz = amat->nz*bs2;
      for (i=0; i<nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      v  = bmat->a;
      nz = bmat->nz*bs2;
      for (i=0; i<nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *nrm = PetscSqrtReal(*nrm);
    } else if (type == NORM_1) { /* max column sum */
      PetscReal *tmp,*tmp2;
      /* NOTE(review): cstart (global block column of A's first column) is taken
         from rstartbs; this agrees with cstartbs only when row and column
         layouts coincide (square matrices) — confirm for rectangular layouts */
      PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
      ierr = PetscCalloc1(mat->cmap->N,&tmp);CHKERRQ(ierr);
      ierr = PetscMalloc1(mat->cmap->N,&tmp2);CHKERRQ(ierr);
      /* accumulate |a_ij| per global column; blocks stored column-major */
      v    = amat->a; jj = amat->j;
      for (i=0; i<amat->nz; i++) {
        for (j=0; j<bs; j++) {
          col = bs*(cstart + *jj) + j; /* column index */
          for (row=0; row<bs; row++) {
            tmp[col] += PetscAbsScalar(*v);  v++;
          }
        }
        jj++;
      }
      v = bmat->a; jj = bmat->j;
      for (i=0; i<bmat->nz; i++) {
        for (j=0; j<bs; j++) {
          col = bs*garray[*jj] + j; /* garray maps compressed to global block columns */
          for (row=0; row<bs; row++) {
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *nrm = 0.0;
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscFree(tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row sum */
      PetscReal *sums;
      ierr = PetscMalloc1(bs,&sums);CHKERRQ(ierr);
      sum  = 0.0;
      /* per block row: accumulate the bs point-row sums over A then B */
      for (j=0; j<amat->mbs; j++) {
        for (row=0; row<bs; row++) sums[row] = 0.0;
        v  = amat->a + bs2*amat->i[j];
        nz = amat->i[j+1]-amat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        v  = bmat->a + bs2*bmat->i[j];
        nz = bmat->i[j+1]-bmat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        for (row=0; row<bs; row++) {
          if (sums[row] > sum) sum = sums[row];
        }
      }
      ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscFree(sums);CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for this norm yet");
  }
  PetscFunctionReturn(0);
}
761 
762 /*
763   Creates the hash table, and sets the table
764   This table is created only once.
765   If new entried need to be added to the matrix
766   then the hash table has to be destroyed and
767   recreated.
768 */
PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            A     = baij->A,B=baij->B;              /* diagonal and off-diagonal local blocks */
  Mat_SeqBAIJ    *a    = (Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)B->data;
  PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
  PetscErrorCode ierr;
  PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
  PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
  PetscInt       *HT,key;                                /* HT[i] holds key (global row*Nbs+col+1); 0 means empty slot */
  MatScalar      **HD;                                   /* HD[i] points at the bs2 values of the corresponding block */
  PetscReal      tmp;                                    /* scratch used by the HASH() macro */
#if defined(PETSC_USE_INFO)
  PetscInt ct=0,max=0;                                   /* total collisions and longest probe sequence, for logging */
#endif

  PetscFunctionBegin;
  /* the table is built only once; caller must destroy it before rebuilding */
  if (baij->ht) PetscFunctionReturn(0);

  /* table size is nz scaled by factor (>1) to keep the load factor below 1 */
  baij->ht_size = (PetscInt)(factor*nz);
  ht_size       = baij->ht_size;

  /* Allocate Memory for Hash Table (calloc so empty slots read as 0) */
  ierr = PetscCalloc2(ht_size,&baij->hd,ht_size,&baij->ht);CHKERRQ(ierr);
  HD   = baij->hd;
  HT   = baij->ht;

  /* Loop Over A: keys are built from global (block-row,block-col); +1 keeps key nonzero */
  for (i=0; i<a->mbs; i++) {
    for (j=ai[i]; j<ai[i+1]; j++) {
      row = i+rstart;
      col = aj[j]+cstart;

      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);   /* NOTE(review): HASH macro defined in mpibaij.h -- assumed to return 0..ht_size-1 */
      /* linear probing from h1, wrapping around the table */
      for (k=0; k<ht_size; k++) {
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = a->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k> max) max = k;
#endif
    }
  }
  /* Loop Over B: off-diagonal block columns are mapped to global via garray */
  for (i=0; i<b->mbs; i++) {
    for (j=bi[i]; j<bi[i+1]; j++) {
      row = i+rstart;
      col = garray[bj[j]];
      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++) {
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = b->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k> max) max = k;
#endif
    }
  }

  /* Print Summary: average probes per occupied slot and worst-case probe length */
  #if defined(PETSC_USE_INFO)
  for (i=0,j=0; i<ht_size; i++) {
    if (HT[i]) j++;
  }
  ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}
853 
854 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
855 {
856   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
857   PetscErrorCode ierr;
858   PetscInt       nstash,reallocs;
859 
860   PetscFunctionBegin;
861   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
862 
863   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
864   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
865   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
866   ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
867   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
868   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
869   PetscFunctionReturn(0);
870 }
871 
/* Complete assembly: drain the stashes into local blocks, reconcile disassembly
   across ranks, assemble A and B, and (optionally) install the hash-table
   fast path for MatSetValues(). */
PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *a   =(Mat_SeqBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscBool      r1,r2,r3,other_disassembled;
  MatScalar      *val;
  PetscMPIInt    n;

  PetscFunctionBegin;
  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  if (!baij->donotstash && !mat->nooffprocentries) {
    /* receive point-stash messages until none remain */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;

    (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* each block-stash entry carries bs2 values, hence the i*bs2 offset */
        ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);

    baij->roworiented = r1;
    a->roworiented    = r2;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselfs, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    /* MPI_PROD of the booleans: result is true only if every rank was assembled */
    ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  /* the scatter context for MatMult is built on the first final assembly */
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
    ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",(double)((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);

    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  /* once the pattern is final, switch MatSetValues to the hash-table fast path */
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);

    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  /* free the MatGetRow work arrays; they are rebuilt on demand */
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);

  baij->rowvalues = 0;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
985 
986 extern PetscErrorCode MatView_SeqBAIJ(Mat,PetscViewer);
987 #include <petscdraw.h>
/* View an MPIBAIJ matrix via an ASCII, draw, or socket viewer.  For the
   info formats only summary statistics are printed; otherwise the entire
   matrix is gathered onto rank 0 and viewed as a sequential matrix. */
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = baij->rank;
  PetscInt          bs   = mat->rmap->bs;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-rank nonzero/memory statistics, printed synchronized */
      MatInfo info;
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %g\n",
                                                rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(double)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar   *a;
    const char  *matname;

    /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      /* rank 0 owns every row of the gathered copy; other ranks own none */
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
    ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      /* rvals holds the bs consecutive global point rows of block row i */
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        for (k=0; k<bs; k++) {
          /* blocks are stored column-major: each step inserts one block column */
          ierr      = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ*)baij->B->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        /* garray maps local off-diagonal block columns to global block columns */
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr      = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr);
    if (!rank) {
      /* only rank 0 has data; view its local diagonal block, which now holds everything */
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,matname);CHKERRQ(ierr);
      ierr = MatView_SeqBAIJ(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1105 
1106 /* Used for both MPIBAIJ and MPISBAIJ matrices */
1107 PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
1108 {
1109   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
1110   Mat_SeqBAIJ    *A   = (Mat_SeqBAIJ*)aij->A->data;
1111   Mat_SeqBAIJ    *B   = (Mat_SeqBAIJ*)aij->B->data;
1112   const PetscInt *garray = aij->garray;
1113   PetscInt       header[4],M,N,m,rs,cs,bs,nz,cnt,i,j,ja,jb,k,l;
1114   PetscInt       *rowlens,*colidxs;
1115   PetscScalar    *matvals;
1116   PetscErrorCode ierr;
1117 
1118   PetscFunctionBegin;
1119   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
1120 
1121   M  = mat->rmap->N;
1122   N  = mat->cmap->N;
1123   m  = mat->rmap->n;
1124   rs = mat->rmap->rstart;
1125   cs = mat->cmap->rstart;
1126   bs = mat->rmap->bs;
1127   nz = bs*bs*(A->nz + B->nz);
1128 
1129   /* write matrix header */
1130   header[0] = MAT_FILE_CLASSID;
1131   header[1] = M; header[2] = N; header[3] = nz;
1132   ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1133   ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr);
1134 
1135   /* fill in and store row lengths */
1136   ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr);
1137   for (cnt=0, i=0; i<A->mbs; i++)
1138     for (j=0; j<bs; j++)
1139       rowlens[cnt++] = bs*(A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i]);
1140   ierr = PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT);CHKERRQ(ierr);
1141   ierr = PetscFree(rowlens);CHKERRQ(ierr);
1142 
1143   /* fill in and store column indices */
1144   ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr);
1145   for (cnt=0, i=0; i<A->mbs; i++) {
1146     for (k=0; k<bs; k++) {
1147       for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1148         if (garray[B->j[jb]] > cs/bs) break;
1149         for (l=0; l<bs; l++)
1150           colidxs[cnt++] = bs*garray[B->j[jb]] + l;
1151       }
1152       for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1153         for (l=0; l<bs; l++)
1154           colidxs[cnt++] = bs*A->j[ja] + l + cs;
1155       for (; jb<B->i[i+1]; jb++)
1156         for (l=0; l<bs; l++)
1157           colidxs[cnt++] = bs*garray[B->j[jb]] + l;
1158     }
1159   }
1160   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1161   ierr = PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DECIDE,PETSC_DECIDE,PETSC_INT);CHKERRQ(ierr);
1162   ierr = PetscFree(colidxs);CHKERRQ(ierr);
1163 
1164   /* fill in and store nonzero values */
1165   ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr);
1166   for (cnt=0, i=0; i<A->mbs; i++) {
1167     for (k=0; k<bs; k++) {
1168       for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1169         if (garray[B->j[jb]] > cs/bs) break;
1170         for (l=0; l<bs; l++)
1171           matvals[cnt++] = B->a[bs*(bs*jb + l) + k];
1172       }
1173       for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1174         for (l=0; l<bs; l++)
1175           matvals[cnt++] = A->a[bs*(bs*ja + l) + k];
1176       for (; jb<B->i[i+1]; jb++)
1177         for (l=0; l<bs; l++)
1178           matvals[cnt++] = B->a[bs*(bs*jb + l) + k];;
1179     }
1180   }
1181   ierr = PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DECIDE,PETSC_DECIDE,PETSC_SCALAR);CHKERRQ(ierr);
1182   ierr = PetscFree(matvals);CHKERRQ(ierr);
1183 
1184   /* write block size option to the viewer's .info file */
1185   ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
1186   PetscFunctionReturn(0);
1187 }
1188 
1189 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
1190 {
1191   PetscErrorCode ierr;
1192   PetscBool      iascii,isdraw,issocket,isbinary;
1193 
1194   PetscFunctionBegin;
1195   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1196   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1197   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1198   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1199   if (iascii || isdraw || issocket) {
1200     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1201   } else if (isbinary) {
1202     ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1203   }
1204   PetscFunctionReturn(0);
1205 }
1206 
/* Release everything owned by an MPIBAIJ matrix: the sequential blocks,
   column map, scatter context, work arrays, hash table, and the composed
   method functions registered at creation. */
PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
  /* colmap is a PetscTable or a plain array depending on the build */
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  /* clear the type name and unregister all composed methods */
  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_hypre_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_is_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatPtAP_is_mpibaij_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1250 
1251 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1252 {
1253   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1254   PetscErrorCode ierr;
1255   PetscInt       nt;
1256 
1257   PetscFunctionBegin;
1258   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1259   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1260   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1261   if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
1262   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1263   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1264   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1265   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1266   PetscFunctionReturn(0);
1267 }
1268 
1269 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1270 {
1271   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1272   PetscErrorCode ierr;
1273 
1274   PetscFunctionBegin;
1275   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1276   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1277   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1278   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1279   PetscFunctionReturn(0);
1280 }
1281 
1282 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1283 {
1284   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1285   PetscErrorCode ierr;
1286 
1287   PetscFunctionBegin;
1288   /* do nondiagonal part */
1289   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1290   /* do local part */
1291   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1292   /* add partial results together */
1293   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1294   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1295   PetscFunctionReturn(0);
1296 }
1297 
1298 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1299 {
1300   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1301   PetscErrorCode ierr;
1302 
1303   PetscFunctionBegin;
1304   /* do nondiagonal part */
1305   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1306   /* do local part */
1307   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1308   /* add partial results together */
1309   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1310   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1311   PetscFunctionReturn(0);
1312 }
1313 
1314 /*
1315   This only works correctly for square matrices where the subblock A->A is the
1316    diagonal block
1317 */
1318 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1319 {
1320   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1321   PetscErrorCode ierr;
1322 
1323   PetscFunctionBegin;
1324   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1325   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1326   PetscFunctionReturn(0);
1327 }
1328 
1329 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
1330 {
1331   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1332   PetscErrorCode ierr;
1333 
1334   PetscFunctionBegin;
1335   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1336   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1337   PetscFunctionReturn(0);
1338 }
1339 
/* Return one locally-owned point row of the matrix as global column indices
   and values, merging the A (diagonal-block) and B (off-diagonal) parts so
   the output columns are in increasing global order. */
PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;

  PetscFunctionBegin;
  if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
  /* only one row may be "gotten" at a time; MatRestoreRow clears the flag */
  if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
    PetscInt    max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr);
  }
  lrow = row - brstart;    /* local point-row index */

  /* only request from A/B what the caller asked for */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;    /* local block column of B -> global block column */
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      /* imark = number of B entries whose global column precedes the diagonal block */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* imark already computed by the value pass above */
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1416 
1417 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1418 {
1419   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1420 
1421   PetscFunctionBegin;
1422   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1423   baij->getrowactive = PETSC_FALSE;
1424   PetscFunctionReturn(0);
1425 }
1426 
1427 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1428 {
1429   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1430   PetscErrorCode ierr;
1431 
1432   PetscFunctionBegin;
1433   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1434   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1435   PetscFunctionReturn(0);
1436 }
1437 
1438 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1439 {
1440   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
1441   Mat            A  = a->A,B = a->B;
1442   PetscErrorCode ierr;
1443   PetscLogDouble isend[5],irecv[5];
1444 
1445   PetscFunctionBegin;
1446   info->block_size = (PetscReal)matin->rmap->bs;
1447 
1448   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1449 
1450   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1451   isend[3] = info->memory;  isend[4] = info->mallocs;
1452 
1453   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1454 
1455   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1456   isend[3] += info->memory;  isend[4] += info->mallocs;
1457 
1458   if (flag == MAT_LOCAL) {
1459     info->nz_used      = isend[0];
1460     info->nz_allocated = isend[1];
1461     info->nz_unneeded  = isend[2];
1462     info->memory       = isend[3];
1463     info->mallocs      = isend[4];
1464   } else if (flag == MAT_GLOBAL_MAX) {
1465     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1466 
1467     info->nz_used      = irecv[0];
1468     info->nz_allocated = irecv[1];
1469     info->nz_unneeded  = irecv[2];
1470     info->memory       = irecv[3];
1471     info->mallocs      = irecv[4];
1472   } else if (flag == MAT_GLOBAL_SUM) {
1473     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1474 
1475     info->nz_used      = irecv[0];
1476     info->nz_allocated = irecv[1];
1477     info->nz_unneeded  = irecv[2];
1478     info->memory       = irecv[3];
1479     info->mallocs      = irecv[4];
1480   } else SETERRQ1(PetscObjectComm((PetscObject)matin),PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1481   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1482   info->fill_ratio_needed = 0;
1483   info->factor_mallocs    = 0;
1484   PetscFunctionReturn(0);
1485 }
1486 
/* Set a matrix option, forwarding to the local blocks as appropriate.
   Grouped case labels are intentional fallthrough: each group shares
   one handling policy. */
PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  /* pattern-related options are forwarded to both local blocks */
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A,1);
    /* remembered locally so stashed block insertion can restore it */
    a->roworiented = flg;

    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
  case MAT_SORTED_FULL:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    /* skip stashing entirely; off-process entries will be dropped */
    a->donotstash = flg;
    break;
  case MAT_USE_HASH_TABLE:
    a->ht_flag = flg;
    a->ht_fact = 1.39;   /* default over-allocation factor for the hash table */
    break;
  /* symmetry-type options only affect the diagonal block */
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SUBMAT_SINGLEIS:
  case MAT_SYMMETRY_ETERNAL:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}
1534 
/*
   MatTranspose_MPIBAIJ - forms the transpose of a parallel BAIJ matrix.

   Input Parameters:
+  A      - the matrix to transpose
.  reuse  - MAT_INITIAL_MATRIX, MAT_REUSE_MATRIX, or MAT_INPLACE_MATRIX
-  matout - location for the result (for MAT_INPLACE_MATRIX the result
            replaces A itself and matout is not set)

   Each stored bs x bs block is transposed by inserting its entries, bs at a
   time, as rows of the result with MatSetValues_MPIBAIJ(); no preallocation
   information is available for the new matrix.
*/
PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
  Mat_SeqBAIJ    *Aloc;
  Mat            B;
  PetscErrorCode ierr;
  PetscInt       M =A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
  PetscInt       bs=A->rmap->bs,mbs=baij->mbs;
  MatScalar      *a;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    /* create the result with row/column sizes swapped relative to A */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    /* Do not know preallocation information, but must set block size */
    ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,NULL,PETSC_DECIDE,NULL);CHKERRQ(ierr);
  } else {
    B = *matout;
  }

  /* copy over the A part */
  Aloc = (Mat_SeqBAIJ*)baij->A->data;
  ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
  ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);

  for (i=0; i<mbs; i++) {
    /* rvals[] holds the bs consecutive global point-row indices of block row i;
       they become column indices of the transpose */
    rvals[0] = bs*(baij->rstartbs + i);
    for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
    for (j=ai[i]; j<ai[i+1]; j++) {
      /* global point column of the first column in block aj[j] (diagonal part) */
      col = (baij->cstartbs+aj[j])*bs;
      for (k=0; k<bs; k++) {
        /* insert the next bs stored values of the block as one row of B
           (assumes the per-block value ordering matches — inherited invariant) */
        ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);

        col++; a += bs;
      }
    }
  }
  /* copy over the B part */
  Aloc = (Mat_SeqBAIJ*)baij->B->data;
  ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
  for (i=0; i<mbs; i++) {
    rvals[0] = bs*(baij->rstartbs + i);
    for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
    for (j=ai[i]; j<ai[i+1]; j++) {
      /* garray maps compressed off-diagonal block columns to global block columns */
      col = baij->garray[aj[j]]*bs;
      for (k=0; k<bs; k++) {
        ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
        col++;
        a += bs;
      }
    }
  }
  ierr = PetscFree(rvals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) *matout = B;
  else {
    /* MAT_INPLACE_MATRIX: move B's contents into A and destroy the shell */
    ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1598 
1599 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1600 {
1601   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1602   Mat            a     = baij->A,b = baij->B;
1603   PetscErrorCode ierr;
1604   PetscInt       s1,s2,s3;
1605 
1606   PetscFunctionBegin;
1607   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1608   if (rr) {
1609     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1610     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1611     /* Overlap communication with computation. */
1612     ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1613   }
1614   if (ll) {
1615     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1616     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1617     ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
1618   }
1619   /* scale  the diagonal block */
1620   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1621 
1622   if (rr) {
1623     /* Do a scatter end and then right scale the off-diagonal block */
1624     ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1625     ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
1626   }
1627   PetscFunctionReturn(0);
1628 }
1629 
/*
   MatZeroRows_MPIBAIJ - zeros the globally-numbered rows in rows[],
   optionally placing diag on the diagonal.  If x and b are given, b is
   modified so that the zeroed rows of the solution retain the values in x
   (b[row] = diag*x[row]).
*/
PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIBAIJ   *l      = (Mat_MPIBAIJ *) A->data;
  PetscInt      *lrows;
  PetscInt       r, len;
  PetscBool      cong;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* get locally owned rows */
  ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    /* for each locally-owned zeroed row set b[row] = diag*x[row] */
    ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
    ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
  }

  /* actually zap the local rows */
  /*
        Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
  ierr = MatZeroRows_SeqBAIJ(l->B,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
  ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
  if ((diag != 0.0) && cong) {
    /* square/congruent layout: the SeqBAIJ routine can set the diagonal directly */
    ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,diag,NULL,NULL);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    /* non-congruent layouts: zero first, then insert diagonal entries one by
       one, which requires that new nonzero locations are allowed */
    ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,0,0);CHKERRQ(ierr);
    if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
       MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1688 
/*
   MatZeroRowsColumns_MPIBAIJ - zeros the listed global rows AND the
   corresponding columns, optionally placing diag on the diagonal.  If x and
   b are given, b is adjusted for the eliminated columns so the remaining
   system is consistent with the solution values in x.
*/
PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIBAIJ       *l = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       n = A->rmap->n,p = 0;
  PetscInt          i,j,k,r,len = 0,row,col,count;
  PetscInt          *lrows,*owners = A->rmap->range;
  PetscSFNode       *rrows;
  PetscSF           sf;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ*)l->B->data;
  PetscInt           bs = A->rmap->bs, bs2 = baij->bs2;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) lrows[r] = -1;
  ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
  for (r = 0; r < N; ++r) {
    const PetscInt idx   = rows[r];
    if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
    if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
      ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
  /* Collect flags for rows to be zeroed */
  ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
  /* handle off diagonal part of matrix */
  /* build a mask vector: 1 in every zeroed local row, then scatter it so
     each process learns which of its ghost columns are being eliminated */
  ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<len; i++) bb[lrows[i]] = 1;
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  if (x) {
    /* ghost the solution values so eliminated columns can be folded into b */
    ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
  }
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
  /* remove zeroed rows of off diagonal matrix */
  for (i = 0; i < len; ++i) {
    row   = lrows[i];
    count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;           /* point entries stored in this point row */
    aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
    for (k = 0; k < count; ++k) {
      /* stride bs to hit the same point row in successive block columns
         (per-block column-major storage — inherited invariant) */
      aa[0] = 0.0;
      aa   += bs;
    }
  }
  /* loop over all elements of off process part of matrix zeroing removed columns*/
  for (i = 0; i < l->B->rmap->N; ++i) {
    row = i/bs;
    for (j = baij->i[row]; j < baij->i[row+1]; ++j) {
      for (k = 0; k < bs; ++k) {
        col = bs*baij->j[j] + k;
        if (PetscAbsScalar(mask[col])) {
          /* this ghost column is eliminated: fold a*x into b and zero it */
          aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
          if (x) bb[i] -= aa[0]*xx[col];
          aa[0] = 0.0;
        }
      }
    }
  }
  if (x) {
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1784 
1785 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1786 {
1787   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1788   PetscErrorCode ierr;
1789 
1790   PetscFunctionBegin;
1791   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1792   PetscFunctionReturn(0);
1793 }
1794 
1795 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat*);
1796 
1797 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool  *flag)
1798 {
1799   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1800   Mat            a,b,c,d;
1801   PetscBool      flg;
1802   PetscErrorCode ierr;
1803 
1804   PetscFunctionBegin;
1805   a = matA->A; b = matA->B;
1806   c = matB->A; d = matB->B;
1807 
1808   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1809   if (flg) {
1810     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1811   }
1812   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1813   PetscFunctionReturn(0);
1814 }
1815 
1816 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
1817 {
1818   PetscErrorCode ierr;
1819   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1820   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
1821 
1822   PetscFunctionBegin;
1823   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1824   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1825     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1826   } else {
1827     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1828     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1829   }
1830   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
1831   PetscFunctionReturn(0);
1832 }
1833 
1834 PetscErrorCode MatSetUp_MPIBAIJ(Mat A)
1835 {
1836   PetscErrorCode ierr;
1837 
1838   PetscFunctionBegin;
1839   ierr = MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1840   PetscFunctionReturn(0);
1841 }
1842 
1843 PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
1844 {
1845   PetscErrorCode ierr;
1846   PetscInt       bs = Y->rmap->bs,m = Y->rmap->N/bs;
1847   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data;
1848   Mat_SeqBAIJ    *y = (Mat_SeqBAIJ*)Y->data;
1849 
1850   PetscFunctionBegin;
1851   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
1852   PetscFunctionReturn(0);
1853 }
1854 
/*
   MatAXPY_MPIBAIJ - computes Y = a*X + Y.

   SAME_NONZERO_PATTERN   - the stored values are combined directly with BLAS axpy.
   SUBSET_NONZERO_PATTERN - deferred to the generic implementation.
   otherwise              - a new matrix with the union nonzero pattern is
                            preallocated, filled, and swapped into Y.
*/
PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ*)X->data,*yy=(Mat_MPIBAIJ*)Y->data;
  PetscBLASInt   bnz,one=1;
  Mat_SeqBAIJ    *x,*y;
  PetscInt       bs2 = Y->rmap->bs*Y->rmap->bs;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscScalar alpha = a;
    /* identical patterns: axpy over the raw value arrays of both blocks
       (nz block entries, bs2 scalars each) */
    x    = (Mat_SeqBAIJ*)xx->A->data;
    y    = (Mat_SeqBAIJ*)yy->A->data;
    ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr);
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
    x    = (Mat_SeqBAIJ*)xx->B->data;
    y    = (Mat_SeqBAIJ*)yy->B->data;
    ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr);
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
    ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
  } else {
    /* unrelated patterns: build a matrix with the union pattern, compute
       into it, then replace Y's contents with it */
    Mat      B;
    PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs;
    ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
    ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
    ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
    ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
    ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
    ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
    ierr = MatSetType(B,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatAXPYGetPreallocation_SeqBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
    ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
    /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! */
    ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
    ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
    ierr = PetscFree(nnz_d);CHKERRQ(ierr);
    ierr = PetscFree(nnz_o);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1898 
1899 PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
1900 {
1901   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1902   PetscErrorCode ierr;
1903 
1904   PetscFunctionBegin;
1905   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1906   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1907   PetscFunctionReturn(0);
1908 }
1909 
1910 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
1911 {
1912   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1913   PetscErrorCode ierr;
1914 
1915   PetscFunctionBegin;
1916   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1917   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1918   PetscFunctionReturn(0);
1919 }
1920 
/*
   MatCreateSubMatrix_MPIBAIJ - extracts the submatrix mat[isrow,iscol].
   The column IS is gathered onto every process; on MAT_INITIAL_MATRIX it is
   stashed on the new matrix under the key "ISAllGather" so a later
   MAT_REUSE_MATRIX call can retrieve it instead of re-gathering.
*/
PetscErrorCode MatCreateSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  IS             iscol_local;
  PetscInt       csize;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
  if (call == MAT_REUSE_MATRIX) {
    /* retrieve the gathered column IS stashed on the previous call */
    ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
    if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
  } else {
    /* every process needs the complete list of requested columns */
    ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
  }
  ierr = MatCreateSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
  if (call == MAT_INITIAL_MATRIX) {
    /* stash the IS on the new matrix (compose takes a reference, so the
       local reference can be dropped) */
    ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
    ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1942 
1943 /*
1944   Not great since it makes two copies of the submatrix, first an SeqBAIJ
1945   in local and then by concatenating the local matrices the end result.
1946   Writing it directly would be much like MatCreateSubMatrices_MPIBAIJ().
1947   This routine is used for BAIJ and SBAIJ matrices (unfortunate dependency).
1948 */
/*
   MatCreateSubMatrix_MPIBAIJ_Private - extracts mat[isrow,iscol] into a new
   (or reused) parallel matrix whose local column size is csize.

   The submatrix is first gathered as one sequential matrix per process
   (Mreuse, stashed under "SubMatrix" for later reuse), then its rows are
   redistributed into the parallel result via MatSetValuesBlocked.
   See the caller-facing note above: two copies of the data are made.
*/
PetscErrorCode MatCreateSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs;
  PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
  Mat            M,Mreuse;
  MatScalar      *vwork,*aa;
  MPI_Comm       comm;
  IS             isrow_new, iscol_new;
  Mat_SeqBAIJ    *aij;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  /* The compression and expansion should be avoided. Doesn't point
     out errors, might change the indices, hence buggy */
  /* convert point-index sets to block-index sets for the BAIJ extraction */
  ierr = ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new);CHKERRQ(ierr);
  ierr = ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new);CHKERRQ(ierr);

  if (call ==  MAT_REUSE_MATRIX) {
    /* reuse the sequential gather matrix stashed on the previous call */
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
    ierr = MatCreateSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_REUSE_MATRIX,&Mreuse);CHKERRQ(ierr);
  } else {
    ierr = MatCreateSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_INITIAL_MATRIX,&Mreuse);CHKERRQ(ierr);
  }
  ierr = ISDestroy(&isrow_new);CHKERRQ(ierr);
  ierr = ISDestroy(&iscol_new);CHKERRQ(ierr);
  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  /* convert point sizes to block sizes */
  m    = m/bs;
  n    = n/bs;

  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqBAIJ*)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
      if (mglobal == n*bs) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = n/size + ((n % size) > rank);
      }
    } else {
      nlocal = csize/bs;
    }
    /* prefix-sum the local column counts to get this process's column range */
    ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);

    /* next, compute all the lengths */
    ierr  = PetscMalloc2(m+1,&dlens,m+1,&olens);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreate(comm,&M);CHKERRQ(ierr);
    ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr);
    ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
    /* this routine serves both BAIJ and SBAIJ, so both preallocations are
       attempted; the one not matching M's type is a no-op */
    ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
  } else {
    PetscInt ml,nl;

    M    = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    /* NOTE(review): ml is in point rows while m was converted to block rows
       above — confirm this size check against the reuse contract */
    if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  /* values in Mreuse blocks are traversed in their stored order */
  ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij  = (Mat_SeqBAIJ*)(Mreuse)->data;
  ii   = aij->i;
  jj   = aij->j;
  aa   = aij->a;
  for (i=0; i<m; i++) {
    row   = rstart/bs + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj;     jj += nz;
    vwork = aa;     aa += nz*bs*bs;
    ierr  = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call ==  MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
2073 
2074 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B)
2075 {
2076   MPI_Comm       comm,pcomm;
2077   PetscInt       clocal_size,nrows;
2078   const PetscInt *rows;
2079   PetscMPIInt    size;
2080   IS             crowp,lcolp;
2081   PetscErrorCode ierr;
2082 
2083   PetscFunctionBegin;
2084   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2085   /* make a collective version of 'rowp' */
2086   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
2087   if (pcomm==comm) {
2088     crowp = rowp;
2089   } else {
2090     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
2091     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
2092     ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr);
2093     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
2094   }
2095   ierr = ISSetPermutation(crowp);CHKERRQ(ierr);
2096   /* make a local version of 'colp' */
2097   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
2098   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
2099   if (size==1) {
2100     lcolp = colp;
2101   } else {
2102     ierr = ISAllGather(colp,&lcolp);CHKERRQ(ierr);
2103   }
2104   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
2105   /* now we just get the submatrix */
2106   ierr = MatGetLocalSize(A,NULL,&clocal_size);CHKERRQ(ierr);
2107   ierr = MatCreateSubMatrix_MPIBAIJ_Private(A,crowp,lcolp,clocal_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
2108   /* clean up */
2109   if (pcomm!=comm) {
2110     ierr = ISDestroy(&crowp);CHKERRQ(ierr);
2111   }
2112   if (size>1) {
2113     ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
2114   }
2115   PetscFunctionReturn(0);
2116 }
2117 
2118 PetscErrorCode  MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
2119 {
2120   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data;
2121   Mat_SeqBAIJ *B    = (Mat_SeqBAIJ*)baij->B->data;
2122 
2123   PetscFunctionBegin;
2124   if (nghosts) *nghosts = B->nbs;
2125   if (ghosts) *ghosts = baij->garray;
2126   PetscFunctionReturn(0);
2127 }
2128 
/*
   MatGetSeqNonzeroStructure_MPIBAIJ - builds, on every process, a
   sequential AIJ matrix (block-level, one entry per bs x bs block) holding
   the complete nonzero structure of the parallel matrix A.  Only the
   structure is meaningful; no numerical values are set.
*/
PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat)
{
  Mat            B;
  Mat_MPIBAIJ    *a  = (Mat_MPIBAIJ*)A->data;
  Mat_SeqBAIJ    *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data;
  Mat_SeqAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size,rank,*recvcounts = 0,*displs = 0;
  PetscInt       sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs;
  PetscInt       m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);

  /* ----------------------------------------------------------------
     Tell every processor the number of nonzeros per row
  */
  /* lens[i] = number of nonzero blocks in global block row i (diag + offdiag) */
  ierr = PetscMalloc1(A->rmap->N/bs,&lens);CHKERRQ(ierr);
  for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) {
    lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs];
  }
  ierr      = PetscMalloc1(2*size,&recvcounts);CHKERRQ(ierr);
  displs    = recvcounts + size;   /* displs shares the same allocation */
  for (i=0; i<size; i++) {
    recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs;
    displs[i]     = A->rmap->range[i]/bs;
  }
#if defined(PETSC_HAVE_MPI_IN_PLACE)
  ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
#else
  sendcount = A->rmap->rend/bs - A->rmap->rstart/bs;
  ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
#endif
  /* ---------------------------------------------------------------
     Create the sequential matrix of the same type as the local block diagonal
  */
  ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr);
  b    = (Mat_SeqAIJ*)B->data;

  /*--------------------------------------------------------------------
    Copy my part of matrix column indices over
  */
  /* write this process's column indices directly into B's j array, at the
     offset where its rows start, with the three segments of each row
     (below-diagonal offdiag, diagonal, above-diagonal offdiag) merged in
     ascending global order */
  sendcount  = ad->nz + bd->nz;
  jsendbuf   = b->j + b->i[rstarts[rank]/bs];
  a_jsendbuf = ad->j;
  b_jsendbuf = bd->j;
  n          = A->rmap->rend/bs - A->rmap->rstart/bs;
  cnt        = 0;
  for (i=0; i<n; i++) {

    /* put in lower diagonal portion */
    m = bd->i[i+1] - bd->i[i];
    while (m > 0) {
      /* is it above diagonal (in bd (compressed) numbering) */
      if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break;
      jsendbuf[cnt++] = garray[*b_jsendbuf++];
      m--;
    }

    /* put in diagonal portion */
    for (j=ad->i[i]; j<ad->i[i+1]; j++) {
      jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++;
    }

    /* put in upper diagonal portion */
    while (m-- > 0) {
      jsendbuf[cnt++] = garray[*b_jsendbuf++];
    }
  }
  if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt);

  /*--------------------------------------------------------------------
    Gather all column indices to all processors
  */
  for (i=0; i<size; i++) {
    recvcounts[i] = 0;
    for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) {
      recvcounts[i] += lens[j];
    }
  }
  displs[0] = 0;
  for (i=1; i<size; i++) {
    displs[i] = displs[i-1] + recvcounts[i-1];
  }
#if defined(PETSC_HAVE_MPI_IN_PLACE)
  ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
#else
  ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
#endif
  /*--------------------------------------------------------------------
    Assemble the matrix into useable form (note numerical values not yet set)
  */
  /* set the b->ilen (length of each row) values */
  ierr = PetscArraycpy(b->ilen,lens,A->rmap->N/bs);CHKERRQ(ierr);
  /* set the b->i indices */
  b->i[0] = 0;
  for (i=1; i<=A->rmap->N/bs; i++) {
    b->i[i] = b->i[i-1] + lens[i-1];
  }
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = PetscFree(recvcounts);CHKERRQ(ierr);

  /* propagate symmetry information from A to the structure matrix */
  if (A->symmetric) {
    ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
  } else if (A->hermitian) {
    ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr);
  } else if (A->structurally_symmetric) {
    ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
  }
  *newmat = B;
  PetscFunctionReturn(0);
}
2247 
/*
   MatSOR_MPIBAIJ - parallel SOR: only the "local" variants are supported.
   Each outer iteration communicates the ghost values of xx, forms
   bb1 = bb - B*xx (off-diagonal contribution moved to the right-hand side),
   and then runs the sequential SOR kernel on the diagonal block.
*/
PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            bb1 = 0;

  PetscFunctionBegin;
  if (flag == SOR_APPLY_UPPER) {
    /* pure triangular application: delegate to the diagonal block */
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* bb1 is needed unless this is a single iteration with a zero initial
     guess (~flag & bit tests that the SOR_ZERO_INITIAL_GUESS bit is unset) */
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) {
    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* first sweep needs no ghost values since xx starts at zero */
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }

    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
      its--;
    }
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* update rhs: bb1 = bb - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);

      /* local sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
    }
  } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel version of SOR requested not supported");

  ierr = VecDestroy(&bb1);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2318 
2319 PetscErrorCode MatGetColumnNorms_MPIBAIJ(Mat A,NormType type,PetscReal *norms)
2320 {
2321   PetscErrorCode ierr;
2322   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)A->data;
2323   PetscInt       N,i,*garray = aij->garray;
2324   PetscInt       ib,jb,bs = A->rmap->bs;
2325   Mat_SeqBAIJ    *a_aij = (Mat_SeqBAIJ*) aij->A->data;
2326   MatScalar      *a_val = a_aij->a;
2327   Mat_SeqBAIJ    *b_aij = (Mat_SeqBAIJ*) aij->B->data;
2328   MatScalar      *b_val = b_aij->a;
2329   PetscReal      *work;
2330 
2331   PetscFunctionBegin;
2332   ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
2333   ierr = PetscCalloc1(N,&work);CHKERRQ(ierr);
2334   if (type == NORM_2) {
2335     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2336       for (jb=0; jb<bs; jb++) {
2337         for (ib=0; ib<bs; ib++) {
2338           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
2339           a_val++;
2340         }
2341       }
2342     }
2343     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2344       for (jb=0; jb<bs; jb++) {
2345         for (ib=0; ib<bs; ib++) {
2346           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val);
2347           b_val++;
2348         }
2349       }
2350     }
2351   } else if (type == NORM_1) {
2352     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2353       for (jb=0; jb<bs; jb++) {
2354         for (ib=0; ib<bs; ib++) {
2355           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
2356           a_val++;
2357         }
2358       }
2359     }
2360     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2361       for (jb=0; jb<bs; jb++) {
2362        for (ib=0; ib<bs; ib++) {
2363           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val);
2364           b_val++;
2365         }
2366       }
2367     }
2368   } else if (type == NORM_INFINITY) {
2369     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2370       for (jb=0; jb<bs; jb++) {
2371         for (ib=0; ib<bs; ib++) {
2372           int col = A->cmap->rstart + a_aij->j[i] * bs + jb;
2373           work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]);
2374           a_val++;
2375         }
2376       }
2377     }
2378     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2379       for (jb=0; jb<bs; jb++) {
2380         for (ib=0; ib<bs; ib++) {
2381           int col = garray[b_aij->j[i]] * bs + jb;
2382           work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]);
2383           b_val++;
2384         }
2385       }
2386     }
2387   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
2388   if (type == NORM_INFINITY) {
2389     ierr = MPIU_Allreduce(work,norms,N,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2390   } else {
2391     ierr = MPIU_Allreduce(work,norms,N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2392   }
2393   ierr = PetscFree(work);CHKERRQ(ierr);
2394   if (type == NORM_2) {
2395     for (i=0; i<N; i++) norms[i] = PetscSqrtReal(norms[i]);
2396   }
2397   PetscFunctionReturn(0);
2398 }
2399 
2400 PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values)
2401 {
2402   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*) A->data;
2403   PetscErrorCode ierr;
2404 
2405   PetscFunctionBegin;
2406   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2407   A->factorerrortype             = a->A->factorerrortype;
2408   A->factorerror_zeropivot_value = a->A->factorerror_zeropivot_value;
2409   A->factorerror_zeropivot_row   = a->A->factorerror_zeropivot_row;
2410   PetscFunctionReturn(0);
2411 }
2412 
2413 PetscErrorCode MatShift_MPIBAIJ(Mat Y,PetscScalar a)
2414 {
2415   PetscErrorCode ierr;
2416   Mat_MPIBAIJ    *maij = (Mat_MPIBAIJ*)Y->data;
2417   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)maij->A->data;
2418 
2419   PetscFunctionBegin;
2420   if (!Y->preallocated) {
2421     ierr = MatMPIBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL);CHKERRQ(ierr);
2422   } else if (!aij->nz) {
2423     PetscInt nonew = aij->nonew;
2424     ierr = MatSeqBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL);CHKERRQ(ierr);
2425     aij->nonew = nonew;
2426   }
2427   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2428   PetscFunctionReturn(0);
2429 }
2430 
2431 PetscErrorCode MatMissingDiagonal_MPIBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2432 {
2433   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
2434   PetscErrorCode ierr;
2435 
2436   PetscFunctionBegin;
2437   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2438   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2439   if (d) {
2440     PetscInt rstart;
2441     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2442     *d += rstart/A->rmap->bs;
2443 
2444   }
2445   PetscFunctionReturn(0);
2446 }
2447 
2448 PetscErrorCode  MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a)
2449 {
2450   PetscFunctionBegin;
2451   *a = ((Mat_MPIBAIJ*)A->data)->A;
2452   PetscFunctionReturn(0);
2453 }
2454 
/* -------------------------------------------------------------------*/
/*
   Operation table for the MPIBAIJ matrix type.  Each entry fills one slot of
   struct _MatOps; the numbered comments give the slot index, and a 0 entry
   means the corresponding operation is not implemented for MPIBAIJ.
*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ,
                                       MatGetRow_MPIBAIJ,
                                       MatRestoreRow_MPIBAIJ,
                                       MatMult_MPIBAIJ,
                                /* 4*/ MatMultAdd_MPIBAIJ,
                                       MatMultTranspose_MPIBAIJ,
                                       MatMultTransposeAdd_MPIBAIJ,
                                       0,
                                       0,
                                       0,
                                /*10*/ 0,
                                       0,
                                       0,
                                       MatSOR_MPIBAIJ,
                                       MatTranspose_MPIBAIJ,
                                /*15*/ MatGetInfo_MPIBAIJ,
                                       MatEqual_MPIBAIJ,
                                       MatGetDiagonal_MPIBAIJ,
                                       MatDiagonalScale_MPIBAIJ,
                                       MatNorm_MPIBAIJ,
                                /*20*/ MatAssemblyBegin_MPIBAIJ,
                                       MatAssemblyEnd_MPIBAIJ,
                                       MatSetOption_MPIBAIJ,
                                       MatZeroEntries_MPIBAIJ,
                                /*24*/ MatZeroRows_MPIBAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*29*/ MatSetUp_MPIBAIJ,
                                       0,
                                       0,
                                       MatGetDiagonalBlock_MPIBAIJ,
                                       0,
                                /*34*/ MatDuplicate_MPIBAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*39*/ MatAXPY_MPIBAIJ,
                                       MatCreateSubMatrices_MPIBAIJ,
                                       MatIncreaseOverlap_MPIBAIJ,
                                       MatGetValues_MPIBAIJ,
                                       MatCopy_MPIBAIJ,
                                /*44*/ 0,
                                       MatScale_MPIBAIJ,
                                       MatShift_MPIBAIJ,
                                       0,
                                       MatZeroRowsColumns_MPIBAIJ,
                                /*49*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       0,
                                       MatSetUnfactored_MPIBAIJ,
                                       MatPermute_MPIBAIJ,
                                       MatSetValuesBlocked_MPIBAIJ,
                                /*59*/ MatCreateSubMatrix_MPIBAIJ,
                                       MatDestroy_MPIBAIJ,
                                       MatView_MPIBAIJ,
                                       0,
                                       0,
                                /*64*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*69*/ MatGetRowMaxAbs_MPIBAIJ,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*74*/ 0,
                                       MatFDColoringApply_BAIJ,
                                       0,
                                       0,
                                       0,
                                /*79*/ 0,
                                       0,
                                       0,
                                       0,
                                       MatLoad_MPIBAIJ,
                                /*84*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*89*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*94*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*99*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                                /*104*/0,
                                       MatRealPart_MPIBAIJ,
                                       MatImaginaryPart_MPIBAIJ,
                                       0,
                                       0,
                                /*109*/0,
                                       0,
                                       0,
                                       0,
                                       MatMissingDiagonal_MPIBAIJ,
                                /*114*/MatGetSeqNonzeroStructure_MPIBAIJ,
                                       0,
                                       MatGetGhosts_MPIBAIJ,
                                       0,
                                       0,
                                /*119*/0,
                                       0,
                                       0,
                                       0,
                                       MatGetMultiProcBlock_MPIBAIJ,
                                /*124*/0,
                                       MatGetColumnNorms_MPIBAIJ,
                                       MatInvertBlockDiagonal_MPIBAIJ,
                                       0,
                                       0,
                               /*129*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*134*/ 0,
                                       0,
                                       0,
                                       0,
                                       0,
                               /*139*/ MatSetBlockSizes_Default,
                                       0,
                                       0,
                                       MatFDColoringSetUp_MPIXAIJ,
                                       0,
                                /*144*/MatCreateMPIMatConcatenateSeqMat_MPIBAIJ
};
2602 
2603 
2604 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
2605 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
2606 
/*
   MatMPIBAIJSetPreallocationCSR_MPIBAIJ - implementation behind
   MatMPIBAIJSetPreallocationCSR().

   Given a block-CSR structure (ii, jj) in block units, counts the
   diagonal-part and off-diagonal-part blocks of each local block row,
   preallocates B accordingly, and then inserts the (optional) values V one
   block row (or one block) at a time.
*/
PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
{
  PetscInt       m,rstart,cstart,cend;
  PetscInt       i,j,dlen,olen,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
  const PetscInt *JJ    =0;
  PetscScalar    *values=0;
  PetscBool      roworiented = ((Mat_MPIBAIJ*)B->data)->roworiented;
  PetscErrorCode ierr;
  PetscBool      nooffprocentries;

  PetscFunctionBegin;
  ierr   = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
  ierr   = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
  ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  ierr   = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
  /* all quantities below are in block (not scalar) units */
  m      = B->rmap->n/bs;
  rstart = B->rmap->rstart/bs;
  cstart = B->cmap->rstart/bs;
  cend   = B->cmap->rend/bs;

  if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
  ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    nz = ii[i+1] - ii[i];
    if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
    nz_max = PetscMax(nz_max,nz);
    dlen   = 0;
    olen   = 0;
    JJ     = jj + ii[i];
    /* split this row's blocks into diagonal-part ([cstart,cend)) and off-diagonal-part counts */
    for (j=0; j<nz; j++) {
      if (*JJ < cstart || *JJ >= cend) olen++;
      else dlen++;
      JJ++;
    }
    d_nnz[i] = dlen;
    o_nnz[i] = olen;
  }
  ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
  ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);

  values = (PetscScalar*)V;
  if (!values) {
    /* no values supplied: insert zeros so the nonzero pattern is still established */
    ierr = PetscCalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr);
  }
  for (i=0; i<m; i++) {
    PetscInt          row    = i + rstart;
    PetscInt          ncols  = ii[i+1] - ii[i];
    const PetscInt    *icols = jj + ii[i];
    if (bs == 1 || !roworiented) {         /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
      const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
      ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
    } else {                    /* block ordering does not match so we can only insert one block at a time. */
      PetscInt j;
      for (j=0; j<ncols; j++) {
        const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
        ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
      }
    }
  }

  if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
  /* every entry inserted above is locally owned, so the off-process stash
     communication in assembly can be skipped; restore the flag afterwards */
  nooffprocentries    = B->nooffprocentries;
  B->nooffprocentries = PETSC_TRUE;
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  B->nooffprocentries = nooffprocentries;

  ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2678 
2679 /*@C
2680    MatMPIBAIJSetPreallocationCSR - Creates a sparse parallel matrix in BAIJ format using the given nonzero structure and (optional) numerical values
2681 
2682    Collective
2683 
2684    Input Parameters:
2685 +  B - the matrix
2686 .  bs - the block size
2687 .  i - the indices into j for the start of each local row (starts with zero)
2688 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2689 -  v - optional values in the matrix
2690 
2691    Level: advanced
2692 
2693    Notes:
2694     The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED.  For example, C programs
2695    may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
2696    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
2697    MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
2698    block column and the second index is over columns within a block.
2699 
2700    Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well
2701 
2702 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, MatCreateMPIBAIJWithArrays(), MPIBAIJ
2703 @*/
/* Public entry point; full documentation is in the manual-page comment above.
   Validates the arguments and dispatches to the type-specific implementation
   composed as "MatMPIBAIJSetPreallocationCSR_C" (a no-op for other types). */
PetscErrorCode  MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  PetscValidLogicalCollectiveInt(B,bs,2);
  ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2715 
/*
   MatMPIBAIJSetPreallocation_MPIBAIJ - implementation behind
   MatMPIBAIJSetPreallocation().

   Sets up the row/column layouts with block size bs, validates the
   per-block-row nonzero counts, fills in the cached block-scaled sizes and
   ranges, and (re)creates the sequential diagonal (b->A) and off-diagonal
   (b->B) parts with the requested preallocation.  Safe to call repeatedly:
   previously built communication structures are destroyed first.
*/
PetscErrorCode  MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
{
  Mat_MPIBAIJ    *b;
  PetscErrorCode ierr;
  PetscInt       i;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  /* the layout may adjust the block size (e.g. if it was already set) */
  ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);

  if (d_nnz) {
    for (i=0; i<B->rmap->n/bs; i++) {
      if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]);
    }
  }
  if (o_nnz) {
    for (i=0; i<B->rmap->n/bs; i++) {
      if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]);
    }
  }

  /* cache sizes and ownership ranges in block units */
  b      = (Mat_MPIBAIJ*)B->data;
  b->bs2 = bs*bs;
  b->mbs = B->rmap->n/bs;
  b->nbs = B->cmap->n/bs;
  b->Mbs = B->rmap->N/bs;
  b->Nbs = B->cmap->N/bs;

  for (i=0; i<=b->size; i++) {
    b->rangebs[i] = B->rmap->range[i]/bs;
  }
  b->rstartbs = B->rmap->rstart/bs;
  b->rendbs   = B->rmap->rend/bs;
  b->cstartbs = B->cmap->rstart/bs;
  b->cendbs   = B->cmap->rend/bs;

  /* discard any communication structures from an earlier preallocation */
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(b->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(b->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);

  /* Because the B will have been resized we simply destroy it and create a new one each time */
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
  ierr = MatDestroy(&b->B);CHKERRQ(ierr);
  ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
  /* on a single process everything is in the diagonal part, so b->B is empty */
  ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr);
  ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);

  if (!B->preallocated) {
    /* first preallocation: the diagonal part and the block stash do not exist yet */
    ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
    ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
    ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
    ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr);
  }

  ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
  ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(0);
}
2787 
2788 extern PetscErrorCode  MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
2789 extern PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
2790 
/*
   MatConvert_MPIBAIJ_MPIAdj - converts an MPIBAIJ matrix to an MPIAdj
   adjacency graph in block units: each block row becomes a vertex, diagonal
   entries are dropped (MPIAdj stores no self-edges), and each row's column
   indices are emitted in ascending global order by merging off-diagonal
   entries left of the local range, the local diagonal-part entries, and the
   remaining off-diagonal entries.  Ownership of ii and jj passes to the
   created MPIAdj matrix.
*/
PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype,MatReuse reuse,Mat *adj)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
  PetscErrorCode ierr;
  Mat_SeqBAIJ    *d  = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data;
  PetscInt       M   = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs;
  const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;

  PetscFunctionBegin;
  /* first pass: count entries per block row (excluding the diagonal entry) */
  ierr  = PetscMalloc1(M+1,&ii);CHKERRQ(ierr);
  ii[0] = 0;
  for (i=0; i<M; i++) {
    if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]);
    if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]);
    ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i];
    /* subtract one from the count when this row contains a diagonal entry */
    for (j=id[i]; j<id[i+1]; j++) {
      if (jd[j] == i) {ii[i+1]--;break;}
    }
  }
  /* second pass: fill jj in ascending global order */
  ierr = PetscMalloc1(ii[M],&jj);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<M; i++) {
    /* off-diagonal entries with global block column below the local range
       (garray values never fall inside [rstart,rstart+M), so > is safe here) */
    for (j=io[i]; j<io[i+1]; j++) {
      if (garray[jo[j]] > rstart) break;
      jj[cnt++] = garray[jo[j]];
    }
    /* local (diagonal-part) entries, skipping the diagonal itself */
    for (k=id[i]; k<id[i+1]; k++) {
      if (jd[k] != i) {
        jj[cnt++] = rstart + jd[k];
      }
    }
    /* remaining off-diagonal entries, above the local range */
    for (; j<io[i+1]; j++) {
      jj[cnt++] = garray[jo[j]];
    }
  }
  ierr = MatCreateMPIAdj(PetscObjectComm((PetscObject)B),M,B->cmap->N/B->rmap->bs,ii,jj,NULL,adj);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2830 
2831 #include <../src/mat/impls/aij/mpi/mpiaij.h>
2832 
2833 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,MatReuse,Mat*);
2834 
/*
   MatConvert_MPIBAIJ_MPIAIJ - converts an assembled MPIBAIJ matrix to MPIAIJ
   by converting its sequential diagonal (a->A) and off-diagonal (a->B) parts
   with MatConvert_SeqBAIJ_SeqAIJ.

   NOTE(review): on the non-reuse path, A itself is temporarily modified:
   MatDisAssemble_MPIBAIJ is applied (presumably so a->B carries usable column
   indices for the conversion — confirm) and A is reassembled afterwards.
*/
PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
{
  PetscErrorCode ierr;
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  Mat            B;
  Mat_MPIAIJ     *b;

  PetscFunctionBegin;
  if (!A->assembled) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Matrix must be assembled");

  if (reuse == MAT_REUSE_MATRIX) {
    B = *newmat;
  } else {
    /* create the destination with zero preallocation; the sub-matrices are replaced below */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(B,0,NULL,0,NULL);CHKERRQ(ierr);
  }
  b = (Mat_MPIAIJ*) B->data;

  if (reuse == MAT_REUSE_MATRIX) {
    /* reuse the existing AIJ sub-matrices of B */
    ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_REUSE_MATRIX, &b->A);CHKERRQ(ierr);
    ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_REUSE_MATRIX, &b->B);CHKERRQ(ierr);
  } else {
    /* replace B's placeholder sub-matrices with fresh conversions of A's parts */
    ierr = MatDestroy(&b->A);CHKERRQ(ierr);
    ierr = MatDestroy(&b->B);CHKERRQ(ierr);
    ierr = MatDisAssemble_MPIBAIJ(A);CHKERRQ(ierr);
    ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr);
    ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr);
    /* restore A to its assembled state after the disassembly above */
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  if (reuse == MAT_INPLACE_MATRIX) {
    /* in-place conversion: B's guts replace A's and B's shell is destroyed */
    ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr);
  } else {
   *newmat = B;
  }
  PetscFunctionReturn(0);
}
2879 
2880 /*MC
2881    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
2882 
2883    Options Database Keys:
2884 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
2885 . -mat_block_size <bs> - set the blocksize used to store the matrix
2886 - -mat_use_hash_table <fact>
2887 
2888    Level: beginner
2889 
2890    Notes:
    MatSetOption(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this case no
    space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored
2893 
2894 .seealso: MatCreateBAIJ
2895 M*/
2896 
2897 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,MatType,MatReuse,Mat*);
2898 PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*);
2899 
/*
   MatCreate_MPIBAIJ - constructor registered for the MATMPIBAIJ type.

   Allocates the Mat_MPIBAIJ data structure, installs the operation table,
   initializes all cached state to empty, composes the type-specific methods
   (conversions, preallocation, hash-table control), and processes the
   -mat_use_hash_table option.  Preallocation happens later, via
   MatMPIBAIJSetPreallocation()/MatMPIBAIJSetPreallocationCSR().
*/
PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B)
{
  Mat_MPIBAIJ    *b;
  PetscErrorCode ierr;
  PetscBool      flg = PETSC_FALSE;

  PetscFunctionBegin;
  ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);   /* zero-initialized */
  B->data = (void*)b;

  ierr         = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->assembled = PETSC_FALSE;

  B->insertmode = NOT_SET_VALUES;
  ierr          = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
  ierr          = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  ierr = PetscMalloc1(b->size+1,&b->rangebs);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL;
  b->garray      = NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used in block assembly */
  b->barray = 0;

  /* stuff used for matrix vector multiply */
  b->lvec  = 0;
  b->Mvctx = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = 0;
  b->hd           = 0;
  b->ht_size      = 0;
  b->ht_flag      = PETSC_FALSE;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
  b->ijonly = PETSC_FALSE;


  /* register type-specific methods under their dispatch names */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C",MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatSetHashTableFactor_C",MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_mpibaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);

  /* -mat_use_hash_table is first probed as a flag, then re-read as a real to
     allow an optional factor value; factors <= 1.0 fall back to the default */
  ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsName("-mat_use_hash_table","Use hash table to save time in constructing matrix","MatSetOption",&flg);CHKERRQ(ierr);
  if (flg) {
    PetscReal fact = 1.39;
    ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
    ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
    if (fact <= 1.0) fact = 1.39;
    ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
    ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
  }
  ierr = PetscOptionsEnd();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2982 
2983 /*MC
2984    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
2985 
2986    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
2987    and MATMPIBAIJ otherwise.
2988 
2989    Options Database Keys:
2990 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
2991 
2992   Level: beginner
2993 
2994 .seealso: MatCreateBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
2995 M*/
2996 
2997 /*@C
2998    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
2999    (block compressed row).  For good matrix assembly performance
3000    the user should preallocate the matrix storage by setting the parameters
3001    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3002    performance can be increased by more than a factor of 50.
3003 
3004    Collective on Mat
3005 
3006    Input Parameters:
3007 +  B - the matrix
3008 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3009           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3010 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
3011            submatrix  (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           in the diagonal portion of the local submatrix (possibly different for each block
           row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry and
           set it even if it is zero.
3016 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
3017            submatrix (same for all local rows).
3018 -  o_nnz - array containing the number of nonzeros in the various block rows of the
3019            off-diagonal portion of the local submatrix (possibly different for
3020            each block row) or NULL.
3021 
3022    If the *_nnz parameter is given then the *_nz parameter is ignored
3023 
3024    Options Database Keys:
3025 +   -mat_block_size - size of the blocks to use
3026 -   -mat_use_hash_table <fact>
3027 
3028    Notes:
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
3031 
3032    Storage Information:
3033    For a square global matrix we define each processor's diagonal portion
3034    to be its local rows and the corresponding columns (a square submatrix);
3035    each processor's off-diagonal portion encompasses the remainder of the
3036    local matrix (a rectangular submatrix).
3037 
3038    The user can specify preallocated storage for the diagonal part of
3039    the local submatrix with either d_nz or d_nnz (not both).  Set
3040    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3041    memory allocation.  Likewise, specify preallocated storage for the
3042    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3043 
3044    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3045    the figure below we depict these three local rows and all columns (0-11).
3046 
3047 .vb
3048            0 1 2 3 4 5 6 7 8 9 10 11
3049           --------------------------
3050    row 3  |o o o d d d o o o o  o  o
3051    row 4  |o o o d d d o o o o  o  o
3052    row 5  |o o o d d d o o o o  o  o
3053           --------------------------
3054 .ve
3055 
3056    Thus, any entries in the d locations are stored in the d (diagonal)
3057    submatrix, and any entries in the o locations are stored in the
3058    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3059    stored simply in the MATSEQBAIJ format for compressed row storage.
3060 
3061    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3062    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3063    In general, for PDE problems in which most nonzeros are near the diagonal,
3064    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3065    or you will get TERRIBLE performance; see the users' manual chapter on
3066    matrices.
3067 
3068    You can call MatGetInfo() to get information on how effective the preallocation was;
3069    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3070    You can also run with the option -info and look for messages with the string
3071    malloc in them to see if additional memory allocation was needed.
3072 
3073    Level: intermediate
3074 
3075 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocationCSR(), PetscSplitOwnership()
3076 @*/
PetscErrorCode  MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  /* bs must be the same on every process of the matrix's communicator */
  PetscValidLogicalCollectiveInt(B,bs,2);
  /* dispatch to the type-specific implementation composed as "MatMPIBAIJSetPreallocation_C";
     PetscTryMethod() is a no-op for matrix types that do not compose this method */
  ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
3088 
3089 /*@C
3090    MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format
3091    (block compressed row).  For good matrix assembly performance
3092    the user should preallocate the matrix storage by setting the parameters
3093    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3094    performance can be increased by more than a factor of 50.
3095 
3096    Collective
3097 
3098    Input Parameters:
3099 +  comm - MPI communicator
3100 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3101           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3102 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3103            This value should be the same as the local size used in creating the
3104            y vector for the matrix-vector product y = Ax.
3105 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
3106            This value should be the same as the local size used in creating the
3107            x vector for the matrix-vector product y = Ax.
3108 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3109 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3110 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
3111            submatrix  (same for all local rows)
.  d_nnz - array containing the number of nonzero blocks in the various block rows
           in the diagonal portion of the local submatrix (possibly different for each block
           row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry
           and set it even if it is zero.
3116 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3117            submatrix (same for all local rows).
3118 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
3119            off-diagonal portion of the local submatrix (possibly different for
3120            each block row) or NULL.
3121 
3122    Output Parameter:
3123 .  A - the matrix
3124 
3125    Options Database Keys:
3126 +   -mat_block_size - size of the blocks to use
3127 -   -mat_use_hash_table <fact>
3128 
3129    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3130    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3131    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3132 
3133    Notes:
3134    If the *_nnz parameter is given then the *_nz parameter is ignored
3135 
   A nonzero block is any block that has 1 or more nonzeros in it
3137 
3138    The user MUST specify either the local or global matrix dimensions
3139    (possibly both).
3140 
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
3143 
3144    Storage Information:
3145    For a square global matrix we define each processor's diagonal portion
3146    to be its local rows and the corresponding columns (a square submatrix);
3147    each processor's off-diagonal portion encompasses the remainder of the
3148    local matrix (a rectangular submatrix).
3149 
3150    The user can specify preallocated storage for the diagonal part of
3151    the local submatrix with either d_nz or d_nnz (not both).  Set
3152    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3153    memory allocation.  Likewise, specify preallocated storage for the
3154    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3155 
3156    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3157    the figure below we depict these three local rows and all columns (0-11).
3158 
3159 .vb
3160            0 1 2 3 4 5 6 7 8 9 10 11
3161           --------------------------
3162    row 3  |o o o d d d o o o o  o  o
3163    row 4  |o o o d d d o o o o  o  o
3164    row 5  |o o o d d d o o o o  o  o
3165           --------------------------
3166 .ve
3167 
3168    Thus, any entries in the d locations are stored in the d (diagonal)
3169    submatrix, and any entries in the o locations are stored in the
3170    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3171    stored simply in the MATSEQBAIJ format for compressed row storage.
3172 
3173    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3174    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3175    In general, for PDE problems in which most nonzeros are near the diagonal,
3176    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3177    or you will get TERRIBLE performance; see the users' manual chapter on
3178    matrices.
3179 
3180    Level: intermediate
3181 
3182 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3183 @*/
3184 PetscErrorCode  MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3185 {
3186   PetscErrorCode ierr;
3187   PetscMPIInt    size;
3188 
3189   PetscFunctionBegin;
3190   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3191   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3192   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3193   if (size > 1) {
3194     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
3195     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3196   } else {
3197     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3198     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3199   }
3200   PetscFunctionReturn(0);
3201 }
3202 
static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
  PetscErrorCode ierr;
  PetscInt       len=0;

  PetscFunctionBegin;
  *newmat = 0;
  /* create a new matrix of the same sizes and type as the original */
  ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
  ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
  ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);

  /* the duplicate is marked assembled/preallocated up front; its A and B parts
     are filled in by the MatDuplicate() calls at the bottom of this routine */
  mat->factortype   = matin->factortype;
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;

  /* copy the block-dimension bookkeeping from the original */
  a             = (Mat_MPIBAIJ*)mat->data;
  mat->rmap->bs = matin->rmap->bs;
  a->bs2        = oldmat->bs2;
  a->mbs        = oldmat->mbs;
  a->nbs        = oldmat->nbs;
  a->Mbs        = oldmat->Mbs;
  a->Nbs        = oldmat->Nbs;

  /* share (reference-count) the row/column layouts rather than copying them */
  ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  /* per-instance scratch state (MatGetRow buffers, block scratch) starts empty */
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;
  a->barray       = 0;
  a->rstartbs     = oldmat->rstartbs;
  a->rendbs       = oldmat->rendbs;
  a->cstartbs     = oldmat->cstartbs;
  a->cendbs       = oldmat->cendbs;

  /* hash table stuff: copy the configuration (flag, factor) but not the table
     itself or its counters -- the duplicate builds its own table if needed */
  a->ht           = 0;
  a->hd           = 0;
  a->ht_size      = 0;
  a->ht_flag      = oldmat->ht_flag;
  a->ht_fact      = oldmat->ht_fact;
  a->ht_total_ct  = 0;
  a->ht_insert_ct = 0;

  ierr = PetscArraycpy(a->rangebs,oldmat->rangebs,a->size+1);CHKERRQ(ierr);
  /* deep-copy the global-to-local column map when the original has one */
  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscArraycpy(a->colmap,oldmat->colmap,a->Nbs);CHKERRQ(ierr);
#endif
  } else a->colmap = 0;

  /* deep-copy garray (global block-column indices of the off-diagonal part B) */
  if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
    ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr);
  } else a->garray = 0;

  /* fresh block stash; copy the off-process communication machinery */
  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);

  /* duplicate the diagonal (A) and off-diagonal (B) sequential blocks;
     cpvalues controls whether the numerical values are copied */
  ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
  ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}
3285 
3286 /* Used for both MPIBAIJ and MPISBAIJ matrices */
3287 PetscErrorCode MatLoad_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
3288 {
3289   PetscInt       header[4],M,N,nz,bs,m,n,mbs,nbs,rows,cols,sum,i,j,k;
3290   PetscInt       *rowidxs,*colidxs,rs,cs,ce;
3291   PetscScalar    *matvals;
3292   PetscErrorCode ierr;
3293 
3294   PetscFunctionBegin;
3295   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
3296 
3297   /* read in matrix header */
3298   ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr);
3299   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
3300   M  = header[1]; N = header[2]; nz = header[3];
3301   if (M < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%D) in file is negative",M);
3302   if (N < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%D) in file is negative",N);
3303   if (nz < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIBAIJ");
3304 
3305   /* set block sizes from the viewer's .info file */
3306   ierr = MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
3307   /* set local sizes if not set already */
3308   if (mat->rmap->n < 0 && M == N) mat->rmap->n = mat->cmap->n;
3309   if (mat->cmap->n < 0 && M == N) mat->cmap->n = mat->rmap->n;
3310   /* set global sizes if not set already */
3311   if (mat->rmap->N < 0) mat->rmap->N = M;
3312   if (mat->cmap->N < 0) mat->cmap->N = N;
3313   ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
3314   ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
3315 
3316   /* check if the matrix sizes are correct */
3317   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
3318   if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%D, %D) than the input matrix (%D, %D)",M,N,rows,cols);
3319   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
3320   ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
3321   ierr = PetscLayoutGetRange(mat->rmap,&rs,NULL);
3322   ierr = PetscLayoutGetRange(mat->cmap,&cs,&ce);
3323   mbs = m/bs; nbs = n/bs;
3324 
3325   /* read in row lengths and build row indices */
3326   ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr);
3327   ierr = PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT);CHKERRQ(ierr);
3328   rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i];
3329   ierr = MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer));CHKERRQ(ierr);
3330   if (sum != nz) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %D, sum-row-lengths = %D\n",nz,sum);
3331 
3332   /* read in column indices and matrix values */
3333   ierr = PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals);CHKERRQ(ierr);
3334   ierr = PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr);
3335   ierr = PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr);
3336 
3337   { /* preallocate matrix storage */
3338     PetscBT    bt; /* helper bit set to count diagonal nonzeros */
3339     PetscHSetI ht; /* helper hash set to count off-diagonal nonzeros */
3340     PetscBool  sbaij,done;
3341     PetscInt   *d_nnz,*o_nnz;
3342 
3343     ierr = PetscBTCreate(nbs,&bt);CHKERRQ(ierr);
3344     ierr = PetscHSetICreate(&ht);CHKERRQ(ierr);
3345     ierr = PetscCalloc2(mbs,&d_nnz,mbs,&o_nnz);CHKERRQ(ierr);
3346     ierr = PetscObjectTypeCompare((PetscObject)mat,MATMPISBAIJ,&sbaij);CHKERRQ(ierr);
3347     for (i=0; i<mbs; i++) {
3348       ierr = PetscBTMemzero(nbs,bt);CHKERRQ(ierr);
3349       ierr = PetscHSetIClear(ht);CHKERRQ(ierr);
3350       for (k=0; k<bs; k++) {
3351         PetscInt row = bs*i + k;
3352         for (j=rowidxs[row]; j<rowidxs[row+1]; j++) {
3353           PetscInt col = colidxs[j];
3354           if (!sbaij || col >= row) {
3355             if (col >= cs && col < ce) {
3356               if (!PetscBTLookupSet(bt,(col-cs)/bs)) d_nnz[i]++;
3357             } else {
3358               ierr = PetscHSetIQueryAdd(ht,col/bs,&done);CHKERRQ(ierr);
3359               if (done) o_nnz[i]++;
3360             }
3361           }
3362         }
3363       }
3364     }
3365     ierr = PetscBTDestroy(&bt);CHKERRQ(ierr);
3366     ierr = PetscHSetIDestroy(&ht);CHKERRQ(ierr);
3367     ierr = MatMPIBAIJSetPreallocation(mat,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3368     ierr = MatMPISBAIJSetPreallocation(mat,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3369     ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3370   }
3371 
3372   /* store matrix values */
3373   for (i=0; i<m; i++) {
3374     PetscInt row = rs + i, s = rowidxs[i], e = rowidxs[i+1];
3375     ierr = (*mat->ops->setvalues)(mat,1,&row,e-s,colidxs+s,matvals+s,INSERT_VALUES);CHKERRQ(ierr);
3376   }
3377 
3378   ierr = PetscFree(rowidxs);CHKERRQ(ierr);
3379   ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr);
3380   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3381   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3382   PetscFunctionReturn(0);
3383 }
3384 
3385 PetscErrorCode MatLoad_MPIBAIJ(Mat mat,PetscViewer viewer)
3386 {
3387   PetscErrorCode ierr;
3388   PetscBool      isbinary;
3389 
3390   PetscFunctionBegin;
3391   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
3392   if (!isbinary) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)mat)->type_name);
3393   ierr = MatLoad_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
3394   PetscFunctionReturn(0);
3395 }
3396 
3397 /*@
3398    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
3399 
3400    Input Parameters:
3401 +  mat  - the matrix
3402 -  fact - factor
3403 
3404    Not Collective, each process can use a different factor
3405 
3406    Level: advanced
3407 
3408   Notes:
3409    This can also be set by the command line option: -mat_use_hash_table <fact>
3410 
3411 .seealso: MatSetOption()
3412 @*/
PetscErrorCode  MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* dispatch to the implementation composed as "MatSetHashTableFactor_C";
     PetscTryMethod() is a no-op for matrix types that do not compose it */
  ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
3421 
3422 PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
3423 {
3424   Mat_MPIBAIJ *baij;
3425 
3426   PetscFunctionBegin;
3427   baij          = (Mat_MPIBAIJ*)mat->data;
3428   baij->ht_fact = fact;
3429   PetscFunctionReturn(0);
3430 }
3431 
3432 PetscErrorCode  MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
3433 {
3434   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
3435   PetscBool      flg;
3436   PetscErrorCode ierr;
3437 
3438   PetscFunctionBegin;
3439   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIBAIJ,&flg);CHKERRQ(ierr);
3440   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIBAIJ matrix as input");
3441   if (Ad)     *Ad     = a->A;
3442   if (Ao)     *Ao     = a->B;
3443   if (colmap) *colmap = a->garray;
3444   PetscFunctionReturn(0);
3445 }
3446 
3447 /*
3448     Special version for direct calls from Fortran (to eliminate two function call overheads
3449 */
3450 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3451 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3452 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3453 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3454 #endif
3455 
3456 /*@C
3457   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked()
3458 
3459   Collective on Mat
3460 
3461   Input Parameters:
3462 + mat - the matrix
3463 . min - number of input rows
3464 . im - input rows
3465 . nin - number of input columns
3466 . in - input columns
3467 . v - numerical values input
3468 - addvin - INSERT_VALUES or ADD_VALUES
3469 
3470   Notes:
3471     This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse.
3472 
3473   Level: advanced
3474 
3475 .seealso:   MatSetValuesBlocked()
3476 @*/
PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
{
  /* convert input arguments to C version */
  Mat        mat  = *matin;
  PetscInt   m    = *min, n = *nin;
  InsertMode addv = *addvin;

  Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
  const MatScalar *value;
  MatScalar       *barray     = baij->barray; /* scratch buffer holding one bs x bs block */
  PetscBool       roworiented = baij->roworiented;
  PetscErrorCode  ierr;
  PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
  PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  /* tasks normally handled by MatSetValuesBlocked() */
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
#if defined(PETSC_USE_DEBUG)
  else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
  if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
#endif
  if (mat->assembled) {
    mat->was_assembled = PETSC_TRUE;
    mat->assembled     = PETSC_FALSE;
  }
  ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);


  /* lazily allocate the per-matrix block scratch buffer */
  if (!barray) {
    ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  /* stride (in scalars) between consecutive rows of one block inside v */
  if (roworiented) stepval = (n-1)*bs;
  else stepval = (m-1)*bs;

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue; /* negative block-row indices are silently skipped */
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) { /* block row is owned by this process */
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If NumCol = 1 then a copy is not required: the block is already
           contiguous in v, so point barray straight into the input */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required: gather the strided block into barray */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++ = *value++;
            }
          }
          barray -=bs2; /* rewind to the start of the copied block */
        }

        if (in[j] >= cstart && in[j] < cend) { /* diagonal part: insert into baij->A */
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
        } else if (in[j] < 0) continue; /* negative block-column indices are skipped */
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
#endif
        else { /* off-diagonal part: insert into baij->B */
          if (mat->was_assembled) {
            /* translate the global column through colmap; entries are 1-based
               scalar offsets, hence the (…-1)/bs conversions below */
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined(PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            /* column not present in B's current pattern and new nonzeros are
               allowed: fall back to the unassembled (global-index) state */
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  =  in[j];
            }
          } else col = in[j]; /* unassembled: B is addressed by global block column */
          ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
        }
      }
    } else { /* block row owned by another process: stash it for assembly-time communication */
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }

  /* task normally handled by MatSetValuesBlocked() */
  ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
3594 
3595 /*@
3596      MatCreateMPIBAIJWithArrays - creates a MPI BAIJ matrix using arrays that contain in standard block
3597          CSR format the local rows.
3598 
3599    Collective
3600 
3601    Input Parameters:
3602 +  comm - MPI communicator
3603 .  bs - the block size, only a block size of 1 is supported
3604 .  m - number of local rows (Cannot be PETSC_DECIDE)
3605 .  n - This value should be the same as the local size used in creating the
3606        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3607        calculated if N is given) For square matrices n is almost always m.
3608 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3609 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3610 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that rowth block row of the matrix
3611 .   j - column indices
3612 -   a - matrix values
3613 
3614    Output Parameter:
3615 .   mat - the matrix
3616 
3617    Level: intermediate
3618 
3619    Notes:
3620        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3621      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3622      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3623 
3624      The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3625      the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3626      block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3627      with column-major ordering within blocks.
3628 
3629        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3630 
3631 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3632           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
3633 @*/
PetscErrorCode  MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIBAIJ);CHKERRQ(ierr);
  ierr = MatSetBlockSize(*mat,bs);CHKERRQ(ierr);
  ierr = MatSetUp(*mat);CHKERRQ(ierr);
  /* the input blocks are column-major (see the Notes in the docstring above),
     so insert with column orientation, then restore the row-oriented default */
  ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
  ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
3651 
3652 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3653 {
3654   PetscErrorCode ierr;
3655   PetscInt       m,N,i,rstart,nnz,Ii,bs,cbs;
3656   PetscInt       *indx;
3657   PetscScalar    *values;
3658 
3659   PetscFunctionBegin;
3660   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3661   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3662     Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inmat->data;
3663     PetscInt       *dnz,*onz,mbs,Nbs,nbs;
3664     PetscInt       *bindx,rmax=a->rmax,j;
3665     PetscMPIInt    rank,size;
3666 
3667     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3668     mbs = m/bs; Nbs = N/cbs;
3669     if (n == PETSC_DECIDE) {
3670       ierr = PetscSplitOwnershipBlock(comm,cbs,&n,&N);
3671     }
3672     nbs = n/cbs;
3673 
3674     ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr);
3675     ierr = MatPreallocateInitialize(comm,mbs,nbs,dnz,onz);CHKERRQ(ierr); /* inline function, output __end and __rstart are used below */
3676 
3677     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3678     ierr = MPI_Comm_rank(comm,&size);CHKERRQ(ierr);
3679     if (rank == size-1) {
3680       /* Check sum(nbs) = Nbs */
3681       if (__end != Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local block columns %D != global block columns %D",__end,Nbs);
3682     }
3683 
3684     rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateInitialize */
3685     for (i=0; i<mbs; i++) {
3686       ierr = MatGetRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */
3687       nnz = nnz/bs;
3688       for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
3689       ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr);
3690       ierr = MatRestoreRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr);
3691     }
3692     ierr = PetscFree(bindx);CHKERRQ(ierr);
3693 
3694     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3695     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3696     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
3697     ierr = MatSetType(*outmat,MATBAIJ);CHKERRQ(ierr);
3698     ierr = MatSeqBAIJSetPreallocation(*outmat,bs,0,dnz);CHKERRQ(ierr);
3699     ierr = MatMPIBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr);
3700     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3701   }
3702 
3703   /* numeric phase */
3704   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3705   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
3706 
3707   for (i=0; i<m; i++) {
3708     ierr = MatGetRow_SeqBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3709     Ii   = i + rstart;
3710     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3711     ierr = MatRestoreRow_SeqBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3712   }
3713   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3714   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3715   PetscFunctionReturn(0);
3716 }
3717