xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 8dd7b8de55100f02b4a49c7581bb3bd4e4b5a7df)
1 
2 #include <../src/mat/impls/baij/mpi/mpibaij.h>   /*I  "petscmat.h"  I*/
3 
4 #include <petscblaslapack.h>
5 #include <petscsf.h>
6 
7 #if defined(PETSC_HAVE_HYPRE)
8 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
9 #endif
10 
/*
   MatGetRowMaxAbs_MPIBAIJ - for each locally owned row, computes the entry of
   maximum absolute value and (optionally, when idx is non-NULL) its global
   column index.

   Strategy: take the row maxima of the diagonal block (a->A), then merge in
   the row maxima of the off-diagonal block (a->B); for each row the entry of
   larger magnitude wins.
*/
PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  /* row maxima of the diagonal block; indices come back local to A's columns */
  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    /* shift diagonal-block column indices to global numbering; rows whose
       maximum is exactly zero (no entry found) are left untouched */
    for (i=0; i<A->rmap->n; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  /* row maxima of the off-diagonal block, into a scratch sequential vector */
  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);}
  ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++) {
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      /* translate the off-diagonal local point column to a global point index:
         garray[] maps local block columns of B to global block columns */
      if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);
  ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
46 
47 PetscErrorCode  MatStoreValues_MPIBAIJ(Mat mat)
48 {
49   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
50   PetscErrorCode ierr;
51 
52   PetscFunctionBegin;
53   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
54   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
55   PetscFunctionReturn(0);
56 }
57 
58 PetscErrorCode  MatRetrieveValues_MPIBAIJ(Mat mat)
59 {
60   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
61   PetscErrorCode ierr;
62 
63   PetscFunctionBegin;
64   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
65   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
66   PetscFunctionReturn(0);
67 }
68 
/*
     Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  This is done in a non scalable way since the
   length of colmap equals the global matrix length.
*/
PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *B    = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;

  PetscFunctionBegin;
#if defined(PETSC_USE_CTABLE)
  /* scalable variant: hash table keyed by global block column + 1
     (the +1 shift keeps 0 free as the "not found" sentinel) */
  ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
  for (i=0; i<nbs; i++) {
    ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  /* dense variant: array indexed by global block column; entries are stored
     shifted by +1 so a value of 0 means "column not present locally" */
  ierr = PetscCalloc1(baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}
95 
/*
   MatSetValues_SeqBAIJ_A_Private - insert or add ONE scalar value at local
   point position (row,col) of the diagonal block.  Performs a binary search
   (narrowed to <= 3 candidates) plus linear scan of the block row for the
   target block column; if the block is absent and new nonzeros are allowed
   (a->nonew), shifts later blocks up and inserts a fresh zeroed bs x bs block.
   (orow,ocol) are the global indices, used only for error messages.
   NOTE: relies on many locals of the calling function (rp, ap, bap, aa, ai,
   aj, aimax, ailen, bs, bs2, low, high, t, _i, N, brow, bcol, ridx, cidx,
   nrow, rmax, ierr) -- only usable inside MatSetValues_MPIBAIJ-style bodies.
*/
#define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv,orow,ocol)       \
  { \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap +  bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value;  \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    ierr = PetscArraymove(rp+_i+1,rp+_i,N-_i+1);CHKERRQ(ierr);\
    ierr = PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1));CHKERRQ(ierr); \
    ierr = PetscArrayzero(ap+bs2*_i,bs2);CHKERRQ(ierr);  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
a_noinsert:; \
    ailen[brow] = nrow; \
  }
131 
/*
   MatSetValues_SeqBAIJ_B_Private - identical to MatSetValues_SeqBAIJ_A_Private
   but operates on the off-diagonal block (b, bj, bi, ba, bimax, bilen) and
   uses the b_noinsert label.  See the comment on the A variant above for the
   search/insert algorithm and the caller-local variables it depends on.
*/
#define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv,orow,ocol)       \
  { \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap +  bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value;  \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column  (%D, %D) into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    ierr = PetscArraymove(rp+_i+1,rp+_i,N-_i+1);CHKERRQ(ierr);\
    ierr = PetscArraymove(ap+bs2*(_i+1),ap+bs2*_i,bs2*(N-_i+1));CHKERRQ(ierr);\
    ierr = PetscArrayzero(ap+bs2*_i,bs2);CHKERRQ(ierr);  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
b_noinsert:; \
    bilen[brow] = nrow; \
  }
167 
/*
   MatSetValues_MPIBAIJ - point-index MatSetValues for the MPIBAIJ format.

   Entries in locally owned rows are routed to the diagonal block (baij->A)
   or the off-diagonal block (baij->B) via the inlined insertion macros above;
   entries in off-process rows are placed in the stash for communication at
   assembly time.
*/
PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  MatScalar      value;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  /* ownership ranges in point (scalar) units, matching the im[]/in[] indices */
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat         A     = baij->A;
  Mat_SeqBAIJ *a    = (Mat_SeqBAIJ*)(A)->data;
  PetscInt    *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar   *aa   =a->a;

  Mat         B     = baij->B;
  Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
  PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar   *ba   =b->a;

  PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar *ap,*bap;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue; /* negative row indices are silently skipped */
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      /* locally owned row */
      row = im[i] - rstart_orig;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig) {
          /* column lies in the diagonal block */
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv,im[i],in[j]);
        } else if (in[j] < 0) continue; /* negative columns silently skipped */
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          /* column lies in the off-diagonal block */
          if (mat->was_assembled) {
            /* after a previous assembly B uses compacted local columns; map
               the global block column through colmap (built lazily) */
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1; /* undo the +1 sentinel shift; -1 => not found */
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
              /* new off-diagonal block column: fall back to global numbering
                 by disassembling B (it will be recompacted at assembly) */
              ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B    = baij->B;
              b    = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba   =b->a;
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
            else col += in[j]%bs; /* colmap gives the block start; add offset within block */
          } else col = in[j];
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv,im[i],in[j]);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      /* off-process row: stash for MatAssemblyBegin/End communication */
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
254 
/*
   MatSetValuesBlocked_SeqBAIJ_Inlined - insert or add ONE bs x bs block of
   values at block position (row,col) of a SeqBAIJ matrix.  v points at the
   bs*bs values; their interpretation (row- vs column-oriented) follows
   a->roworiented, while the stored block is kept column-major.  (orow,ocol)
   are the global block indices, used only in error messages.
*/
PETSC_STATIC_INLINE PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A,PetscInt row,PetscInt col,const PetscScalar v[],InsertMode is,PetscInt orow,PetscInt ocol)
{
  Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
  PetscInt          *rp,low,high,t,ii,jj,nrow,i,rmax,N;
  PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
  PetscErrorCode    ierr;
  PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs;
  PetscBool         roworiented=a->roworiented;
  const PetscScalar *value     = v;
  MatScalar         *ap,*aa = a->a,*bap;

  PetscFunctionBegin;
  rp   = aj + ai[row];     /* block-column indices of this block row */
  ap   = aa + bs2*ai[row]; /* first stored value of this block row */
  rmax = imax[row];        /* allocated block capacity of this row */
  nrow = ailen[row];       /* blocks currently used in this row */
  value = v;               /* (redundant: value is already initialized to v above) */
  low = 0;
  high = nrow;
  /* binary search narrows [low,high) to <= 7 candidates, then linear scan */
  while (high-low > 7) {
    t = (low+high)/2;
    if (rp[t] > col) high = t;
    else             low  = t;
  }
  for (i=low; i<high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) {
      /* block already present: accumulate or overwrite in place */
      bap = ap +  bs2*i;
      if (roworiented) {
        /* input is row-major: stride through the column-major storage */
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] += *value++;
            }
          }
        } else {
          for (ii=0; ii<bs; ii++) {
            for (jj=ii; jj<bs2; jj+=bs) {
              bap[jj] = *value++;
            }
          }
        }
      } else {
        /* input is column-major: copy column by column */
        if (is == ADD_VALUES) {
          for (ii=0; ii<bs; ii++,value+=bs) {
            for (jj=0; jj<bs; jj++) {
              bap[jj] += value[jj];
            }
            bap += bs;
          }
        } else {
          for (ii=0; ii<bs; ii++,value+=bs) {
            for (jj=0; jj<bs; jj++) {
              bap[jj]  = value[jj];
            }
            bap += bs;
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2; /* quietly ignore new blocks */
  if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new global block indexed nonzero block (%D, %D) in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
  N = nrow++ - 1; high++;
  /* shift up all the later entries in this row */
  ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr);
  ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr);
  rp[i] = col;
  bap   = ap +  bs2*i;
  /* copy the new block in, transposing when the input is row-oriented */
  if (roworiented) {
    for (ii=0; ii<bs; ii++) {
      for (jj=ii; jj<bs2; jj+=bs) {
        bap[jj] = *value++;
      }
    }
  } else {
    for (ii=0; ii<bs; ii++) {
      for (jj=0; jj<bs; jj++) {
        *bap++ = *value++;
      }
    }
  }
  noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(0);
}
343 
/*
    This routine should be optimized so that the block copy at ** Here a copy is required ** below is not needed
    by passing additional stride information into the MatSetValuesBlocked_SeqBAIJ_Inlined() routine
*/
/*
   MatSetValuesBlocked_MPIBAIJ - block-index MatSetValues: im[]/in[] are BLOCK
   row/column indices and v holds m*n blocks of bs*bs values each.  Each block
   is gathered (copied if strided) into the contiguous scratch buffer
   baij->barray and handed to the inlined SeqBAIJ insertion routine; blocks in
   off-process rows are stashed for assembly-time communication.
*/
PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  const PetscScalar *value;
  MatScalar         *barray     = baij->barray;
  PetscBool         roworiented = baij->roworiented;
  PetscErrorCode    ierr;
  /* ownership ranges in BLOCK units, matching the im[]/in[] indices */
  PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
  PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  /* lazily allocate the one-block scratch buffer, kept on the matrix */
  if (!barray) {
    ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  /* distance (in scalars) between consecutive rows of one block inside v */
  if (roworiented) stepval = (n-1)*bs;
  else stepval = (m-1)*bs;

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue; /* negative block rows silently skipped */
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed row too large %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If NumCol = 1 then a copy is not required */
        /* NOTE: these two aliasing branches cast away const on v; the callee
           takes const PetscScalar[] so v is not written through barray */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + (i*(stepval+bs) + j)*bs;
          } else {
            value = v + (j*(stepval+bs) + i)*bs;
          }
          /* gather the strided bs x bs block into contiguous barray */
          for (ii=0; ii<bs; ii++,value+=bs+stepval) {
            for (jj=0; jj<bs; jj++) barray[jj] = value[jj];
            barray += bs;
          }
          barray -= bs2; /* rewind to the start of the scratch block */
        }

        if (in[j] >= cstart && in[j] < cend) {
          /* block column in the diagonal part */
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block indexed column too large %D max %D",in[j],baij->Nbs-1);
#endif
        else {
          /* block column in the off-diagonal part */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
            /* colmap stores point (scalar) offsets; a valid entry for a block
               column must therefore be a multiple of bs (after the -1 shift) */
#if defined(PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs; /* convert the point offset back to a block column */
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
              /* new off-diagonal block column: disassemble B and use global numbering */
              ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  =  in[j];
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked indexed nonzero block (%D, %D) into matrix",im[i],in[j]);
          } else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
        }
      }
    } else {
      /* off-process block row: stash for assembly-time exchange */
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process block indexed row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
444 
/* Multiplicative hashing: HASH_KEY is (an approximation of) the golden-ratio
   conjugate; HASH scales the fractional part of key*HASH_KEY into [0,size).
   tmp must be a PetscReal lvalue supplied by the caller (scratch storage). */
#define HASH_KEY 0.6180339887
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/*
   MatSetValues_MPIBAIJ_HT - point-index MatSetValues using the precomputed
   hash table (built by MatCreateHashTable_MPIBAIJ_Private()) that maps a
   (block row, block col) key directly to the address of the stored bs x bs
   block.  Only pre-existing nonzero blocks can be written; a table miss is
   an error.  Off-process rows go to the stash as usual.
*/
PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij       = (Mat_MPIBAIJ*)mat->data;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig  =mat->rmap->rend,Nbs=baij->Nbs;
  PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
  PetscReal      tmp;
  MatScalar      **HD = baij->hd,value;
#if defined(PETSC_USE_DEBUG)
  /* search statistics, accumulated back onto the matrix at the end */
  PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j=0; j<n; j++) {
        col = in[j];
        if (roworiented) value = v[i*n+j];
        else             value = v[i+j*m];
        /* Look up into the hash table: key is the global block position + 1
           (+1 keeps 0 free as the empty-slot marker) */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);


        idx = h1;
#if defined(PETSC_USE_DEBUG)
        insert_ct++;
        total_ct++;
        /* linear probe forward from h1, wrapping once to the front */
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
#endif
        /* A HASH table entry is found, so insert the values at the correct address */
        /* HD[idx] points at the bs x bs block, stored column-major */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else if (!baij->donotstash) {
      if (roworiented) {
        ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
      } else {
        ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct  += total_ct;
  baij->ht_insert_ct += insert_ct;
#endif
  PetscFunctionReturn(0);
}
519 
520 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
521 {
522   Mat_MPIBAIJ       *baij       = (Mat_MPIBAIJ*)mat->data;
523   PetscBool         roworiented = baij->roworiented;
524   PetscErrorCode    ierr;
525   PetscInt          i,j,ii,jj,row,col;
526   PetscInt          rstart=baij->rstartbs;
527   PetscInt          rend  =mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
528   PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
529   PetscReal         tmp;
530   MatScalar         **HD = baij->hd,*baij_a;
531   const PetscScalar *v_t,*value;
532 #if defined(PETSC_USE_DEBUG)
533   PetscInt          total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
534 #endif
535 
536   PetscFunctionBegin;
537   if (roworiented) stepval = (n-1)*bs;
538   else stepval = (m-1)*bs;
539 
540   for (i=0; i<m; i++) {
541 #if defined(PETSC_USE_DEBUG)
542     if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
543     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
544 #endif
545     row = im[i];
546     v_t = v + i*nbs2;
547     if (row >= rstart && row < rend) {
548       for (j=0; j<n; j++) {
549         col = in[j];
550 
551         /* Look up into the Hash Table */
552         key = row*Nbs+col+1;
553         h1  = HASH(size,key,tmp);
554 
555         idx = h1;
556 #if defined(PETSC_USE_DEBUG)
557         total_ct++;
558         insert_ct++;
559         if (HT[idx] != key) {
560           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
561           if (idx == size) {
562             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
563             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
564           }
565         }
566 #else
567         if (HT[idx] != key) {
568           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
569           if (idx == size) {
570             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
571             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
572           }
573         }
574 #endif
575         baij_a = HD[idx];
576         if (roworiented) {
577           /*value = v + i*(stepval+bs)*bs + j*bs;*/
578           /* value = v + (i*(stepval+bs)+j)*bs; */
579           value = v_t;
580           v_t  += bs;
581           if (addv == ADD_VALUES) {
582             for (ii=0; ii<bs; ii++,value+=stepval) {
583               for (jj=ii; jj<bs2; jj+=bs) {
584                 baij_a[jj] += *value++;
585               }
586             }
587           } else {
588             for (ii=0; ii<bs; ii++,value+=stepval) {
589               for (jj=ii; jj<bs2; jj+=bs) {
590                 baij_a[jj] = *value++;
591               }
592             }
593           }
594         } else {
595           value = v + j*(stepval+bs)*bs + i*bs;
596           if (addv == ADD_VALUES) {
597             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
598               for (jj=0; jj<bs; jj++) {
599                 baij_a[jj] += *value++;
600               }
601             }
602           } else {
603             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
604               for (jj=0; jj<bs; jj++) {
605                 baij_a[jj] = *value++;
606               }
607             }
608           }
609         }
610       }
611     } else {
612       if (!baij->donotstash) {
613         if (roworiented) {
614           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
615         } else {
616           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
617         }
618       }
619     }
620   }
621 #if defined(PETSC_USE_DEBUG)
622   baij->ht_total_ct  += total_ct;
623   baij->ht_insert_ct += insert_ct;
624 #endif
625   PetscFunctionReturn(0);
626 }
627 
/*
   MatGetValues_MPIBAIJ - retrieves the m x n submatrix of point-indexed
   entries (idxm x idxn) into v (row-major).  Only locally owned rows are
   supported; columns outside the local off-diagonal pattern return 0.0.
*/
PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  /* ownership ranges in point (scalar) units */
  PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
  PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          /* column in the diagonal block */
          col  = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          /* column in the off-diagonal block: translate through colmap */
          if (!baij->colmap) {
            ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--; /* undo the +1 sentinel shift; -1 => block column not local */
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          /* not present locally (or stale colmap entry) => value is zero */
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs; /* block start + offset within the block */
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}
668 
/*
   MatNorm_MPIBAIJ - computes the Frobenius, 1- (max column sum of absolute
   values), or infinity- (max row sum) norm of the parallel matrix by
   combining the contributions of the diagonal (A) and off-diagonal (B)
   sequential blocks across all ranks.
*/
PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    /* single rank: the diagonal block IS the whole matrix */
    ierr =  MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      /* sum of squared magnitudes of all stored values, then global sum */
      v  = amat->a;
      nz = amat->nz*bs2;
      for (i=0; i<nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      v  = bmat->a;
      nz = bmat->nz*bs2;
      for (i=0; i<nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *nrm = PetscSqrtReal(*nrm);
    } else if (type == NORM_1) { /* max column sum */
      PetscReal *tmp,*tmp2;
      /* NOTE(review): cstart is taken from rstartbs (first local block ROW)
         but is used below as the first global block COLUMN of the diagonal
         block; this is correct only when row and column block ownership
         coincide -- confirm for non-square layouts */
      PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
      /* tmp accumulates per-global-column sums locally; tmp2 after reduction */
      ierr = PetscCalloc1(mat->cmap->N,&tmp);CHKERRQ(ierr);
      ierr = PetscMalloc1(mat->cmap->N,&tmp2);CHKERRQ(ierr);
      v    = amat->a; jj = amat->j;
      for (i=0; i<amat->nz; i++) {
        for (j=0; j<bs; j++) {
          col = bs*(cstart + *jj) + j; /* column index */
          for (row=0; row<bs; row++) {
            tmp[col] += PetscAbsScalar(*v);  v++;
          }
        }
        jj++;
      }
      v = bmat->a; jj = bmat->j;
      for (i=0; i<bmat->nz; i++) {
        for (j=0; j<bs; j++) {
          col = bs*garray[*jj] + j; /* garray maps local to global block column */
          for (row=0; row<bs; row++) {
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *nrm = 0.0;
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
      ierr = PetscFree(tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row sum */
      PetscReal *sums;
      ierr = PetscMalloc1(bs,&sums);CHKERRQ(ierr);
      sum  = 0.0;
      /* per block row: accumulate the bs point-row sums over A and B, then
         keep the largest.  Rows are wholly local so a MAX reduction suffices */
      for (j=0; j<amat->mbs; j++) {
        for (row=0; row<bs; row++) sums[row] = 0.0;
        v  = amat->a + bs2*amat->i[j];
        nz = amat->i[j+1]-amat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        v  = bmat->a + bs2*bmat->i[j];
        nz = bmat->i[j+1]-bmat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        for (row=0; row<bs; row++) {
          if (sums[row] > sum) sum = sums[row];
        }
      }
      ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscFree(sums);CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for this norm yet");
  }
  PetscFunctionReturn(0);
}
761 
/*
  Creates the hash table, and sets the table
  This table is created only once.
  If new entried need to be added to the matrix
  then the hash table has to be destroyed and
  recreated.

  The table maps key = (global block row)*Nbs + (global block col) + 1 to the
  address of the corresponding stored bs x bs block (HD).  Collisions are
  resolved by linear probing with wrap-around; factor controls the load
  (table size = factor * number of nonzero blocks).
*/
PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            A     = baij->A,B=baij->B;
  Mat_SeqBAIJ    *a    = (Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)B->data;
  PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
  PetscErrorCode ierr;
  PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
  PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
  PetscInt       *HT,key;
  MatScalar      **HD;
  PetscReal      tmp;
#if defined(PETSC_USE_INFO)
  PetscInt ct=0,max=0; /* collision count and longest probe sequence */
#endif

  PetscFunctionBegin;
  if (baij->ht) PetscFunctionReturn(0); /* already built */

  baij->ht_size = (PetscInt)(factor*nz);
  ht_size       = baij->ht_size;

  /* Allocate Memory for Hash Table */
  /* PetscCalloc2 zeroes both arrays: HT[i]==0 marks an empty slot */
  ierr = PetscCalloc2(ht_size,&baij->hd,ht_size,&baij->ht);CHKERRQ(ierr);
  HD   = baij->hd;
  HT   = baij->ht;

  /* Loop Over A */
  for (i=0; i<a->mbs; i++) {
    for (j=ai[i]; j<ai[i+1]; j++) {
      row = i+rstart;
      col = aj[j]+cstart;

      key = row*Nbs + col + 1; /* +1 keeps 0 free as the empty-slot marker */
      h1  = HASH(ht_size,key,tmp);
      /* linear probing: scan forward (with wrap) for the first empty slot */
      for (k=0; k<ht_size; k++) {
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = a->a + j*bs2; /* address of the stored block */
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k> max) max = k;
#endif
    }
  }
  /* Loop Over B */
  for (i=0; i<b->mbs; i++) {
    for (j=bi[i]; j<bi[i+1]; j++) {
      row = i+rstart;
      col = garray[bj[j]]; /* garray maps local to global block column */
      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++) {
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = b->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k> max) max = k;
#endif
    }
  }

  /* Print Summary */
#if defined(PETSC_USE_INFO)
  for (i=0,j=0; i<ht_size; i++) {
    if (HT[i]) j++;
  }
  ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}
853 
854 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
855 {
856   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
857   PetscErrorCode ierr;
858   PetscInt       nstash,reallocs;
859 
860   PetscFunctionBegin;
861   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
862 
863   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
864   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
865   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
866   ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
867   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
868   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
869   PetscFunctionReturn(0);
870 }
871 
/* Completes assembly: receives and inserts stashed off-process entries,
   assembles the local A (diagonal) and B (off-diagonal) blocks, handles
   collective disassembly, builds the multiply scatter on first final
   assembly, and optionally switches to hash-table insertion. Collective. */
PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *a   =(Mat_SeqBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscBool      r1,r2,r3,other_disassembled;
  MatScalar      *val;
  PetscMPIInt    n;

  PetscFunctionBegin;
  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  /* drain the point-wise stash: each message holds (row,col,val) triples from other ranks */
  if (!baij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break; /* no more incoming messages */

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;

    (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* each stashed block carries bs2 values, hence the val+i*bs2 offset */
        ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);

    baij->roworiented = r1;
    a->roworiented    = r2;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselfs, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    /* MPI_PROD of was_assembled: result is true only if every rank was assembled */
    ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    /* first final assembly: set up the scatter used by matrix-vector products */
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
    ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",(double)((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);

    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  /* hash-table insertion was requested: build the table and switch to the HT fast paths */
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);

    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  /* release the MatGetRow() work space; row lengths may change after reassembly */
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);

  baij->rowvalues = 0;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
985 
986 extern PetscErrorCode MatView_SeqBAIJ(Mat,PetscViewer);
987 #include <petscdraw.h>
/* Views the parallel matrix on ASCII, draw, or socket viewers. INFO formats are
   printed per-rank; otherwise the whole matrix is gathered onto rank 0 as a
   temporary MPIBAIJ matrix and viewed there. Collective. */
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = baij->rank;
  PetscInt          bs   = mat->rmap->bs;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-rank summary of local/diagonal/off-diagonal nonzero counts */
      MatInfo info;
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %g\n",
                                                rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(double)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar   *a;
    const char  *matname;

    /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      /* rank 0 holds all rows of the temporary matrix; everyone else holds none */
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
    ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      /* rvals holds the bs consecutive global point rows of block row i */
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        for (k=0; k<bs; k++) {
          /* blocks are stored column-oriented: insert one point column at a time */
          ierr      = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part; garray maps local off-diagonal block columns to global */
    Aloc = (Mat_SeqBAIJ*)baij->B->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr      = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr);
    if (!rank) {
      /* rank 0 owns every row, so its local diagonal block is the whole matrix */
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,matname);CHKERRQ(ierr);
      ierr = MatView_SeqBAIJ(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
    ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1105 
1106 static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
1107 {
1108   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)mat->data;
1109   Mat_SeqBAIJ    *A = (Mat_SeqBAIJ*)a->A->data;
1110   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)a->B->data;
1111   PetscErrorCode ierr;
1112   PetscInt       i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen;
1113   PetscInt       *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll;
1114   int            fd;
1115   PetscScalar    *column_values;
1116   FILE           *file;
1117   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1118   PetscInt       message_count,flowcontrolcount;
1119 
1120   PetscFunctionBegin;
1121   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1122   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1123   nz   = bs2*(A->nz + B->nz);
1124   rlen = mat->rmap->n;
1125   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1126   if (!rank) {
1127     header[0] = MAT_FILE_CLASSID;
1128     header[1] = mat->rmap->N;
1129     header[2] = mat->cmap->N;
1130 
1131     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1132     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1133     /* get largest number of rows any processor has */
1134     range = mat->rmap->range;
1135     for (i=1; i<size; i++) {
1136       rlen = PetscMax(rlen,range[i+1] - range[i]);
1137     }
1138   } else {
1139     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1140   }
1141 
1142   ierr = PetscMalloc1(rlen/bs,&crow_lens);CHKERRQ(ierr);
1143   /* compute lengths of each row  */
1144   for (i=0; i<a->mbs; i++) {
1145     crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1146   }
1147   /* store the row lengths to the file */
1148   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1149   if (!rank) {
1150     MPI_Status status;
1151     ierr = PetscMalloc1(rlen,&row_lens);CHKERRQ(ierr);
1152     rlen = (range[1] - range[0])/bs;
1153     for (i=0; i<rlen; i++) {
1154       for (j=0; j<bs; j++) {
1155         row_lens[i*bs+j] = bs*crow_lens[i];
1156       }
1157     }
1158     ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1159     for (i=1; i<size; i++) {
1160       rlen = (range[i+1] - range[i])/bs;
1161       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1162       ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1163       for (k=0; k<rlen; k++) {
1164         for (j=0; j<bs; j++) {
1165           row_lens[k*bs+j] = bs*crow_lens[k];
1166         }
1167       }
1168       ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1169     }
1170     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1171     ierr = PetscFree(row_lens);CHKERRQ(ierr);
1172   } else {
1173     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1174     ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1175     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1176   }
1177   ierr = PetscFree(crow_lens);CHKERRQ(ierr);
1178 
1179   /* load up the local column indices. Include for all rows not just one for each block row since process 0 does not have the
1180      information needed to make it for each row from a block row. This does require more communication but still not more than
1181      the communication needed for the nonzero values  */
1182   nzmax = nz; /*  space a largest processor needs */
1183   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1184   ierr  = PetscMalloc1(nzmax,&column_indices);CHKERRQ(ierr);
1185   cnt   = 0;
1186   for (i=0; i<a->mbs; i++) {
1187     pcnt = cnt;
1188     for (j=B->i[i]; j<B->i[i+1]; j++) {
1189       if ((col = garray[B->j[j]]) > cstart) break;
1190       for (l=0; l<bs; l++) {
1191         column_indices[cnt++] = bs*col+l;
1192       }
1193     }
1194     for (k=A->i[i]; k<A->i[i+1]; k++) {
1195       for (l=0; l<bs; l++) {
1196         column_indices[cnt++] = bs*(A->j[k] + cstart)+l;
1197       }
1198     }
1199     for (; j<B->i[i+1]; j++) {
1200       for (l=0; l<bs; l++) {
1201         column_indices[cnt++] = bs*garray[B->j[j]]+l;
1202       }
1203     }
1204     len = cnt - pcnt;
1205     for (k=1; k<bs; k++) {
1206       ierr = PetscArraycpy(&column_indices[cnt],&column_indices[pcnt],len);CHKERRQ(ierr);
1207       cnt += len;
1208     }
1209   }
1210   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1211 
1212   /* store the columns to the file */
1213   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1214   if (!rank) {
1215     MPI_Status status;
1216     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1217     for (i=1; i<size; i++) {
1218       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1219       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1220       ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1221       ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1222     }
1223     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1224   } else {
1225     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1226     ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1227     ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1228     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1229   }
1230   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1231 
1232   /* load up the numerical values */
1233   ierr = PetscMalloc1(nzmax,&column_values);CHKERRQ(ierr);
1234   cnt  = 0;
1235   for (i=0; i<a->mbs; i++) {
1236     rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]);
1237     for (j=B->i[i]; j<B->i[i+1]; j++) {
1238       if (garray[B->j[j]] > cstart) break;
1239       for (l=0; l<bs; l++) {
1240         for (ll=0; ll<bs; ll++) {
1241           column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1242         }
1243       }
1244       cnt += bs;
1245     }
1246     for (k=A->i[i]; k<A->i[i+1]; k++) {
1247       for (l=0; l<bs; l++) {
1248         for (ll=0; ll<bs; ll++) {
1249           column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll];
1250         }
1251       }
1252       cnt += bs;
1253     }
1254     for (; j<B->i[i+1]; j++) {
1255       for (l=0; l<bs; l++) {
1256         for (ll=0; ll<bs; ll++) {
1257           column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1258         }
1259       }
1260       cnt += bs;
1261     }
1262     cnt += (bs-1)*rlen;
1263   }
1264   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1265 
1266   /* store the column values to the file */
1267   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1268   if (!rank) {
1269     MPI_Status status;
1270     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1271     for (i=1; i<size; i++) {
1272       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1273       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1274       ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1275       ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1276     }
1277     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1278   } else {
1279     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1280     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1281     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1282     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1283   }
1284   ierr = PetscFree(column_values);CHKERRQ(ierr);
1285 
1286   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1287   if (file) {
1288     fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
1289   }
1290   PetscFunctionReturn(0);
1291 }
1292 
1293 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
1294 {
1295   PetscErrorCode ierr;
1296   PetscBool      iascii,isdraw,issocket,isbinary;
1297 
1298   PetscFunctionBegin;
1299   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1300   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1301   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1302   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1303   if (iascii || isdraw || issocket) {
1304     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1305   } else if (isbinary) {
1306     ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1307   }
1308   PetscFunctionReturn(0);
1309 }
1310 
/* Frees everything owned by an MPIBAIJ matrix (sequential blocks, stashes,
   scatter context, hash table, work arrays) and detaches the composed methods
   registered at creation/convert time. */
PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  /* assembly stashes for off-process entries */
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  /* the local diagonal and off-diagonal sequential blocks */
  ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
  /* global-to-local column map: a table or a plain array depending on configuration */
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  /* ghost vector and scatter used by matrix-vector products */
  ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
  /* MatGetRow() and MatSetValues() work space */
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  /* hash table used by the *_HT insertion fast paths */
  ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  /* reset the type name and remove all composed methods */
  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C",NULL);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_hypre_C",NULL);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_is_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatPtAP_is_mpibaij_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1354 
1355 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1356 {
1357   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1358   PetscErrorCode ierr;
1359   PetscInt       nt;
1360 
1361   PetscFunctionBegin;
1362   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1363   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1364   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1365   if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
1366   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1367   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1368   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1369   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1370   PetscFunctionReturn(0);
1371 }
1372 
1373 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1374 {
1375   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1376   PetscErrorCode ierr;
1377 
1378   PetscFunctionBegin;
1379   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1380   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1381   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1382   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1383   PetscFunctionReturn(0);
1384 }
1385 
1386 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1387 {
1388   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1389   PetscErrorCode ierr;
1390 
1391   PetscFunctionBegin;
1392   /* do nondiagonal part */
1393   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1394   /* do local part */
1395   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1396   /* add partial results together */
1397   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1398   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1399   PetscFunctionReturn(0);
1400 }
1401 
1402 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1403 {
1404   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1405   PetscErrorCode ierr;
1406 
1407   PetscFunctionBegin;
1408   /* do nondiagonal part */
1409   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1410   /* do local part */
1411   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1412   /* add partial results together */
1413   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1414   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1415   PetscFunctionReturn(0);
1416 }
1417 
1418 /*
1419   This only works correctly for square matrices where the subblock A->A is the
1420    diagonal block
1421 */
1422 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1423 {
1424   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1425   PetscErrorCode ierr;
1426 
1427   PetscFunctionBegin;
1428   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1429   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1430   PetscFunctionReturn(0);
1431 }
1432 
1433 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
1434 {
1435   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1436   PetscErrorCode ierr;
1437 
1438   PetscFunctionBegin;
1439   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1440   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1441   PetscFunctionReturn(0);
1442 }
1443 
/* Returns one locally-owned point row of the matrix with globally numbered,
   increasing column indices, merging the diagonal (A) and off-diagonal (B)
   pieces. The returned arrays are internal work space; callers must pair this
   with MatRestoreRow_MPIBAIJ() and only one row may be active at a time. */
PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;

  PetscFunctionBegin;
  if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
  if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
    PetscInt    max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      /* tmp = number of nonzero blocks in block row i (A part plus B part) */
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr);
  }
  lrow = row - brstart; /* local point row index */

  /* only request from A/B the pieces the caller actually asked for */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray; /* maps local off-diagonal block columns to global block columns */
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      /* imark = number of B entries whose global column precedes the diagonal block */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* imark already computed by the values pass above */
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          /* values were not requested: find imark here */
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1520 
1521 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1522 {
1523   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1524 
1525   PetscFunctionBegin;
1526   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1527   baij->getrowactive = PETSC_FALSE;
1528   PetscFunctionReturn(0);
1529 }
1530 
1531 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1532 {
1533   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1534   PetscErrorCode ierr;
1535 
1536   PetscFunctionBegin;
1537   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1538   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1539   PetscFunctionReturn(0);
1540 }
1541 
/*
   Gathers matrix statistics (nonzero counts, memory, mallocs) by summing the
   local diagonal (A) and off-diagonal (B) blocks, then reducing across ranks
   with MAX or SUM according to flag.
*/
PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
  Mat            A  = a->A,B = a->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = (PetscReal)matin->rmap->bs;

  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);

  /* stash the diagonal block's local counts before info is overwritten below */
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;

  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);

  /* accumulate the off-diagonal block's counts on top */
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;

  if (flag == MAT_LOCAL) {
    /* per-rank totals: no communication needed */
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPIU_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else SETERRQ1(PetscObjectComm((PetscObject)matin),PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}
1590 
/*
   Dispatches a MatOption to the appropriate place: forwarded to the local
   A and/or B blocks, stored in MPIBAIJ-specific state, or ignored/errored.
*/
PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  /* nonzero-pattern options apply to both sequential blocks */
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A,1);
    /* record locally (used by MatSetValues) and forward to both blocks */
    a->roworiented = flg;

    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    /* skip the stash: off-process MatSetValues entries will be dropped */
    a->donotstash = flg;
    break;
  case MAT_USE_HASH_TABLE:
    a->ht_flag = flg;
    a->ht_fact = 1.39; /* default over-allocation factor for the hash table */
    break;
  /* symmetry-type options only make sense on the diagonal block */
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SUBMAT_SINGLEIS:
  case MAT_SYMMETRY_ETERNAL:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}
1637 
/*
   Forms the transpose of A by walking the local A and B blocks and inserting
   each bs x bs block, transposed, into the new matrix B with MatSetValues.
   Preallocation of the result is unknown, so insertion may malloc.
*/
PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
  Mat_SeqBAIJ    *Aloc;
  Mat            B;
  PetscErrorCode ierr;
  PetscInt       M =A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
  PetscInt       bs=A->rmap->bs,mbs=baij->mbs;
  MatScalar      *a;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    /* create the result with row/column sizes swapped */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    /* Do not know preallocation information, but must set block size */
    ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,NULL,PETSC_DECIDE,NULL);CHKERRQ(ierr);
  } else {
    B = *matout;
  }

  /* copy over the A part */
  Aloc = (Mat_SeqBAIJ*)baij->A->data;
  ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
  /* rvals holds the bs consecutive global point-row indices of one block row */
  ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);

  for (i=0; i<mbs; i++) {
    rvals[0] = bs*(baij->rstartbs + i);
    for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
    for (j=ai[i]; j<ai[i+1]; j++) {
      /* aj holds local block columns; shift by cstartbs to get global */
      col = (baij->cstartbs+aj[j])*bs;
      for (k=0; k<bs; k++) {
        /* insert one column of the bs x bs block as a row of B (transpose) */
        ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);

        col++; a += bs;
      }
    }
  }
  /* copy over the B part */
  Aloc = (Mat_SeqBAIJ*)baij->B->data;
  ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
  for (i=0; i<mbs; i++) {
    rvals[0] = bs*(baij->rstartbs + i);
    for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
    for (j=ai[i]; j<ai[i+1]; j++) {
      /* off-diagonal block columns are compressed; garray maps to global */
      col = baij->garray[aj[j]]*bs;
      for (k=0; k<bs; k++) {
        ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
        col++;
        a += bs;
      }
    }
  }
  ierr = PetscFree(rvals);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) *matout = B;
  else {
    /* in-place: replace A's internals with B's and discard the shell */
    ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1701 
/*
   Computes mat = diag(ll) * mat * diag(rr). The right scaling of the
   off-diagonal block needs ghosted values of rr, so the scatter is started
   first and completed after the purely local work (communication/computation
   overlap).
*/
PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            a     = baij->A,b = baij->B;
  PetscErrorCode ierr;
  PetscInt       s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    /* left scaling is row-local, so the off-diagonal block can be done now */
    ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
  }
  /* scale  the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1732 
/*
   Zeroes the (globally numbered) rows[] of A, optionally placing diag on the
   diagonal and fixing b so that x satisfies the zeroed system (b = diag*x on
   those rows). rows[] may refer to rows owned by other ranks.
*/
PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIBAIJ   *l      = (Mat_MPIBAIJ *) A->data;
  PetscInt      *lrows;
  PetscInt       r, len;
  PetscBool      cong;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* get locally owned rows */
  ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
    ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
  }

  /* actually zap the local rows */
  /*
        Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
  ierr = MatZeroRows_SeqBAIJ(l->B,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
  ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
  if ((diag != 0.0) && cong) {
    /* square with matching layouts: the sequential routine sets the diagonal */
    ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,diag,NULL,NULL);CHKERRQ(ierr);
  } else if (diag != 0.0) {
    /* non-congruent layouts: zero first, then insert diagonal entries one by one */
    ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,0,0);CHKERRQ(ierr);
    if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
       MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
  }
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1791 
/*
   Zeroes both the rows and columns listed in rows[] (global numbering,
   possibly off-process). The row set is communicated with a PetscSF, the
   diagonal block is handled by MatZeroRowsColumns on l->A, and the
   off-diagonal block is edited directly in its raw BAIJ storage using a
   ghosted 0/1 mask to identify zeroed columns.
*/
PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIBAIJ       *l = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode    ierr;
  PetscMPIInt       n = A->rmap->n;
  PetscInt          i,j,k,r,p = 0,len = 0,row,col,count;
  PetscInt          *lrows,*owners = A->rmap->range;
  PetscSFNode       *rrows;
  PetscSF           sf;
  const PetscScalar *xx;
  PetscScalar       *bb,*mask;
  Vec               xmask,lmask;
  Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ*)l->B->data;
  PetscInt           bs = A->rmap->bs, bs2 = baij->bs2;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
  for (r = 0; r < n; ++r) lrows[r] = -1;
  ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
  for (r = 0; r < N; ++r) {
    const PetscInt idx   = rows[r];
    if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
    if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
      ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
  ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
  /* Collect flags for rows to be zeroed */
  ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
  ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
  /* Compress and put in row numbers */
  /* after the reduce, lrows[r] >= 0 marks a local row to zero; compact in place */
  for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
  /* handle off diagonal part of matrix */
  /* build a 0/1 mask over owned columns and scatter it to ghost positions */
  ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
  ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
  ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
  for (i=0; i<len; i++) bb[lrows[i]] = 1;
  ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
  ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecDestroy(&xmask);CHKERRQ(ierr);
  if (x) {
    /* ghost values of x are needed to correct b for the eliminated columns */
    ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
    ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
  }
  ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
  /* remove zeroed rows of off diagonal matrix */
  for (i = 0; i < len; ++i) {
    row   = lrows[i];
    /* number of point entries stored in the zeroed point row of block row row/bs */
    count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
    /* aa starts at the (row%bs)-th point row inside the first block of the block row */
    aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
    for (k = 0; k < count; ++k) {
      aa[0] = 0.0;
      aa   += bs; /* column-major blocks: stride bs steps to the next column entry */
    }
  }
  /* loop over all elements of off process part of matrix zeroing removed columns*/
  for (i = 0; i < l->B->rmap->N; ++i) {
    row = i/bs;
    for (j = baij->i[row]; j < baij->i[row+1]; ++j) {
      for (k = 0; k < bs; ++k) {
        col = bs*baij->j[j] + k;
        if (PetscAbsScalar(mask[col])) {
          /* entry (i, col) of the off-diagonal part lies in a zeroed column */
          aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
          if (x) bb[i] -= aa[0]*xx[col]; /* move the known term to the rhs */
          aa[0] = 0.0;
        }
      }
    }
  }
  if (x) {
    ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
  }
  ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
  ierr = VecDestroy(&lmask);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1887 
1888 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1889 {
1890   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1891   PetscErrorCode ierr;
1892 
1893   PetscFunctionBegin;
1894   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1895   PetscFunctionReturn(0);
1896 }
1897 
1898 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat*);
1899 
1900 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool  *flag)
1901 {
1902   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1903   Mat            a,b,c,d;
1904   PetscBool      flg;
1905   PetscErrorCode ierr;
1906 
1907   PetscFunctionBegin;
1908   a = matA->A; b = matA->B;
1909   c = matB->A; d = matB->B;
1910 
1911   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1912   if (flg) {
1913     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1914   }
1915   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1916   PetscFunctionReturn(0);
1917 }
1918 
1919 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
1920 {
1921   PetscErrorCode ierr;
1922   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1923   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
1924 
1925   PetscFunctionBegin;
1926   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1927   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1928     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1929   } else {
1930     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1931     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1932   }
1933   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
1934   PetscFunctionReturn(0);
1935 }
1936 
1937 PetscErrorCode MatSetUp_MPIBAIJ(Mat A)
1938 {
1939   PetscErrorCode ierr;
1940 
1941   PetscFunctionBegin;
1942   ierr = MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1943   PetscFunctionReturn(0);
1944 }
1945 
1946 PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
1947 {
1948   PetscErrorCode ierr;
1949   PetscInt       bs = Y->rmap->bs,m = Y->rmap->N/bs;
1950   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data;
1951   Mat_SeqBAIJ    *y = (Mat_SeqBAIJ*)Y->data;
1952 
1953   PetscFunctionBegin;
1954   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
1955   PetscFunctionReturn(0);
1956 }
1957 
/*
   Computes Y = a*X + Y. With identical nonzero patterns this is a pair of
   BLAS axpy calls on the raw block storage; with a subset pattern it defers
   to the generic routine; otherwise a new matrix with the merged pattern is
   preallocated and Y is replaced by it.
*/
PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ*)X->data,*yy=(Mat_MPIBAIJ*)Y->data;
  PetscBLASInt   bnz,one=1;
  Mat_SeqBAIJ    *x,*y;
  PetscInt       bs2 = Y->rmap->bs*Y->rmap->bs;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscScalar alpha = a;
    /* identical patterns: axpy directly over the stored values (nz blocks of bs2 scalars) */
    x    = (Mat_SeqBAIJ*)xx->A->data;
    y    = (Mat_SeqBAIJ*)yy->A->data;
    ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr);
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
    x    = (Mat_SeqBAIJ*)xx->B->data;
    y    = (Mat_SeqBAIJ*)yy->B->data;
    ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr);
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
    ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
  } else {
    /* unknown relation between the patterns: build a matrix preallocated
       for the union of the two patterns, compute into it, and swap it into Y */
    Mat      B;
    PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs;
    ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
    ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
    ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
    ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
    ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
    ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
    ierr = MatSetType(B,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatAXPYGetPreallocation_SeqBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
    ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
    /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! */
    ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
    ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
    ierr = PetscFree(nnz_d);CHKERRQ(ierr);
    ierr = PetscFree(nnz_o);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
2001 
2002 PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
2003 {
2004   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
2005   PetscErrorCode ierr;
2006 
2007   PetscFunctionBegin;
2008   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2009   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2010   PetscFunctionReturn(0);
2011 }
2012 
2013 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
2014 {
2015   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
2016   PetscErrorCode ierr;
2017 
2018   PetscFunctionBegin;
2019   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2020   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2021   PetscFunctionReturn(0);
2022 }
2023 
2024 PetscErrorCode MatCreateSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
2025 {
2026   PetscErrorCode ierr;
2027   IS             iscol_local;
2028   PetscInt       csize;
2029 
2030   PetscFunctionBegin;
2031   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
2032   if (call == MAT_REUSE_MATRIX) {
2033     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
2034     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2035   } else {
2036     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
2037   }
2038   ierr = MatCreateSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
2039   if (call == MAT_INITIAL_MATRIX) {
2040     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
2041     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
2042   }
2043   PetscFunctionReturn(0);
2044 }
2045 
2046 /*
2047   Not great since it makes two copies of the submatrix, first an SeqBAIJ
2048   in local and then by concatenating the local matrices the end result.
2049   Writing it directly would be much like MatCreateSubMatrices_MPIBAIJ().
2050   This routine is used for BAIJ and SBAIJ matrices (unfortunate dependency).
2051 */
/*
   Extracts the submatrix mat(isrow, iscol) into a parallel matrix. A
   sequential copy of the selected rows is gathered on each process with
   MatCreateSubMatrices_MPIBAIJ_local(), then its rows are redistributed
   into a new (or reused) parallel matrix with block size bs. csize is the
   requested local column size of the result (or PETSC_DECIDE).
*/
PetscErrorCode MatCreateSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs;
  PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
  Mat            M,Mreuse;
  MatScalar      *vwork,*aa;
  MPI_Comm       comm;
  IS             isrow_new, iscol_new;
  Mat_SeqBAIJ    *aij;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  /* The compression and expansion should be avoided. Doesn't point
     out errors, might change the indices, hence buggey */
  /* convert point-index sets to block-index sets expected by the local extraction */
  ierr = ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new);CHKERRQ(ierr);
  ierr = ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new);CHKERRQ(ierr);

  if (call ==  MAT_REUSE_MATRIX) {
    /* the sequential scratch submatrix was stashed on *newmat by the initial call */
    ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
    if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
    ierr = MatCreateSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_REUSE_MATRIX,&Mreuse);CHKERRQ(ierr);
  } else {
    ierr = MatCreateSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_INITIAL_MATRIX,&Mreuse);CHKERRQ(ierr);
  }
  ierr = ISDestroy(&isrow_new);CHKERRQ(ierr);
  ierr = ISDestroy(&iscol_new);CHKERRQ(ierr);
  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
  ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
  /* work in block units from here on */
  m    = m/bs;
  n    = n/bs;

  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqBAIJ*)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
      if (mglobal == n*bs) { /* square matrix */
        nlocal = m;
      } else {
        /* spread columns as evenly as possible across the ranks */
        nlocal = n/size + ((n % size) > rank);
      }
    } else {
      nlocal = csize/bs;
    }
    /* prefix-sum the local column counts to get this rank's column range */
    ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    rstart = rend - nlocal;
    if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);

    /* next, compute all the lengths */
    ierr  = PetscMalloc2(m+1,&dlens,m+1,&olens);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      jend = ii[i+1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j=0; j<jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    ierr = MatCreate(comm,&M);CHKERRQ(ierr);
    ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr);
    ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
    /* routine serves both BAIJ and SBAIJ; each preallocation call is a no-op
       for the other type */
    ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
    ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
  } else {
    PetscInt ml,nl;

    M    = *newmat;
    ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
    /* NOTE(review): ml is in point rows while m was divided by bs above, so this
       check looks inconsistent for bs > 1 — confirm against the callers */
    if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
    ierr = MatZeroEntries(M);CHKERRQ(ierr);
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
  aij  = (Mat_SeqBAIJ*)(Mreuse)->data;
  ii   = aij->i;
  jj   = aij->j;
  aa   = aij->a;
  /* insert the gathered rows block-row by block-row into the parallel matrix */
  for (i=0; i<m; i++) {
    row   = rstart/bs + i;
    nz    = ii[i+1] - ii[i];
    cwork = jj;     jj += nz;
    vwork = aa;     aa += nz*bs*bs;
    ierr  = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
  }

  ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call ==  MAT_INITIAL_MATRIX) {
    ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
    ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
2176 
2177 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B)
2178 {
2179   MPI_Comm       comm,pcomm;
2180   PetscInt       clocal_size,nrows;
2181   const PetscInt *rows;
2182   PetscMPIInt    size;
2183   IS             crowp,lcolp;
2184   PetscErrorCode ierr;
2185 
2186   PetscFunctionBegin;
2187   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2188   /* make a collective version of 'rowp' */
2189   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
2190   if (pcomm==comm) {
2191     crowp = rowp;
2192   } else {
2193     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
2194     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
2195     ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr);
2196     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
2197   }
2198   ierr = ISSetPermutation(crowp);CHKERRQ(ierr);
2199   /* make a local version of 'colp' */
2200   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
2201   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
2202   if (size==1) {
2203     lcolp = colp;
2204   } else {
2205     ierr = ISAllGather(colp,&lcolp);CHKERRQ(ierr);
2206   }
2207   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
2208   /* now we just get the submatrix */
2209   ierr = MatGetLocalSize(A,NULL,&clocal_size);CHKERRQ(ierr);
2210   ierr = MatCreateSubMatrix_MPIBAIJ_Private(A,crowp,lcolp,clocal_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
2211   /* clean up */
2212   if (pcomm!=comm) {
2213     ierr = ISDestroy(&crowp);CHKERRQ(ierr);
2214   }
2215   if (size>1) {
2216     ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
2217   }
2218   PetscFunctionReturn(0);
2219 }
2220 
2221 PetscErrorCode  MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
2222 {
2223   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data;
2224   Mat_SeqBAIJ *B    = (Mat_SeqBAIJ*)baij->B->data;
2225 
2226   PetscFunctionBegin;
2227   if (nghosts) *nghosts = B->nbs;
2228   if (ghosts) *ghosts = baij->garray;
2229   PetscFunctionReturn(0);
2230 }
2231 
/*
   MatGetSeqNonzeroStructure_MPIBAIJ - builds, on every process, a sequential
   (point-wise) AIJ matrix holding the nonzero structure of the whole parallel
   matrix in block-row/block-column numbering.  Only the sparsity pattern is
   gathered; numerical values are never set.
*/
PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat)
{
  Mat            B;
  Mat_MPIBAIJ    *a  = (Mat_MPIBAIJ*)A->data;
  Mat_SeqBAIJ    *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data;
  Mat_SeqAIJ     *b;
  PetscErrorCode ierr;
  PetscMPIInt    size,rank,*recvcounts = 0,*displs = 0;
  PetscInt       sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs;
  PetscInt       m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);

  /* ----------------------------------------------------------------
     Tell every processor the number of nonzeros per block row
  */
  ierr = PetscMalloc1(A->rmap->N/bs,&lens);CHKERRQ(ierr);
  /* local block-row lengths: diagonal block (ad) plus off-diagonal block (bd) */
  for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) {
    lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs];
  }
  ierr      = PetscMalloc1(2*size,&recvcounts);CHKERRQ(ierr);
  displs    = recvcounts + size;   /* displs shares the recvcounts allocation; freed together below */
  for (i=0; i<size; i++) {
    recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs;
    displs[i]     = A->rmap->range[i]/bs;
  }
#if defined(PETSC_HAVE_MPI_IN_PLACE)
  ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
#else
  sendcount = A->rmap->rend/bs - A->rmap->rstart/bs;
  ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
#endif
  /* ---------------------------------------------------------------
     Create the sequential matrix with the gathered block-row lengths
  */
  ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
  ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr);
  b    = (Mat_SeqAIJ*)B->data;

  /*--------------------------------------------------------------------
    Copy my part of matrix column indices over, translated to global block
    numbering and merged in sorted order: lower off-diagonal entries, then
    the diagonal block, then upper off-diagonal entries
  */
  sendcount  = ad->nz + bd->nz;
  jsendbuf   = b->j + b->i[rstarts[rank]/bs];  /* write directly into B's column array at this rank's slot */
  a_jsendbuf = ad->j;
  b_jsendbuf = bd->j;
  n          = A->rmap->rend/bs - A->rmap->rstart/bs;
  cnt        = 0;
  for (i=0; i<n; i++) {

    /* put in lower diagonal portion */
    m = bd->i[i+1] - bd->i[i];
    while (m > 0) {
      /* is it above diagonal (in bd (compressed) numbering) */
      if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break;
      jsendbuf[cnt++] = garray[*b_jsendbuf++];
      m--;
    }

    /* put in diagonal portion */
    for (j=ad->i[i]; j<ad->i[i+1]; j++) {
      jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++;
    }

    /* put in upper diagonal portion */
    while (m-- > 0) {
      jsendbuf[cnt++] = garray[*b_jsendbuf++];
    }
  }
  if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt);

  /*--------------------------------------------------------------------
    Gather all column indices to all processors
  */
  for (i=0; i<size; i++) {
    recvcounts[i] = 0;
    for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) {
      recvcounts[i] += lens[j];
    }
  }
  displs[0] = 0;
  for (i=1; i<size; i++) {
    displs[i] = displs[i-1] + recvcounts[i-1];
  }
#if defined(PETSC_HAVE_MPI_IN_PLACE)
  ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
#else
  ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
#endif
  /*--------------------------------------------------------------------
    Assemble the matrix into usable form (note numerical values not yet set)
  */
  /* set the b->ilen (length of each row) values */
  ierr = PetscArraycpy(b->ilen,lens,A->rmap->N/bs);CHKERRQ(ierr);
  /* set the b->i indices */
  b->i[0] = 0;
  for (i=1; i<=A->rmap->N/bs; i++) {
    b->i[i] = b->i[i-1] + lens[i-1];
  }
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = PetscFree(recvcounts);CHKERRQ(ierr);

  /* propagate whatever symmetry information is already known about A */
  if (A->symmetric) {
    ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
  } else if (A->hermitian) {
    ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr);
  } else if (A->structurally_symmetric) {
    ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
  }
  *newmat = B;
  PetscFunctionReturn(0);
}
2350 
2351 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2352 {
2353   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
2354   PetscErrorCode ierr;
2355   Vec            bb1 = 0;
2356 
2357   PetscFunctionBegin;
2358   if (flag == SOR_APPLY_UPPER) {
2359     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2360     PetscFunctionReturn(0);
2361   }
2362 
2363   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) {
2364     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2365   }
2366 
2367   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2368     if (flag & SOR_ZERO_INITIAL_GUESS) {
2369       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2370       its--;
2371     }
2372 
2373     while (its--) {
2374       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2375       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2376 
2377       /* update rhs: bb1 = bb - B*x */
2378       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2379       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2380 
2381       /* local sweep */
2382       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2383     }
2384   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
2385     if (flag & SOR_ZERO_INITIAL_GUESS) {
2386       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2387       its--;
2388     }
2389     while (its--) {
2390       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2391       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2392 
2393       /* update rhs: bb1 = bb - B*x */
2394       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2395       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2396 
2397       /* local sweep */
2398       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2399     }
2400   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
2401     if (flag & SOR_ZERO_INITIAL_GUESS) {
2402       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2403       its--;
2404     }
2405     while (its--) {
2406       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2407       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2408 
2409       /* update rhs: bb1 = bb - B*x */
2410       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2411       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2412 
2413       /* local sweep */
2414       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2415     }
2416   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel version of SOR requested not supported");
2417 
2418   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2419   PetscFunctionReturn(0);
2420 }
2421 
2422 PetscErrorCode MatGetColumnNorms_MPIBAIJ(Mat A,NormType type,PetscReal *norms)
2423 {
2424   PetscErrorCode ierr;
2425   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)A->data;
2426   PetscInt       N,i,*garray = aij->garray;
2427   PetscInt       ib,jb,bs = A->rmap->bs;
2428   Mat_SeqBAIJ    *a_aij = (Mat_SeqBAIJ*) aij->A->data;
2429   MatScalar      *a_val = a_aij->a;
2430   Mat_SeqBAIJ    *b_aij = (Mat_SeqBAIJ*) aij->B->data;
2431   MatScalar      *b_val = b_aij->a;
2432   PetscReal      *work;
2433 
2434   PetscFunctionBegin;
2435   ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
2436   ierr = PetscCalloc1(N,&work);CHKERRQ(ierr);
2437   if (type == NORM_2) {
2438     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2439       for (jb=0; jb<bs; jb++) {
2440         for (ib=0; ib<bs; ib++) {
2441           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
2442           a_val++;
2443         }
2444       }
2445     }
2446     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2447       for (jb=0; jb<bs; jb++) {
2448         for (ib=0; ib<bs; ib++) {
2449           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val);
2450           b_val++;
2451         }
2452       }
2453     }
2454   } else if (type == NORM_1) {
2455     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2456       for (jb=0; jb<bs; jb++) {
2457         for (ib=0; ib<bs; ib++) {
2458           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
2459           a_val++;
2460         }
2461       }
2462     }
2463     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2464       for (jb=0; jb<bs; jb++) {
2465        for (ib=0; ib<bs; ib++) {
2466           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val);
2467           b_val++;
2468         }
2469       }
2470     }
2471   } else if (type == NORM_INFINITY) {
2472     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2473       for (jb=0; jb<bs; jb++) {
2474         for (ib=0; ib<bs; ib++) {
2475           int col = A->cmap->rstart + a_aij->j[i] * bs + jb;
2476           work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]);
2477           a_val++;
2478         }
2479       }
2480     }
2481     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2482       for (jb=0; jb<bs; jb++) {
2483         for (ib=0; ib<bs; ib++) {
2484           int col = garray[b_aij->j[i]] * bs + jb;
2485           work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]);
2486           b_val++;
2487         }
2488       }
2489     }
2490   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
2491   if (type == NORM_INFINITY) {
2492     ierr = MPIU_Allreduce(work,norms,N,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2493   } else {
2494     ierr = MPIU_Allreduce(work,norms,N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2495   }
2496   ierr = PetscFree(work);CHKERRQ(ierr);
2497   if (type == NORM_2) {
2498     for (i=0; i<N; i++) norms[i] = PetscSqrtReal(norms[i]);
2499   }
2500   PetscFunctionReturn(0);
2501 }
2502 
2503 PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values)
2504 {
2505   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*) A->data;
2506   PetscErrorCode ierr;
2507 
2508   PetscFunctionBegin;
2509   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2510   A->factorerrortype             = a->A->factorerrortype;
2511   A->factorerror_zeropivot_value = a->A->factorerror_zeropivot_value;
2512   A->factorerror_zeropivot_row   = a->A->factorerror_zeropivot_row;
2513   PetscFunctionReturn(0);
2514 }
2515 
2516 PetscErrorCode MatShift_MPIBAIJ(Mat Y,PetscScalar a)
2517 {
2518   PetscErrorCode ierr;
2519   Mat_MPIBAIJ    *maij = (Mat_MPIBAIJ*)Y->data;
2520   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)maij->A->data;
2521 
2522   PetscFunctionBegin;
2523   if (!Y->preallocated) {
2524     ierr = MatMPIBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL,0,NULL);CHKERRQ(ierr);
2525   } else if (!aij->nz) {
2526     PetscInt nonew = aij->nonew;
2527     ierr = MatSeqBAIJSetPreallocation(maij->A,Y->rmap->bs,1,NULL);CHKERRQ(ierr);
2528     aij->nonew = nonew;
2529   }
2530   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2531   PetscFunctionReturn(0);
2532 }
2533 
2534 PetscErrorCode MatMissingDiagonal_MPIBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2535 {
2536   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
2537   PetscErrorCode ierr;
2538 
2539   PetscFunctionBegin;
2540   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2541   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2542   if (d) {
2543     PetscInt rstart;
2544     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2545     *d += rstart/A->rmap->bs;
2546 
2547   }
2548   PetscFunctionReturn(0);
2549 }
2550 
2551 PetscErrorCode  MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a)
2552 {
2553   PetscFunctionBegin;
2554   *a = ((Mat_MPIBAIJ*)A->data)->A;
2555   PetscFunctionReturn(0);
2556 }
2557 
2558 /* -------------------------------------------------------------------*/
2559 static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ,
2560                                        MatGetRow_MPIBAIJ,
2561                                        MatRestoreRow_MPIBAIJ,
2562                                        MatMult_MPIBAIJ,
2563                                 /* 4*/ MatMultAdd_MPIBAIJ,
2564                                        MatMultTranspose_MPIBAIJ,
2565                                        MatMultTransposeAdd_MPIBAIJ,
2566                                        0,
2567                                        0,
2568                                        0,
2569                                 /*10*/ 0,
2570                                        0,
2571                                        0,
2572                                        MatSOR_MPIBAIJ,
2573                                        MatTranspose_MPIBAIJ,
2574                                 /*15*/ MatGetInfo_MPIBAIJ,
2575                                        MatEqual_MPIBAIJ,
2576                                        MatGetDiagonal_MPIBAIJ,
2577                                        MatDiagonalScale_MPIBAIJ,
2578                                        MatNorm_MPIBAIJ,
2579                                 /*20*/ MatAssemblyBegin_MPIBAIJ,
2580                                        MatAssemblyEnd_MPIBAIJ,
2581                                        MatSetOption_MPIBAIJ,
2582                                        MatZeroEntries_MPIBAIJ,
2583                                 /*24*/ MatZeroRows_MPIBAIJ,
2584                                        0,
2585                                        0,
2586                                        0,
2587                                        0,
2588                                 /*29*/ MatSetUp_MPIBAIJ,
2589                                        0,
2590                                        0,
2591                                        MatGetDiagonalBlock_MPIBAIJ,
2592                                        0,
2593                                 /*34*/ MatDuplicate_MPIBAIJ,
2594                                        0,
2595                                        0,
2596                                        0,
2597                                        0,
2598                                 /*39*/ MatAXPY_MPIBAIJ,
2599                                        MatCreateSubMatrices_MPIBAIJ,
2600                                        MatIncreaseOverlap_MPIBAIJ,
2601                                        MatGetValues_MPIBAIJ,
2602                                        MatCopy_MPIBAIJ,
2603                                 /*44*/ 0,
2604                                        MatScale_MPIBAIJ,
2605                                        MatShift_MPIBAIJ,
2606                                        0,
2607                                        MatZeroRowsColumns_MPIBAIJ,
2608                                 /*49*/ 0,
2609                                        0,
2610                                        0,
2611                                        0,
2612                                        0,
2613                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2614                                        0,
2615                                        MatSetUnfactored_MPIBAIJ,
2616                                        MatPermute_MPIBAIJ,
2617                                        MatSetValuesBlocked_MPIBAIJ,
2618                                 /*59*/ MatCreateSubMatrix_MPIBAIJ,
2619                                        MatDestroy_MPIBAIJ,
2620                                        MatView_MPIBAIJ,
2621                                        0,
2622                                        0,
2623                                 /*64*/ 0,
2624                                        0,
2625                                        0,
2626                                        0,
2627                                        0,
2628                                 /*69*/ MatGetRowMaxAbs_MPIBAIJ,
2629                                        0,
2630                                        0,
2631                                        0,
2632                                        0,
2633                                 /*74*/ 0,
2634                                        MatFDColoringApply_BAIJ,
2635                                        0,
2636                                        0,
2637                                        0,
2638                                 /*79*/ 0,
2639                                        0,
2640                                        0,
2641                                        0,
2642                                        MatLoad_MPIBAIJ,
2643                                 /*84*/ 0,
2644                                        0,
2645                                        0,
2646                                        0,
2647                                        0,
2648                                 /*89*/ 0,
2649                                        0,
2650                                        0,
2651                                        0,
2652                                        0,
2653                                 /*94*/ 0,
2654                                        0,
2655                                        0,
2656                                        0,
2657                                        0,
2658                                 /*99*/ 0,
2659                                        0,
2660                                        0,
2661                                        0,
2662                                        0,
2663                                 /*104*/0,
2664                                        MatRealPart_MPIBAIJ,
2665                                        MatImaginaryPart_MPIBAIJ,
2666                                        0,
2667                                        0,
2668                                 /*109*/0,
2669                                        0,
2670                                        0,
2671                                        0,
2672                                        MatMissingDiagonal_MPIBAIJ,
2673                                 /*114*/MatGetSeqNonzeroStructure_MPIBAIJ,
2674                                        0,
2675                                        MatGetGhosts_MPIBAIJ,
2676                                        0,
2677                                        0,
2678                                 /*119*/0,
2679                                        0,
2680                                        0,
2681                                        0,
2682                                        MatGetMultiProcBlock_MPIBAIJ,
2683                                 /*124*/0,
2684                                        MatGetColumnNorms_MPIBAIJ,
2685                                        MatInvertBlockDiagonal_MPIBAIJ,
2686                                        0,
2687                                        0,
2688                                /*129*/ 0,
2689                                        0,
2690                                        0,
2691                                        0,
2692                                        0,
2693                                /*134*/ 0,
2694                                        0,
2695                                        0,
2696                                        0,
2697                                        0,
2698                                /*139*/ MatSetBlockSizes_Default,
2699                                        0,
2700                                        0,
2701                                        MatFDColoringSetUp_MPIXAIJ,
2702                                        0,
2703                                 /*144*/MatCreateMPIMatConcatenateSeqMat_MPIBAIJ
2704 };
2705 
2706 
2707 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
2708 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
2709 
/*
   MatMPIBAIJSetPreallocationCSR_MPIBAIJ - type-specific implementation of
   MatMPIBAIJSetPreallocationCSR(): preallocates from block-compressed-row
   data (ii/jj in local block-row / global block-column indices) and inserts
   the optional block values V.  See the user-facing manual page below for
   the value-ordering semantics.
*/
PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
{
  PetscInt       m,rstart,cstart,cend;
  PetscInt       i,j,dlen,olen,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
  const PetscInt *JJ    =0;
  PetscScalar    *values=0;
  PetscBool      roworiented = ((Mat_MPIBAIJ*)B->data)->roworiented;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* fix the block size into the layouts, then read back the effective bs */
  ierr   = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
  ierr   = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
  ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  ierr   = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
  m      = B->rmap->n/bs;          /* local number of block rows */
  rstart = B->rmap->rstart/bs;     /* first owned block row */
  cstart = B->cmap->rstart/bs;     /* first owned block column */
  cend   = B->cmap->rend/bs;       /* one past last owned block column */

  if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
  /* first pass: split each row's count into diagonal-block (d) and
     off-diagonal-block (o) parts for preallocation */
  ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    nz = ii[i+1] - ii[i];
    if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
    nz_max = PetscMax(nz_max,nz);
    dlen   = 0;
    olen   = 0;
    JJ     = jj + ii[i];
    for (j=0; j<nz; j++) {
      if (*JJ < cstart || *JJ >= cend) olen++;
      else dlen++;
      JJ++;
    }
    d_nnz[i] = dlen;
    o_nnz[i] = olen;
  }
  ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
  ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);

  /* if no values were supplied, insert zeros (one zeroed buffer big enough
     for the longest row is reused for every insertion) */
  values = (PetscScalar*)V;
  if (!values) {
    ierr = PetscCalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr);
  }
  /* second pass: insert the values row by row */
  for (i=0; i<m; i++) {
    PetscInt          row    = i + rstart;
    PetscInt          ncols  = ii[i+1] - ii[i];
    const PetscInt    *icols = jj + ii[i];
    if (!roworiented) {         /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
      const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
      ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
    } else {                    /* block ordering does not match so we can only insert one block at a time. */
      PetscInt j;
      for (j=0; j<ncols; j++) {
        const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
        ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
      }
    }
  }

  if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  /* after a CSR preallocation the pattern is considered final */
  ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2776 
2777 /*@C
2778    MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format
2779    (the default parallel PETSc format).
2780 
2781    Collective
2782 
2783    Input Parameters:
2784 +  B - the matrix
2785 .  bs - the block size
2786 .  i - the indices into j for the start of each local row (starts with zero)
2787 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2788 -  v - optional values in the matrix
2789 
2790    Level: developer
2791 
2792    Notes:
2793     The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED.  For example, C programs
2794    may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
2795    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
2796    MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
2797    block column and the second index is over columns within a block.
2798 
2799 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, MatCreateMPIBAIJWithArrays(), MPIBAIJ
2800 @*/
PetscErrorCode  MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  PetscValidLogicalCollectiveInt(B,bs,2);
  /* dispatch to the matrix type's implementation if it registered one;
     a no-op for types without "MatMPIBAIJSetPreallocationCSR_C" */
  ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2812 
2813 PetscErrorCode  MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
2814 {
2815   Mat_MPIBAIJ    *b;
2816   PetscErrorCode ierr;
2817   PetscInt       i;
2818   PetscMPIInt    size;
2819 
2820   PetscFunctionBegin;
2821   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
2822   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2823   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2824   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2825 
2826   if (d_nnz) {
2827     for (i=0; i<B->rmap->n/bs; i++) {
2828       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]);
2829     }
2830   }
2831   if (o_nnz) {
2832     for (i=0; i<B->rmap->n/bs; i++) {
2833       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]);
2834     }
2835   }
2836 
2837   b      = (Mat_MPIBAIJ*)B->data;
2838   b->bs2 = bs*bs;
2839   b->mbs = B->rmap->n/bs;
2840   b->nbs = B->cmap->n/bs;
2841   b->Mbs = B->rmap->N/bs;
2842   b->Nbs = B->cmap->N/bs;
2843 
2844   for (i=0; i<=b->size; i++) {
2845     b->rangebs[i] = B->rmap->range[i]/bs;
2846   }
2847   b->rstartbs = B->rmap->rstart/bs;
2848   b->rendbs   = B->rmap->rend/bs;
2849   b->cstartbs = B->cmap->rstart/bs;
2850   b->cendbs   = B->cmap->rend/bs;
2851 
2852 #if defined(PETSC_USE_CTABLE)
2853   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2854 #else
2855   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2856 #endif
2857   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2858   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2859   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2860 
2861   /* Because the B will have been resized we simply destroy it and create a new one each time */
2862   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
2863   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2864   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2865   ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr);
2866   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
2867   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2868 
2869   if (!B->preallocated) {
2870     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2871     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2872     ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
2873     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2874     ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr);
2875   }
2876 
2877   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2878   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
2879   B->preallocated  = PETSC_TRUE;
2880   B->was_assembled = PETSC_FALSE;
2881   B->assembled     = PETSC_FALSE;
2882   PetscFunctionReturn(0);
2883 }
2884 
2885 extern PetscErrorCode  MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
2886 extern PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
2887 
/*
   MatConvert_MPIBAIJ_MPIAdj - builds a MATMPIADJ adjacency graph from the
   block structure of B: one vertex per block row/column, with each row's
   diagonal entry removed (the adjacency graph carries no self edge).
   Off-diagonal entries are merged in sorted order around the diagonal block.
*/
PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype,MatReuse reuse,Mat *adj)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
  PetscErrorCode ierr;
  Mat_SeqBAIJ    *d  = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data;
  PetscInt       M   = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs;
  const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;

  PetscFunctionBegin;
  /* first pass: row pointer array (ii); each row counts diagonal-block plus
     off-diagonal-block entries, minus one if the row has a diagonal entry */
  ierr  = PetscMalloc1(M+1,&ii);CHKERRQ(ierr);
  ii[0] = 0;
  for (i=0; i<M; i++) {
    if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]);
    if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]);
    ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i];
    /* remove one from count of matrix has diagonal */
    for (j=id[i]; j<id[i+1]; j++) {
      if (jd[j] == i) {ii[i+1]--;break;}
    }
  }
  /* second pass: column array (jj) -- lower off-diagonal entries, then the
     diagonal block (skipping the diagonal itself), then the remaining
     off-diagonal entries; garray translates compressed columns to global */
  ierr = PetscMalloc1(ii[M],&jj);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<M; i++) {
    for (j=io[i]; j<io[i+1]; j++) {
      /* NOTE(review): comparing a global block column with rstart (a block
         row index) assumes row and column ownership coincide, i.e. a square
         matrix -- confirm for rectangular use */
      if (garray[jo[j]] > rstart) break;
      jj[cnt++] = garray[jo[j]];
    }
    for (k=id[i]; k<id[i+1]; k++) {
      if (jd[k] != i) {
        jj[cnt++] = rstart + jd[k];
      }
    }
    for (; j<io[i+1]; j++) {
      jj[cnt++] = garray[jo[j]];
    }
  }
  /* ii and jj are not freed here; presumably MatCreateMPIAdj takes ownership */
  ierr = MatCreateMPIAdj(PetscObjectComm((PetscObject)B),M,B->cmap->N/B->rmap->bs,ii,jj,NULL,adj);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2927 
2928 #include <../src/mat/impls/aij/mpi/mpiaij.h>
2929 
2930 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,MatReuse,Mat*);
2931 
/*
   MatConvert_MPIBAIJ_MPIAIJ - converts a parallel BAIJ matrix to MPIAIJ by
   converting its two sequential blocks (diagonal a->A, off-diagonal a->B)
   with MatConvert_SeqBAIJ_SeqAIJ and installing them in the new matrix.

   NOTE(review): in the non-reuse paths the input A is itself disassembled
   (MatDisAssemble_MPIBAIJ) so that a->B carries global column indices, and
   is reassembled afterwards -- the input is temporarily modified even for
   MAT_INITIAL_MATRIX.
*/
PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
{
  PetscErrorCode ierr;
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  Mat            B;
  Mat_MPIAIJ     *b;

  PetscFunctionBegin;
  if (!A->assembled) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Matrix must be assembled");

  if (reuse == MAT_REUSE_MATRIX) {
    B = *newmat;
  } else {
    /* create an empty MPIAIJ shell; its sub-blocks are replaced below */
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
    ierr = MatSetBlockSizes(B,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
    ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
    ierr = MatMPIAIJSetPreallocation(B,0,NULL,0,NULL);CHKERRQ(ierr);
  }
  b = (Mat_MPIAIJ*) B->data;

  if (reuse == MAT_REUSE_MATRIX) {
    ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_REUSE_MATRIX, &b->A);CHKERRQ(ierr);
    ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_REUSE_MATRIX, &b->B);CHKERRQ(ierr);
  } else {
    /* drop the placeholder blocks and convert A's blocks in their place */
    ierr = MatDestroy(&b->A);CHKERRQ(ierr);
    ierr = MatDestroy(&b->B);CHKERRQ(ierr);
    ierr = MatDisAssemble_MPIBAIJ(A);CHKERRQ(ierr);
    ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr);
    ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr);
    /* restore A to its assembled state after the disassembly above */
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

  if (reuse == MAT_INPLACE_MATRIX) {
    ierr = MatHeaderReplace(A,&B);CHKERRQ(ierr);
  } else {
   *newmat = B;
  }
  PetscFunctionReturn(0);
}
2976 
2977 /*MC
2978    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
2979 
2980    Options Database Keys:
2981 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
2982 . -mat_block_size <bs> - set the blocksize used to store the matrix
- -mat_use_hash_table <fact> - use a hash table with the given size factor to speed up matrix assembly
2984 
2985   Level: beginner
2986 
2987 .seealso: MatCreateMPIBAIJ
2988 M*/
2989 
2990 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,MatType,MatReuse,Mat*);
2991 PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*);
2992 
/*
   MatCreate_MPIBAIJ - Type constructor for MATMPIBAIJ; installs the function table,
   initializes the per-type data structure to an empty (unpreallocated) state,
   registers conversion/preallocation methods, and processes the
   -mat_use_hash_table option.
*/
PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B)
{
  Mat_MPIBAIJ    *b;
  PetscErrorCode ierr;
  PetscBool      flg = PETSC_FALSE;

  PetscFunctionBegin;
  ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
  B->data = (void*)b;

  /* install the MPIBAIJ virtual function table */
  ierr         = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  B->assembled = PETSC_FALSE;

  B->insertmode = NOT_SET_VALUES;
  ierr          = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
  ierr          = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr);

  /* build local table of row and column ownerships (one entry per rank, plus sentinel) */
  ierr = PetscMalloc1(b->size+1,&b->rangebs);CHKERRQ(ierr);

  /* build cache for off-processor entries set during assembly */
  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL;
  b->garray      = NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used in block assembly */
  b->barray = 0;

  /* stuff used for matrix vector multiply (created lazily at assembly time) */
  b->lvec  = 0;
  b->Mvctx = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = 0;
  b->hd           = 0;
  b->ht_size      = 0;
  b->ht_flag      = PETSC_FALSE;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
  b->ijonly = PETSC_FALSE;


  /* register type-specific methods reachable by their string keys */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C",MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HYPRE)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatSetHashTableFactor_C",MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_mpibaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);

  /* optional hash-table-based assembly; a factor <= 1.0 is replaced by the default 1.39 */
  ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsName("-mat_use_hash_table","Use hash table to save time in constructing matrix","MatSetOption",&flg);CHKERRQ(ierr);
  if (flg) {
    PetscReal fact = 1.39;
    ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
    ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
    if (fact <= 1.0) fact = 1.39;
    ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
    ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
  }
  ierr = PetscOptionsEnd();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
3075 
3076 /*MC
3077    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
3078 
3079    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
3080    and MATMPIBAIJ otherwise.
3081 
3082    Options Database Keys:
3083 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
3084 
3085   Level: beginner
3086 
3087 .seealso: MatCreateBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3088 M*/
3089 
3090 /*@C
3091    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
3092    (block compressed row).  For good matrix assembly performance
3093    the user should preallocate the matrix storage by setting the parameters
3094    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3095    performance can be increased by more than a factor of 50.
3096 
3097    Collective on Mat
3098 
3099    Input Parameters:
3100 +  B - the matrix
3101 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3102           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3103 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
3104            submatrix  (same for all local rows)
3105 .  d_nnz - array containing the number of block nonzeros in the various block rows
           in the diagonal portion of the local matrix (possibly different for each block
3107            row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry and
3108            set it even if it is zero.
3109 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
3110            submatrix (same for all local rows).
3111 -  o_nnz - array containing the number of nonzeros in the various block rows of the
3112            off-diagonal portion of the local submatrix (possibly different for
3113            each block row) or NULL.
3114 
3115    If the *_nnz parameter is given then the *_nz parameter is ignored
3116 
3117    Options Database Keys:
3118 +   -mat_block_size - size of the blocks to use
3119 -   -mat_use_hash_table <fact>
3120 
3121    Notes:
3122    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
3124 
3125    Storage Information:
3126    For a square global matrix we define each processor's diagonal portion
3127    to be its local rows and the corresponding columns (a square submatrix);
3128    each processor's off-diagonal portion encompasses the remainder of the
3129    local matrix (a rectangular submatrix).
3130 
3131    The user can specify preallocated storage for the diagonal part of
3132    the local submatrix with either d_nz or d_nnz (not both).  Set
3133    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3134    memory allocation.  Likewise, specify preallocated storage for the
3135    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3136 
3137    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3138    the figure below we depict these three local rows and all columns (0-11).
3139 
3140 .vb
3141            0 1 2 3 4 5 6 7 8 9 10 11
3142           --------------------------
3143    row 3  |o o o d d d o o o o  o  o
3144    row 4  |o o o d d d o o o o  o  o
3145    row 5  |o o o d d d o o o o  o  o
3146           --------------------------
3147 .ve
3148 
3149    Thus, any entries in the d locations are stored in the d (diagonal)
3150    submatrix, and any entries in the o locations are stored in the
3151    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3152    stored simply in the MATSEQBAIJ format for compressed row storage.
3153 
3154    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3155    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3156    In general, for PDE problems in which most nonzeros are near the diagonal,
3157    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3158    or you will get TERRIBLE performance; see the users' manual chapter on
3159    matrices.
3160 
3161    You can call MatGetInfo() to get information on how effective the preallocation was;
3162    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3163    You can also run with the option -info and look for messages with the string
3164    malloc in them to see if additional memory allocation was needed.
3165 
3166    Level: intermediate
3167 
3168 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocationCSR(), PetscSplitOwnership()
3169 @*/
PetscErrorCode  MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* validate arguments; bs must be the same (logically collective) on all ranks */
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  PetscValidLogicalCollectiveInt(B,bs,2);
  /* dispatch to the type-specific implementation if the matrix type provides one
     (a no-op for types that do not, e.g. a MATSEQBAIJ on one process) */
  ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
3181 
3182 /*@C
3183    MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format
3184    (block compressed row).  For good matrix assembly performance
3185    the user should preallocate the matrix storage by setting the parameters
3186    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3187    performance can be increased by more than a factor of 50.
3188 
3189    Collective
3190 
3191    Input Parameters:
3192 +  comm - MPI communicator
3193 .  bs   - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3194           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3195 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3196            This value should be the same as the local size used in creating the
3197            y vector for the matrix-vector product y = Ax.
3198 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
3199            This value should be the same as the local size used in creating the
3200            x vector for the matrix-vector product y = Ax.
3201 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3202 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3203 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
3204            submatrix  (same for all local rows)
3205 .  d_nnz - array containing the number of nonzero blocks in the various block rows
           in the diagonal portion of the local matrix (possibly different for each block
3207            row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry
3208            and set it even if it is zero.
3209 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3210            submatrix (same for all local rows).
3211 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
3212            off-diagonal portion of the local submatrix (possibly different for
3213            each block row) or NULL.
3214 
3215    Output Parameter:
3216 .  A - the matrix
3217 
3218    Options Database Keys:
3219 +   -mat_block_size - size of the blocks to use
3220 -   -mat_use_hash_table <fact>
3221 
3222    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3223    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3224    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3225 
3226    Notes:
3227    If the *_nnz parameter is given then the *_nz parameter is ignored
3228 
   A nonzero block is any block that has 1 or more nonzeros in it
3230 
3231    The user MUST specify either the local or global matrix dimensions
3232    (possibly both).
3233 
3234    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
3236 
3237    Storage Information:
3238    For a square global matrix we define each processor's diagonal portion
3239    to be its local rows and the corresponding columns (a square submatrix);
3240    each processor's off-diagonal portion encompasses the remainder of the
3241    local matrix (a rectangular submatrix).
3242 
3243    The user can specify preallocated storage for the diagonal part of
3244    the local submatrix with either d_nz or d_nnz (not both).  Set
3245    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3246    memory allocation.  Likewise, specify preallocated storage for the
3247    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3248 
3249    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3250    the figure below we depict these three local rows and all columns (0-11).
3251 
3252 .vb
3253            0 1 2 3 4 5 6 7 8 9 10 11
3254           --------------------------
3255    row 3  |o o o d d d o o o o  o  o
3256    row 4  |o o o d d d o o o o  o  o
3257    row 5  |o o o d d d o o o o  o  o
3258           --------------------------
3259 .ve
3260 
3261    Thus, any entries in the d locations are stored in the d (diagonal)
3262    submatrix, and any entries in the o locations are stored in the
3263    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3264    stored simply in the MATSEQBAIJ format for compressed row storage.
3265 
3266    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3267    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3268    In general, for PDE problems in which most nonzeros are near the diagonal,
3269    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3270    or you will get TERRIBLE performance; see the users' manual chapter on
3271    matrices.
3272 
3273    Level: intermediate
3274 
3275 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3276 @*/
3277 PetscErrorCode  MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3278 {
3279   PetscErrorCode ierr;
3280   PetscMPIInt    size;
3281 
3282   PetscFunctionBegin;
3283   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3284   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3285   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3286   if (size > 1) {
3287     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
3288     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3289   } else {
3290     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3291     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3292   }
3293   PetscFunctionReturn(0);
3294 }
3295 
/*
   MatDuplicate_MPIBAIJ - Duplicates a MATMPIBAIJ matrix: creates a new matrix with
   the same layout, copies the per-type metadata field by field, duplicates the two
   local submatrices (values copied or not according to cpvalues), and copies the
   communication scatter used for matrix-vector products.
*/
static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
  PetscErrorCode ierr;
  PetscInt       len=0;

  PetscFunctionBegin;
  *newmat = 0;
  ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
  ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
  ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);

  /* the duplicate is considered preallocated and assembled immediately */
  mat->factortype   = matin->factortype;
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;

  /* copy block-count metadata (mbs/nbs local, Mbs/Nbs global, bs2 = bs*bs) */
  a             = (Mat_MPIBAIJ*)mat->data;
  mat->rmap->bs = matin->rmap->bs;
  a->bs2        = oldmat->bs2;
  a->mbs        = oldmat->mbs;
  a->nbs        = oldmat->nbs;
  a->Mbs        = oldmat->Mbs;
  a->Nbs        = oldmat->Nbs;

  /* share (reference-count) the row/column layouts rather than copying them */
  ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = 0;                     /* MatGetRow() work space is not copied */
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;
  a->barray       = 0;
  a->rstartbs     = oldmat->rstartbs;
  a->rendbs       = oldmat->rendbs;
  a->cstartbs     = oldmat->cstartbs;
  a->cendbs       = oldmat->cendbs;

  /* hash table stuff: the factor/flag are inherited but the table itself is not copied */
  a->ht           = 0;
  a->hd           = 0;
  a->ht_size      = 0;
  a->ht_flag      = oldmat->ht_flag;
  a->ht_fact      = oldmat->ht_fact;
  a->ht_total_ct  = 0;
  a->ht_insert_ct = 0;

  ierr = PetscArraycpy(a->rangebs,oldmat->rangebs,a->size+1);CHKERRQ(ierr);
  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    /* dense global-to-local column map: one entry per global block column */
    ierr = PetscMalloc1(a->Nbs,&a->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscArraycpy(a->colmap,oldmat->colmap,a->Nbs);CHKERRQ(ierr);
#endif
  } else a->colmap = 0;

  /* garray maps local off-diagonal block columns to global block columns */
  if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
    ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr);
  } else a->garray = 0;

  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);

  /* duplicate the diagonal (A) and off-diagonal (B) sequential submatrices */
  ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
  ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}
3378 
/*
   MatLoad_MPIBAIJ - Loads a matrix stored in PETSc binary format into a MATMPIBAIJ
   matrix.  Rank 0 reads the header, row lengths, column indices, and values from
   the file and ships each rank its portion; every rank then inserts its rows with
   MatSetValues_MPIBAIJ.  If the global size is not divisible by the block size,
   extra identity rows/columns are appended as padding.

   Only square matrices and binary viewers are supported.
*/
PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  int            fd;
  PetscInt       i,nz,j,rstart,rend;
  PetscScalar    *vals,*buf;
  MPI_Comm       comm;
  MPI_Status     status;
  PetscMPIInt    rank,size,maxnz;
  PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
  PetscInt       *locrowlens = NULL,*procsnz = NULL,*browners = NULL;
  PetscInt       jj,*mycols,*ibuf,bs = newmat->rmap->bs,Mbs,mbs,extra_rows,mmax;
  PetscMPIInt    tag    = ((PetscObject)viewer)->tag;
  PetscInt       *dlens = NULL,*odlens = NULL,*mask = NULL,*masked1 = NULL,*masked2 = NULL,rowcount,odcount;
  PetscInt       dcount,kmax,k,nzcount,tmp,mend;
  PetscBool      isbinary;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (!isbinary) SETERRQ2(PetscObjectComm((PetscObject)newmat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newmat)->type_name);

  /* force binary viewer to load .info file if it has not yet done so */
  ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
  ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
  ierr = PetscOptionsEnd();CHKERRQ(ierr);
  if (bs < 0) bs = 1;

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
  /* rank 0 reads and validates the file header: classid, M, N, total nnz */
  if (!rank) {
    ierr = PetscBinaryRead(fd,(char*)header,4,NULL,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
    if (header[3] < 0) SETERRQ(PetscObjectComm((PetscObject)newmat),PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ");
  }
  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M    = header[1]; N = header[2];

  /* If global sizes are set, check if they are consistent with that given in the file */
  if (newmat->rmap->N >= 0 && newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newmat->rmap->N,M);
  if (newmat->cmap->N >= 0 && newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newmat->cmap->N,N);

  if (M != N) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Can only do square matrices");

  /*
     This code adds extra rows to make sure the number of rows is
     divisible by the blocksize
  */
  Mbs        = M/bs;
  extra_rows = bs - M + bs*Mbs;
  if (extra_rows == bs) extra_rows = 0;
  else                  Mbs++;
  if (extra_rows && !rank) {
    ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
  }

  /* determine ownership of all rows */
  if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
    mbs = Mbs/size + ((Mbs % size) > rank);
    m   = mbs*bs;
  } else { /* User set */
    m   = newmat->rmap->n;
    mbs = m/bs;
  }
  /* rowners: block-row ownership ranges; browners: the same in point rows */
  ierr = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr);
  ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);

  /* process 0 needs enough room for process with most rows */
  if (!rank) {
    mmax = rowners[1];
    for (i=2; i<=size; i++) {
      mmax = PetscMax(mmax,rowners[i]);
    }
    mmax*=bs;
  } else mmax = -1;             /* unused, but compiler warns anyway */

  /* turn the per-rank counts into a prefix-sum of ownership offsets */
  rowners[0] = 0;
  for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
  for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr = PetscMalloc1(m,&locrowlens);CHKERRQ(ierr);
  if (!rank) {
    mend = m;
    if (size == 1) mend = mend - extra_rows;
    ierr = PetscBinaryRead(fd,locrowlens,mend,NULL,PETSC_INT);CHKERRQ(ierr);
    /* padding rows get one (diagonal) entry each */
    for (j=mend; j<m; j++) locrowlens[j] = 1;
    ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
    ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
    for (j=0; j<m; j++) {
      procsnz[0] += locrowlens[j];
    }
    for (i=1; i<size; i++) {
      mend = browners[i+1] - browners[i];
      if (i == size-1) mend = mend - extra_rows;
      ierr = PetscBinaryRead(fd,rowlengths,mend,NULL,PETSC_INT);CHKERRQ(ierr);
      for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
      /* calculate the number of nonzeros on each processor */
      for (j=0; j<browners[i+1]-browners[i]; j++) {
        procsnz[i] += rowlengths[j];
      }
      ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);
  } else {
    ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
  }

  /* distribute column indices: rank 0 reads each rank's chunk and ships it */
  if (!rank) {
    /* determine max buffer needed and allocate it */
    maxnz = procsnz[0];
    for (i=1; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices  */
    nz     = procsnz[0];
    ierr   = PetscMalloc1(nz+1,&ibuf);CHKERRQ(ierr);
    mycols = ibuf;
    if (size == 1) nz -= extra_rows;
    ierr = PetscBinaryRead(fd,mycols,nz,NULL,PETSC_INT);CHKERRQ(ierr);
    if (size == 1) {
      /* padding rows get a diagonal entry in the padded columns M..M+extra_rows-1 */
      for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i;
    }

    /* read in every ones (except the last) and ship off */
    for (i=1; i<size-1; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,NULL,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    /* read in the stuff for the last proc */
    if (size != 1) {
      nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
      ierr = PetscBinaryRead(fd,cols,nz,NULL,PETSC_INT);CHKERRQ(ierr);
      for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
      ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += locrowlens[i];
    }
    ierr   = PetscMalloc1(nz+1,&ibuf);CHKERRQ(ierr);
    mycols = ibuf;
    /* receive message of column indices*/
    ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries */
  /* dlens/odlens give per-block-row counts of distinct diagonal/off-diagonal
     block columns, used below for preallocation; mask marks blocks seen in
     the current block row */
  ierr     = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr);
  ierr     = PetscCalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr);
  rowcount = 0; nzcount = 0;
  for (i=0; i<mbs; i++) {
    dcount  = 0;
    odcount = 0;
    for (j=0; j<bs; j++) {
      kmax = locrowlens[rowcount];
      for (k=0; k<kmax; k++) {
        tmp = mycols[nzcount++]/bs;
        if (!mask[tmp]) {
          mask[tmp] = 1;
          if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
          else masked1[dcount++] = tmp;
        }
      }
      rowcount++;
    }

    dlens[i]  = dcount;
    odlens[i] = odcount;

    /* zero out the mask elements we set */
    for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
    for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
  }

  ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
  ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);

  /* distribute numerical values the same way the column indices were distributed */
  if (!rank) {
    ierr = PetscMalloc1(maxnz+1,&buf);CHKERRQ(ierr);
    /* read in my part of the matrix numerical values  */
    nz     = procsnz[0];
    vals   = buf;
    mycols = ibuf;
    if (size == 1) nz -= extra_rows;
    ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr);
    if (size == 1) {
      /* padding rows get a unit diagonal so the matrix stays nonsingular */
      for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0;
    }

    /* insert into matrix */
    jj = rstart*bs;
    for (i=0; i<m; i++) {
      ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
    /* read in other processors (except the last one) and ship out */
    for (i=1; i<size-1; i++) {
      nz   = procsnz[i];
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
    }
    /* the last proc */
    if (size != 1) {
      nz   = procsnz[i] - extra_rows;
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr);
      for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
      ierr = MPIULong_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    ierr = PetscMalloc1(nz+1,&buf);CHKERRQ(ierr);

    /* receive message of values*/
    vals   = buf;
    mycols = ibuf;
    ierr   = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);

    /* insert into matrix */
    jj = rstart*bs;
    for (i=0; i<m; i++) {
      ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
  }
  ierr = PetscFree(locrowlens);CHKERRQ(ierr);
  ierr = PetscFree(buf);CHKERRQ(ierr);
  ierr = PetscFree(ibuf);CHKERRQ(ierr);
  ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
  ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
  ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
3632 
3633 /*@
3634    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
3635 
3636    Input Parameters:
3637 .  mat  - the matrix
3638 .  fact - factor
3639 
3640    Not Collective, each process can use a different factor
3641 
3642    Level: advanced
3643 
3644   Notes:
3645    This can also be set by the command line option: -mat_use_hash_table <fact>
3646 
3647 .seealso: MatSetOption()
3648 @*/
3649 PetscErrorCode  MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
3650 {
3651   PetscErrorCode ierr;
3652 
3653   PetscFunctionBegin;
3654   ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr);
3655   PetscFunctionReturn(0);
3656 }
3657 
3658 PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
3659 {
3660   Mat_MPIBAIJ *baij;
3661 
3662   PetscFunctionBegin;
3663   baij          = (Mat_MPIBAIJ*)mat->data;
3664   baij->ht_fact = fact;
3665   PetscFunctionReturn(0);
3666 }
3667 
3668 PetscErrorCode  MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
3669 {
3670   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
3671   PetscBool      flg;
3672   PetscErrorCode ierr;
3673 
3674   PetscFunctionBegin;
3675   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIBAIJ,&flg);CHKERRQ(ierr);
3676   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIBAIJ matrix as input");
3677   if (Ad)     *Ad     = a->A;
3678   if (Ao)     *Ao     = a->B;
3679   if (colmap) *colmap = a->garray;
3680   PetscFunctionReturn(0);
3681 }
3682 
3683 /*
3684     Special version for direct calls from Fortran (to eliminate two function call overheads
3685 */
3686 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3687 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3688 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3689 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3690 #endif
3691 
/*@C
  MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked()

  Collective on Mat

  Input Parameters:
+ mat - the matrix
. min - number of input rows
. im - input rows
. nin - number of input columns
. in - input columns
. v - numerical values input
- addvin - INSERT_VALUES or ADD_VALUES

  Notes:
    This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse.

  Level: advanced

.seealso:   MatSetValuesBlocked()
@*/
PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
{
  /* convert input arguments to C version (Fortran passes everything by reference) */
  Mat        mat  = *matin;
  PetscInt   m    = *min, n = *nin;
  InsertMode addv = *addvin;

  Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
  const MatScalar *value;
  MatScalar       *barray     = baij->barray;
  PetscBool       roworiented = baij->roworiented;
  PetscErrorCode  ierr;
  PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
  PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  /* tasks normally handled by MatSetValuesBlocked() */
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
#if defined(PETSC_USE_DEBUG)
  else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
  if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
#endif
  if (mat->assembled) {
    mat->was_assembled = PETSC_TRUE;
    mat->assembled     = PETSC_FALSE;
  }
  ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);


  /* lazily allocate the bs2-sized scratch buffer used when a block must be copied/reordered */
  if (!barray) {
    ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  /* stepval: number of scalars to skip in v between successive rows (row-oriented)
     or columns (column-oriented) of one bs x bs block */
  if (roworiented) stepval = (n-1)*bs;
  else stepval = (m-1)*bs;

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;  /* negative block rows are ignored by convention */
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      /* block row is owned locally: insert into the diagonal (A) or off-diagonal (B) part */
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          /* gather one bs x bs block from v into contiguous storage in barray */
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++ = *value++;
            }
          }
          barray -=bs2;  /* rewind to the start of the copied block */
        }

        if (in[j] >= cstart && in[j] < cend) {
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
        } else if (in[j] < 0) continue;  /* negative block columns are ignored by convention */
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
#endif
        else {
          if (mat->was_assembled) {
            /* previously assembled: map the global block column to B's compacted local
               numbering via colmap (stored 1-based, scaled by bs) */
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined(PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              /* block column not present in B yet: revert B to global numbering so
                 the new entry can be inserted, then use the global index directly */
              ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  =  in[j];
            }
          } else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B,row,col,barray,addv,im[i],in[j]);CHKERRQ(ierr);
        }
      }
    } else {
      /* block row owned by another process: stash values for communication at assembly time */
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }

  /* task normally handled by MatSetValuesBlocked() */
  ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
3830 
3831 /*@
3832      MatCreateMPIBAIJWithArrays - creates a MPI BAIJ matrix using arrays that contain in standard block
3833          CSR format the local rows.
3834 
3835    Collective
3836 
3837    Input Parameters:
3838 +  comm - MPI communicator
3839 .  bs - the block size, only a block size of 1 is supported
3840 .  m - number of local rows (Cannot be PETSC_DECIDE)
3841 .  n - This value should be the same as the local size used in creating the
3842        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3843        calculated if N is given) For square matrices n is almost always m.
3844 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3845 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3846 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that rowth block row of the matrix
3847 .   j - column indices
3848 -   a - matrix values
3849 
3850    Output Parameter:
3851 .   mat - the matrix
3852 
3853    Level: intermediate
3854 
3855    Notes:
3856        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3857      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3858      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3859 
3860      The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3861      the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3862      block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3863      with column-major ordering within blocks.
3864 
3865        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
3866 
3867 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3868           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
3869 @*/
3870 PetscErrorCode  MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3871 {
3872   PetscErrorCode ierr;
3873 
3874   PetscFunctionBegin;
3875   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3876   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3877   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3878   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3879   ierr = MatSetType(*mat,MATMPIBAIJ);CHKERRQ(ierr);
3880   ierr = MatSetBlockSize(*mat,bs);CHKERRQ(ierr);
3881   ierr = MatSetUp(*mat);CHKERRQ(ierr);
3882   ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
3883   ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
3884   ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
3885   PetscFunctionReturn(0);
3886 }
3887 
3888 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3889 {
3890   PetscErrorCode ierr;
3891   PetscInt       m,N,i,rstart,nnz,Ii,bs,cbs;
3892   PetscInt       *indx;
3893   PetscScalar    *values;
3894 
3895   PetscFunctionBegin;
3896   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
3897   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3898     Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inmat->data;
3899     PetscInt       *dnz,*onz,mbs,Nbs,nbs;
3900     PetscInt       *bindx,rmax=a->rmax,j;
3901     PetscMPIInt    rank,size;
3902 
3903     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3904     mbs = m/bs; Nbs = N/cbs;
3905     if (n == PETSC_DECIDE) {
3906       nbs  = n;
3907       ierr = PetscSplitOwnership(comm,&nbs,&Nbs);CHKERRQ(ierr);
3908       n    = nbs*cbs;
3909     } else {
3910       nbs = n/cbs;
3911     }
3912 
3913     ierr = PetscMalloc1(rmax,&bindx);CHKERRQ(ierr);
3914     ierr = MatPreallocateInitialize(comm,mbs,nbs,dnz,onz);CHKERRQ(ierr); /* inline function, output __end and __rstart are used below */
3915 
3916     ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3917     ierr = MPI_Comm_rank(comm,&size);CHKERRQ(ierr);
3918     if (rank == size-1) {
3919       /* Check sum(nbs) = Nbs */
3920       if (__end != Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local block columns %D != global block columns %D",__end,Nbs);
3921     }
3922 
3923     rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateInitialize */
3924     for (i=0; i<mbs; i++) {
3925       ierr = MatGetRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr); /* non-blocked nnz and indx */
3926       nnz = nnz/bs;
3927       for (j=0; j<nnz; j++) bindx[j] = indx[j*bs]/bs;
3928       ierr = MatPreallocateSet(i+rstart,nnz,bindx,dnz,onz);CHKERRQ(ierr);
3929       ierr = MatRestoreRow_SeqBAIJ(inmat,i*bs,&nnz,&indx,NULL);CHKERRQ(ierr);
3930     }
3931     ierr = PetscFree(bindx);CHKERRQ(ierr);
3932 
3933     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
3934     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
3935     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
3936     ierr = MatSetType(*outmat,MATBAIJ);CHKERRQ(ierr);
3937     ierr = MatSeqBAIJSetPreallocation(*outmat,bs,0,dnz);CHKERRQ(ierr);
3938     ierr = MatMPIBAIJSetPreallocation(*outmat,bs,0,dnz,0,onz);CHKERRQ(ierr);
3939     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
3940   }
3941 
3942   /* numeric phase */
3943   ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
3944   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
3945 
3946   for (i=0; i<m; i++) {
3947     ierr = MatGetRow_SeqBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3948     Ii   = i + rstart;
3949     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
3950     ierr = MatRestoreRow_SeqBAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
3951   }
3952   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3953   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3954   PetscFunctionReturn(0);
3955 }
3956