/* petsc/src/mat/impls/baij/mpi/mpibaij.c (revision a3fa217bfefef3d54995bb82b2b5ea8a3c3e9019) */

#include <../src/mat/impls/baij/mpi/mpibaij.h>   /*I  "petscmat.h"  I*/
#include <petscblaslapack.h>
#include <petscsf.h>

extern PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
extern PetscErrorCode MatDisAssemble_MPIBAIJ(Mat);
extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []);
extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
extern PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
extern PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar,Vec,Vec);

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ"
PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);}
  ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++) {
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);
  ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
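
/*
   Note on the index mapping above: the off-diagonal part B stores its
   columns in a compressed local numbering, and garray[] translates a local
   block column back to the global block column.  For a local point column
   idxb and block size bs, the global point column is recovered as

       bs*garray[idxb/bs] + idxb%bs

   Illustrative (hypothetical) example: with bs = 2 and garray = {3,7},
   local point column 3 (block 1, offset 1) maps to global column 2*7+1 = 15.
*/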

#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPIBAIJ"
PetscErrorCode  MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
PetscErrorCode  MatRetrieveValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
     Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  This is done in a non-scalable way since the
   length of colmap equals the global matrix length.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateColmap_MPIBAIJ_Private"
PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *B    = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;

  PetscFunctionBegin;
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
  for (i=0; i<nbs; i++) {
    ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc1((baij->Nbs+1),&baij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}
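
/*
   Typical colmap lookup (a sketch mirroring MatSetValues_MPIBAIJ() below);
   entries are stored shifted by one so that 0 can mean "not present":

     PetscInt gcol = ...;                  (global point column index)
     PetscInt col;
     #if defined(PETSC_USE_CTABLE)
       ierr = PetscTableFind(baij->colmap,gcol/bs + 1,&col);CHKERRQ(ierr);
       col--;                              (col < 0 means block not in B)
     #else
       col = baij->colmap[gcol/bs] - 1;
     #endif
     if (col >= 0) col += gcol % bs;       (local point column within B)
*/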

#define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
  { \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap +  bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value;  \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
a_noinsert:; \
    ailen[brow] = nrow; \
  }

#define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
  { \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap +  bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value;  \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
b_noinsert:; \
    bilen[brow] = nrow; \
  }
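
/*
   Both macros above insert one scalar into a SeqBAIJ submatrix: a binary
   search narrows the block row to a window of at most three block columns,
   a short linear scan finds (or places) block column bcol, and if a new
   block is needed the later entries of the row are shifted up to make room.
   Within a bs x bs block the (ridx,cidx) entry lives at offset bs*cidx+ridx,
   i.e. the blocks are stored column-major.
*/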

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ"
PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  MatScalar      value;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;

  /* Some variables required by the macros above */
  Mat         A     = baij->A;
  Mat_SeqBAIJ *a    = (Mat_SeqBAIJ*)(A)->data;
  PetscInt    *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar   *aa   =a->a;

  Mat         B     = baij->B;
  Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
  PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar   *ba   =b->a;

  PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar *ap,*bap;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      row = im[i] - rstart_orig;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig) {
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B    = baij->B;
              b    = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba   =b->a;
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
            else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
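
/*
   A minimal usage sketch (illustrative only; `comm` is hypothetical) of the
   point-wise path above: local entries go directly into A or B, anything
   else is stashed until assembly.

     Mat         C;
     PetscInt    rows[2] = {0,1},cols[2] = {0,1};
     PetscScalar vals[4] = {1.0,2.0,3.0,4.0};

     ierr = MatCreate(comm,&C);CHKERRQ(ierr);
     ierr = MatSetSizes(C,PETSC_DECIDE,PETSC_DECIDE,8,8);CHKERRQ(ierr);
     ierr = MatSetType(C,MATMPIBAIJ);CHKERRQ(ierr);
     ierr = MatMPIBAIJSetPreallocation(C,2,1,NULL,1,NULL);CHKERRQ(ierr);
     ierr = MatSetValues(C,2,rows,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/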

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  const PetscScalar *value;
  MatScalar         *barray     = baij->barray;
  PetscBool         roworiented = baij->roworiented;
  PetscErrorCode    ierr;
  PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
  PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) stepval = (n-1)*bs;
  else stepval = (m-1)*bs;

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If only one block is passed (n == 1 row-oriented, or m == 1 column-oriented) it is already contiguous and no copy is required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + (i*(stepval+bs) + j)*bs;
          } else {
            value = v + (j*(stepval+bs) + i)*bs;
          }
          for (ii=0; ii<bs; ii++,value+=bs+stepval) {
            for (jj=0; jj<bs; jj++) barray[jj] = value[jj];
            barray += bs;
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend) {
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined(PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  =  in[j];
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", bs*im[i], bs*in[j]);
          } else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
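
/*
   A minimal usage sketch for the blocked path above (illustrative; `comm`
   is hypothetical): indices are in block units and v holds whole bs x bs
   blocks, here row-oriented, so with bs = 2 one block is four scalars.

     Mat         C;
     PetscInt    brow = 0,bcol = 1;
     PetscScalar block[4] = {1.0,2.0,
                             3.0,4.0};

     ierr = MatCreate(comm,&C);CHKERRQ(ierr);
     ierr = MatSetSizes(C,PETSC_DECIDE,PETSC_DECIDE,8,8);CHKERRQ(ierr);
     ierr = MatSetType(C,MATMPIBAIJ);CHKERRQ(ierr);
     ierr = MatMPIBAIJSetPreallocation(C,2,1,NULL,1,NULL);CHKERRQ(ierr);
     ierr = MatSetValuesBlocked(C,1,&brow,1,&bcol,block,INSERT_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/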

#define HASH_KEY 0.6180339887
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
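/*
   HASH() is Knuth-style multiplicative hashing: HASH_KEY is the golden-ratio
   conjugate (sqrt(5)-1)/2, tmp-(PetscInt)tmp extracts the fractional part of
   key*HASH_KEY, and scaling by size yields a slot in [0,size).  Worked
   example (illustrative): size = 11, key = 7 gives tmp = 4.3262..., whose
   fractional part is 0.3262..., so HASH = (PetscInt)(11*0.3262...) = 3.
*/
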
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij       = (Mat_MPIBAIJ*)mat->data;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig  =mat->rmap->rend,Nbs=baij->Nbs;
  PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
  PetscReal      tmp;
  MatScalar      **HD = baij->hd,value;
#if defined(PETSC_USE_DEBUG)
  PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j=0; j<n; j++) {
        col = in[j];
        if (roworiented) value = v[i*n+j];
        else             value = v[i+j*m];
        /* Look up in the hash table */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
#endif
        /* A hash table entry was found, so insert the value at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else if (!baij->donotstash) {
      if (roworiented) {
        ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
      } else {
        ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct  = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ       *baij       = (Mat_MPIBAIJ*)mat->data;
  PetscBool         roworiented = baij->roworiented;
  PetscErrorCode    ierr;
  PetscInt          i,j,ii,jj,row,col;
  PetscInt          rstart=baij->rstartbs;
  PetscInt          rend  =baij->rendbs,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
  PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  PetscReal         tmp;
  MatScalar         **HD = baij->hd,*baij_a;
  const PetscScalar *v_t,*value;
#if defined(PETSC_USE_DEBUG)
  PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;
  if (roworiented) stepval = (n-1)*bs;
  else stepval = (m-1)*bs;

  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
#endif
    row = im[i];
    v_t = v + i*nbs2;
    if (row >= rstart && row < rend) {
      for (j=0; j<n; j++) {
        col = in[j];

        /* Look up in the hash table */
        key = row*Nbs+col+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        total_ct++;
        insert_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
#endif
        baij_a = HD[idx];
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t  += bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] = *value++;
              }
            }
          }
        } else {
          value = v + j*(stepval+bs)*bs + i*bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] = *value++;
              }
            }
          }
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct  = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIBAIJ"
PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
  PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          col  = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}
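
/*
   Note: MatGetValues() on a MATMPIBAIJ matrix can only return entries from
   locally owned rows; a remote row triggers the PETSC_ERR_SUP error above.
   Off-process columns of local rows are resolved through colmap/garray and
   yield 0.0 when the corresponding block is not stored in B.
*/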

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIBAIJ"
PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v  = amat->a;
      nz = amat->nz*bs2;
      for (i=0; i<nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      v  = bmat->a;
      nz = bmat->nz*bs2;
      for (i=0; i<nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *nrm = PetscSqrtReal(*nrm);
    } else if (type == NORM_1) { /* max column sum */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
      ierr = PetscMalloc2(mat->cmap->N,&tmp,mat->cmap->N,&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      v    = amat->a; jj = amat->j;
      for (i=0; i<amat->nz; i++) {
        for (j=0; j<bs; j++) {
          col = bs*(cstart + *jj) + j; /* column index */
          for (row=0; row<bs; row++) {
            tmp[col] += PetscAbsScalar(*v);  v++;
          }
        }
        jj++;
      }
      v = bmat->a; jj = bmat->j;
      for (i=0; i<bmat->nz; i++) {
        for (j=0; j<bs; j++) {
          col = bs*garray[*jj] + j;
          for (row=0; row<bs; row++) {
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *nrm = 0.0;
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row sum */
      PetscReal *sums;
      ierr = PetscMalloc1(bs,&sums);CHKERRQ(ierr);
      sum  = 0.0;
      for (j=0; j<amat->mbs; j++) {
        for (row=0; row<bs; row++) sums[row] = 0.0;
        v  = amat->a + bs2*amat->i[j];
        nz = amat->i[j+1]-amat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        v  = bmat->a + bs2*bmat->i[j];
        nz = bmat->i[j+1]-bmat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        for (row=0; row<bs; row++) {
          if (sums[row] > sum) sum = sums[row];
        }
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscFree(sums);CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for this norm yet");
  }
  PetscFunctionReturn(0);
}
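
/*
   The norms computed above, in mathematical form:
     NORM_FROBENIUS:  ||A||_F   = sqrt( sum_ij |a_ij|^2 )   (MPI_SUM of local
                                  sums of squares, then a square root)
     NORM_1:          ||A||_1   = max_j sum_i |a_ij|        (max column sum)
     NORM_INFINITY:   ||A||_inf = max_i sum_j |a_ij|        (max row sum,
                                  local maxima combined with MPIU_MAX)
*/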

/*
  Creates and fills the hash table. The table is created only once;
  if new entries need to be added to the matrix then the hash table
  has to be destroyed and recreated.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            A     = baij->A,B=baij->B;
  Mat_SeqBAIJ    *a    = (Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)B->data;
  PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
  PetscErrorCode ierr;
  PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
  PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
  PetscInt       *HT,key;
  MatScalar      **HD;
  PetscReal      tmp;
#if defined(PETSC_USE_INFO)
  PetscInt ct=0,max=0;
#endif

  PetscFunctionBegin;
  if (baij->ht) PetscFunctionReturn(0);

  baij->ht_size = (PetscInt)(factor*nz);
  ht_size       = baij->ht_size;

  /* Allocate Memory for Hash Table */
  ierr = PetscCalloc2(ht_size,&baij->hd,ht_size,&baij->ht);CHKERRQ(ierr);
  HD   = baij->hd;
  HT   = baij->ht;

  /* Loop Over A */
  for (i=0; i<a->mbs; i++) {
    for (j=ai[i]; j<ai[i+1]; j++) {
      row = i+rstart;
      col = aj[j]+cstart;

      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++) {
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = a->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }
  /* Loop Over B */
  for (i=0; i<b->mbs; i++) {
    for (j=bi[i]; j<bi[i+1]; j++) {
      row = i+rstart;
      col = garray[bj[j]];
      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++) {
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = b->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }

  /* Print Summary */
#if defined(PETSC_USE_INFO)
  for (i=0,j=0; i<ht_size; i++) {
    if (HT[i]) j++;
  }
  ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}
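
/*
   The table uses open addressing with linear probing: the key row*Nbs+col+1
   (offset by one so that 0 marks an empty slot) is placed in the first free
   slot at or after HASH(ht_size,key,tmp), wrapping modulo ht_size, and HD[]
   keeps the address of the matching bs2-sized block of values.  The _HT
   MatSetValues routines above probe the same sequence on lookup.
*/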

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
  ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *a   =(Mat_SeqBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscBool      r1,r2,r3,other_disassembled;
  MatScalar      *val;
  InsertMode     addv = mat->insertmode;
  PetscMPIInt    n;

  PetscFunctionBegin;
  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  if (!baij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       clear the roworiented flags before MatSetValues() and restore the
       original flags afterwards */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;

    (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);

    baij->roworiented = r1;
    a->roworiented    = r2;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);

    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);

    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);

  baij->rowvalues = 0;
  {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#include <petscdraw.h>
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = baij->size,rank = baij->rank;
  PetscInt          bs   = mat->rmap->bs;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
                                                rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar   *a;

    /* Here we are creating a temporary matrix, so we will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
    ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr      = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ*)baij->B->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr      = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      /* Set the type name to MATMPIBAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqBAIJ_ASCII()*/
      ierr = PetscStrcpy(((PetscObject)((Mat_MPIBAIJ*)(A->data))->A)->type_name,MATMPIBAIJ);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_Binary"
static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *A = (Mat_SeqBAIJ*)a->A->data;
  Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)a->B->data;
  PetscErrorCode ierr;
  PetscInt       i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen;
  PetscInt       *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll;
  int            fd;
  PetscScalar    *column_values;
  FILE           *file;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  PetscInt       message_count,flowcontrolcount;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
  nz   = bs2*(A->nz + B->nz);
  rlen = mat->rmap->n;
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;

    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    range = mat->rmap->range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }

  ierr = PetscMalloc1((rlen/bs),&crow_lens);CHKERRQ(ierr);
  /* compute lengths of each block row */
  for (i=0; i<a->mbs; i++) {
    crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }
  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscMalloc1(rlen,&row_lens);CHKERRQ(ierr);
    rlen = (range[1] - range[0])/bs;
    for (i=0; i<rlen; i++) {
      for (j=0; j<bs; j++) {
        row_lens[i*bs+j] = bs*crow_lens[i];
      }
    }
    ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = (range[i+1] - range[i])/bs;
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      for (k=0; k<rlen; k++) {
        for (j=0; j<bs; j++) {
          row_lens[k*bs+j] = bs*crow_lens[k];
        }
      }
      ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
    ierr = PetscFree(row_lens);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(crow_lens);CHKERRQ(ierr);

  /* load up the local column indices. Include them for all rows, not just one per block row, since process 0
     does not have the information needed to expand them from a block row. This does require more communication,
     but still no more than the communication needed for the nonzero values */
  nzmax = nz; /* space the largest processor needs */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  ierr  = PetscMalloc1(nzmax,&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<a->mbs; i++) {
    pcnt = cnt;
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      for (l=0; l<bs; l++) {
        column_indices[cnt++] = bs*col+l;
      }
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      for (l=0; l<bs; l++) {
        column_indices[cnt++] = bs*(A->j[k] + cstart)+l;
      }
    }
    for (; j<B->i[i+1]; j++) {
      for (l=0; l<bs; l++) {
        column_indices[cnt++] = bs*garray[B->j[j]]+l;
      }
    }
    len = cnt - pcnt;
    for (k=1; k<bs; k++) {
      ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr);
      cnt += len;
    }
  }
  if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);

  /* store the columns to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the numerical values */
  ierr = PetscMalloc1(nzmax,&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<a->mbs; i++) {
    rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]);
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      for (l=0; l<bs; l++) {
        for (ll=0; ll<bs; ll++) {
          column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
        }
      }
      cnt += bs;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      for (l=0; l<bs; l++) {
        for (ll=0; ll<bs; ll++) {
          column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll];
        }
      }
      cnt += bs;
    }
    for (; j<B->i[i+1]; j++) {
      for (l=0; l<bs; l++) {
        for (ll=0; ll<bs; ll++) {
          column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
        }
      }
      cnt += bs;
    }
    cnt += (bs-1)*rlen;
  }
  if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);

  /* store the column values to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);

  ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
  if (file) {
    fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
  }
  PetscFunctionReturn(0);
}
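
/*
   Resulting binary layout, matching the writes above: a four-entry header
   {MAT_FILE_CLASSID, M, N, global nz}, then the point-row lengths for every
   row, then the global column indices row by row, then the scalar values in
   the same order.  The viewer's info file additionally records
   -matload_block_size so that MatLoad() can restore the block size.
*/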

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ"
PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii || isdraw || issocket) {
    ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIBAIJ"
PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1308 
1309 #undef __FUNCT__
1310 #define __FUNCT__ "MatMult_MPIBAIJ"
1311 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1312 {
1313   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1314   PetscErrorCode ierr;
1315   PetscInt       nt;
1316 
1317   PetscFunctionBegin;
1318   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1319   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1320   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1321   if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
1322   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1323   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1324   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1325   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1326   PetscFunctionReturn(0);
1327 }
1328 
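/*
   Illustrative sketch, not part of the library: a typical call that lands in
   MatMult_MPIBAIJ() above and computes y = A*x, assuming A is an assembled
   MPIBAIJ matrix.  The scatter of ghost values into a->lvec is overlapped
   with the multiply by the diagonal block a->A; MatMultTranspose() below
   follows the reverse communication pattern.

     Vec x,y;
     ierr = MatGetVecs(A,&x,&y);CHKERRQ(ierr);
     ierr = VecSet(x,1.0);CHKERRQ(ierr);
     ierr = MatMult(A,x,y);CHKERRQ(ierr);
     ierr = VecDestroy(&x);CHKERRQ(ierr);
     ierr = VecDestroy(&y);CHKERRQ(ierr);
*/
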
1329 #undef __FUNCT__
1330 #define __FUNCT__ "MatMultAdd_MPIBAIJ"
1331 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1332 {
1333   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1334   PetscErrorCode ierr;
1335 
1336   PetscFunctionBegin;
1337   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1338   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1339   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1340   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1341   PetscFunctionReturn(0);
1342 }
1343 
1344 #undef __FUNCT__
1345 #define __FUNCT__ "MatMultTranspose_MPIBAIJ"
1346 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1347 {
1348   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1349   PetscErrorCode ierr;
1350   PetscBool      merged;
1351 
1352   PetscFunctionBegin;
1353   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1354   /* do nondiagonal part */
1355   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1356   if (!merged) {
1357     /* send it on its way */
1358     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1359     /* do local part */
1360     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1361     /* receive remote parts: note this assumes the values are not actually */
1362     /* inserted in yy until the next line */
1363     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1364   } else {
1365     /* do local part */
1366     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1367     /* send it on its way */
1368     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1369     /* the values were actually received in the Begin(), but we must still call the End(), which is a no-op here */
1370     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1371   }
1372   PetscFunctionReturn(0);
1373 }
1374 
1375 #undef __FUNCT__
1376 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
1377 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1378 {
1379   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1380   PetscErrorCode ierr;
1381 
1382   PetscFunctionBegin;
1383   /* do nondiagonal part */
1384   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1385   /* send it on its way */
1386   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1387   /* do local part */
1388   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1389   /* receive remote parts: note this assumes the values are not actually */
1390   /* inserted in zz until the VecScatterEnd() below, which is true for   */
1391   /* this implementation but is perhaps not always true. */
1392   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1393   PetscFunctionReturn(0);
1394 }
1395 
1396 /*
1397   This only works correctly for square matrices where the subblock A->A is the
1398    diagonal block
1399 */
1400 #undef __FUNCT__
1401 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
1402 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1403 {
1404   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1405   PetscErrorCode ierr;
1406 
1407   PetscFunctionBegin;
1408   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1409   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1410   PetscFunctionReturn(0);
1411 }
1412 
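/*
   Illustrative sketch, not part of the library: extracting the diagonal of a
   square, assembled MPIBAIJ matrix A; dispatches to MatGetDiagonal_MPIBAIJ()
   above, which only supports the square case.

     Vec d;
     ierr = MatGetVecs(A,NULL,&d);CHKERRQ(ierr);
     ierr = MatGetDiagonal(A,d);CHKERRQ(ierr);
     ierr = VecDestroy(&d);CHKERRQ(ierr);
*/
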
1413 #undef __FUNCT__
1414 #define __FUNCT__ "MatScale_MPIBAIJ"
1415 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
1416 {
1417   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1418   PetscErrorCode ierr;
1419 
1420   PetscFunctionBegin;
1421   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1422   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1423   PetscFunctionReturn(0);
1424 }
1425 
1426 #undef __FUNCT__
1427 #define __FUNCT__ "MatGetRow_MPIBAIJ"
1428 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1429 {
1430   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
1431   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1432   PetscErrorCode ierr;
1433   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1434   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
1435   PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;
1436 
1437   PetscFunctionBegin;
1438   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1439   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1440   mat->getrowactive = PETSC_TRUE;
1441 
1442   if (!mat->rowvalues && (idx || v)) {
1443     /*
1444         allocate enough space to hold information from the longest row.
1445     */
1446     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1447     PetscInt    max = 1,mbs = mat->mbs,tmp;
1448     for (i=0; i<mbs; i++) {
1449       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1450       if (max < tmp) max = tmp;
1451     }
1452     ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr);
1453   }
1454   lrow = row - brstart;
1455 
1456   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1457   if (!v)   {pvA = 0; pvB = 0;}
1458   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1459   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1460   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1461   nztot = nzA + nzB;
1462 
1463   cmap = mat->garray;
1464   if (v  || idx) {
1465     if (nztot) {
1466       /* Sort by increasing column numbers, assuming A and B already sorted */
1467       PetscInt imark = -1;
1468       if (v) {
1469         *v = v_p = mat->rowvalues;
1470         for (i=0; i<nzB; i++) {
1471           if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
1472           else break;
1473         }
1474         imark = i;
1475         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1476         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1477       }
1478       if (idx) {
1479         *idx = idx_p = mat->rowindices;
1480         if (imark > -1) {
1481           for (i=0; i<imark; i++) {
1482             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1483           }
1484         } else {
1485           for (i=0; i<nzB; i++) {
1486             if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1487             else break;
1488           }
1489           imark = i;
1490         }
1491         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1492     for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1493       }
1494     } else {
1495       if (idx) *idx = 0;
1496       if (v)   *v   = 0;
1497     }
1498   }
1499   *nz  = nztot;
1500   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1501   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1502   PetscFunctionReturn(0);
1503 }
1504 
1505 #undef __FUNCT__
1506 #define __FUNCT__ "MatRestoreRow_MPIBAIJ"
1507 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1508 {
1509   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1510 
1511   PetscFunctionBegin;
1512   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1513   baij->getrowactive = PETSC_FALSE;
1514   PetscFunctionReturn(0);
1515 }
1516 
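/*
   Illustrative sketch, not part of the library: walking the locally owned rows
   of an assembled MPIBAIJ matrix A with MatGetRow()/MatRestoreRow(), which
   dispatch to the two routines above.  Only local rows may be requested, and
   only one row may be active at a time, as enforced by getrowactive.

     PetscInt          row,rstart,rend,ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     }
*/
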
1517 #undef __FUNCT__
1518 #define __FUNCT__ "MatZeroEntries_MPIBAIJ"
1519 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1520 {
1521   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1522   PetscErrorCode ierr;
1523 
1524   PetscFunctionBegin;
1525   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1526   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1527   PetscFunctionReturn(0);
1528 }
1529 
1530 #undef __FUNCT__
1531 #define __FUNCT__ "MatGetInfo_MPIBAIJ"
1532 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1533 {
1534   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
1535   Mat            A  = a->A,B = a->B;
1536   PetscErrorCode ierr;
1537   PetscReal      isend[5],irecv[5];
1538 
1539   PetscFunctionBegin;
1540   info->block_size = (PetscReal)matin->rmap->bs;
1541 
1542   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1543 
1544   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1545   isend[3] = info->memory;  isend[4] = info->mallocs;
1546 
1547   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1548 
1549   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1550   isend[3] += info->memory;  isend[4] += info->mallocs;
1551 
1552   if (flag == MAT_LOCAL) {
1553     info->nz_used      = isend[0];
1554     info->nz_allocated = isend[1];
1555     info->nz_unneeded  = isend[2];
1556     info->memory       = isend[3];
1557     info->mallocs      = isend[4];
1558   } else if (flag == MAT_GLOBAL_MAX) {
1559     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1560 
1561     info->nz_used      = irecv[0];
1562     info->nz_allocated = irecv[1];
1563     info->nz_unneeded  = irecv[2];
1564     info->memory       = irecv[3];
1565     info->mallocs      = irecv[4];
1566   } else if (flag == MAT_GLOBAL_SUM) {
1567     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1568 
1569     info->nz_used      = irecv[0];
1570     info->nz_allocated = irecv[1];
1571     info->nz_unneeded  = irecv[2];
1572     info->memory       = irecv[3];
1573     info->mallocs      = irecv[4];
1574   } else SETERRQ1(PetscObjectComm((PetscObject)matin),PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1575   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1576   info->fill_ratio_needed = 0;
1577   info->factor_mallocs    = 0;
1578   PetscFunctionReturn(0);
1579 }
1580 
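/*
   Illustrative sketch, not part of the library: querying global storage
   statistics of an assembled matrix A; dispatches to MatGetInfo_MPIBAIJ()
   above, which reduces the per-process numbers with MPI_Allreduce().

     MatInfo info;
     ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(PETSC_COMM_WORLD,"nz used %g, nz allocated %g\n",info.nz_used,info.nz_allocated);CHKERRQ(ierr);
*/
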
1581 #undef __FUNCT__
1582 #define __FUNCT__ "MatSetOption_MPIBAIJ"
1583 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg)
1584 {
1585   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1586   PetscErrorCode ierr;
1587 
1588   PetscFunctionBegin;
1589   switch (op) {
1590   case MAT_NEW_NONZERO_LOCATIONS:
1591   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1592   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1593   case MAT_KEEP_NONZERO_PATTERN:
1594   case MAT_NEW_NONZERO_LOCATION_ERR:
1595     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1596     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1597     break;
1598   case MAT_ROW_ORIENTED:
1599     a->roworiented = flg;
1600 
1601     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1602     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1603     break;
1604   case MAT_NEW_DIAGONALS:
1605     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1606     break;
1607   case MAT_IGNORE_OFF_PROC_ENTRIES:
1608     a->donotstash = flg;
1609     break;
1610   case MAT_USE_HASH_TABLE:
1611     a->ht_flag = flg;
1612     break;
1613   case MAT_SYMMETRIC:
1614   case MAT_STRUCTURALLY_SYMMETRIC:
1615   case MAT_HERMITIAN:
1616   case MAT_SYMMETRY_ETERNAL:
1617     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1618     break;
1619   default:
1620     SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"unknown option %d",op);
1621   }
1622   PetscFunctionReturn(0);
1623 }
1624 
1625 #undef __FUNCT__
1626 #define __FUNCT__ "MatTranspose_MPIBAIJ"
1627 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
1628 {
1629   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
1630   Mat_SeqBAIJ    *Aloc;
1631   Mat            B;
1632   PetscErrorCode ierr;
1633   PetscInt       M =A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
1634   PetscInt       bs=A->rmap->bs,mbs=baij->mbs;
1635   MatScalar      *a;
1636 
1637   PetscFunctionBegin;
1638   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1639   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1640     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1641     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1642     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1643     /* Do not know preallocation information, but must set block size */
1644     ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,NULL,PETSC_DECIDE,NULL);CHKERRQ(ierr);
1645   } else {
1646     B = *matout;
1647   }
1648 
1649   /* copy over the A part */
1650   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1651   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1652   ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);
1653 
1654   for (i=0; i<mbs; i++) {
1655     rvals[0] = bs*(baij->rstartbs + i);
1656     for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1657     for (j=ai[i]; j<ai[i+1]; j++) {
1658       col = (baij->cstartbs+aj[j])*bs;
1659       for (k=0; k<bs; k++) {
1660         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1661 
1662         col++; a += bs;
1663       }
1664     }
1665   }
1666   /* copy over the B part */
1667   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1668   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1669   for (i=0; i<mbs; i++) {
1670     rvals[0] = bs*(baij->rstartbs + i);
1671     for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1672     for (j=ai[i]; j<ai[i+1]; j++) {
1673       col = baij->garray[aj[j]]*bs;
1674       for (k=0; k<bs; k++) {
1675         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1676         col++;
1677         a += bs;
1678       }
1679     }
1680   }
1681   ierr = PetscFree(rvals);CHKERRQ(ierr);
1682   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1683   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1684 
1685   if (reuse == MAT_INITIAL_MATRIX || *matout != A) *matout = B;
1686   else {
1687     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
1688   }
1689   PetscFunctionReturn(0);
1690 }
1691 
1692 #undef __FUNCT__
1693 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
1694 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1695 {
1696   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1697   Mat            a     = baij->A,b = baij->B;
1698   PetscErrorCode ierr;
1699   PetscInt       s1,s2,s3;
1700 
1701   PetscFunctionBegin;
1702   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1703   if (rr) {
1704     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1705     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1706     /* Overlap communication with computation. */
1707     ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1708   }
1709   if (ll) {
1710     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1711     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1712     ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
1713   }
1714   /* scale the diagonal block */
1715   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1716 
1717   if (rr) {
1718     /* Do a scatter end and then right scale the off-diagonal block */
1719     ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1720     ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
1721   }
1722   PetscFunctionReturn(0);
1723 }
1724 
1725 #undef __FUNCT__
1726 #define __FUNCT__ "MatZeroRows_MPIBAIJ"
1727 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
1728 {
1729   Mat_MPIBAIJ   *l      = (Mat_MPIBAIJ *) A->data;
1730   PetscInt      *owners = A->rmap->range;
1731   PetscInt       n      = A->rmap->n;
1732   PetscMPIInt    size   = l->size;
1733   PetscSF        sf;
1734   PetscInt      *lrows;
1735   PetscSFNode   *rrows;
1736   PetscInt       lastidx = -1, r, p = 0, len = 0;
1737   PetscErrorCode ierr;
1738 
1739   PetscFunctionBegin;
1740   /* Create SF where leaves are input rows and roots are owned rows */
1741   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
1742   for (r = 0; r < n; ++r) lrows[r] = -1;
1743   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
1744   for (r = 0; r < N; ++r) {
1745     const PetscInt idx   = rows[r];
1746     PetscBool      found = PETSC_FALSE;
1747     /* Trick for efficient searching when the rows are sorted */
1748     if (lastidx > idx) p = 0;
1749     lastidx = idx;
1750     for (; p < size; ++p) {
1751       if (idx >= owners[p] && idx < owners[p+1]) {
1752         rrows[r].rank  = p;
1753         rrows[r].index = rows[r] - owners[p];
1754         found = PETSC_TRUE;
1755         break;
1756       }
1757     }
1758     if (!found) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %d not found in matrix distribution", idx);
1759   }
1760   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
1761   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
1762   /* Collect flags for rows to be zeroed */
1763   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1764   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1765   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1766   /* Compress and put in row numbers */
1767   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
1768   /* fix right hand side if needed */
1769   if (x && b) {
1770     const PetscScalar *xx;
1771     PetscScalar       *bb;
1772 
1773     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
1774     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1775     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
1776     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
1777     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1778   }
1779 
1780   /* actually zap the local rows */
1781   /*
1782         Zero the required rows. If the "diagonal block" of the matrix
1783      is square and the user wishes to set the diagonal, we use separate
1784      code so that MatSetValues() is not called for each diagonal entry,
1785      which would allocate new memory each time, causing many mallocs and
1786      slowing things down.
1787   */
1788   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
1789   ierr = MatZeroRows_SeqBAIJ(l->B,len,lrows,0.0,0,0);CHKERRQ(ierr);
1790   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
1791     ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,diag,0,0);CHKERRQ(ierr);
1792   } else if (diag != 0.0) {
1793     ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,0,0);CHKERRQ(ierr);
1794     if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1795        MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1796     for (r = 0; r < len; ++r) {
1797       const PetscInt row = lrows[r] + A->rmap->rstart;
1798       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
1799     }
1800     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1801     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1802   } else {
1803     ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,0,0);CHKERRQ(ierr);
1804   }
1805   ierr = PetscFree(lrows);CHKERRQ(ierr);
1806   {
1807     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1808     ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1809   }
1810   PetscFunctionReturn(0);
1811 }
1812 
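/*
   Illustrative sketch, not part of the library: zeroing a set of global rows
   of an assembled MPIBAIJ matrix A while placing diag on the diagonal and
   fixing the right-hand side so that b[row] = diag*x[row]; dispatches to
   MatZeroRows_MPIBAIJ() above.  The rows array may contain rows owned by any
   process; the values here are placeholders.

     PetscInt    rows[2] = {0,1};
     PetscScalar diag    = 1.0;
     ierr = MatZeroRows(A,2,rows,diag,x,b);CHKERRQ(ierr);
*/
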
1813 #undef __FUNCT__
1814 #define __FUNCT__ "MatZeroRowsColumns_MPIBAIJ"
1815 PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
1816 {
1817   Mat_MPIBAIJ       *l = (Mat_MPIBAIJ*)A->data;
1818   PetscErrorCode    ierr;
1819   PetscMPIInt       size = l->size;
1820   PetscInt          i,j,k,r,p = 0,len = 0,row,col,count,n = A->rmap->n,lastidx = -1;
1821   PetscInt          *lrows,*owners = A->rmap->range;
1822   PetscSFNode       *rrows;
1823   PetscSF           sf;
1824   const PetscScalar *xx;
1825   PetscScalar       *bb,*mask;
1826   Vec               xmask,lmask;
1827   Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ*)l->B->data;
1828   PetscInt           bs = A->rmap->bs, bs2 = baij->bs2;
1829   PetscScalar       *aa;
1830 #if defined(PETSC_DEBUG)
1831   PetscBool found = PETSC_FALSE;
1832 #endif
1833 
1834   PetscFunctionBegin;
1835   /* Create SF where leaves are input rows and roots are owned rows */
1836   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
1837   for (r = 0; r < n; ++r) lrows[r] = -1;
1838   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
1839   for (r = 0; r < N; ++r) {
1840     const PetscInt idx   = rows[r];
1841     PetscBool      found = PETSC_FALSE;
1842     /* Trick for efficient searching when the rows are sorted */
1843     if (lastidx > idx) p = 0;
1844     lastidx = idx;
1845     for (; p < size; ++p) {
1846       if (idx >= owners[p] && idx < owners[p+1]) {
1847         rrows[r].rank  = p;
1848         rrows[r].index = rows[r] - owners[p];
1849         found = PETSC_TRUE;
1850         break;
1851       }
1852     }
1853     if (!found) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %d not found in matrix distribution", idx);
1854   }
1855   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
1856   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
1857   /* Collect flags for rows to be zeroed */
1858   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1859   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1860   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1861   /* Compress and put in row numbers */
1862   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
1863   /* zero diagonal part of matrix */
1864   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
1865   /* handle off diagonal part of matrix */
1866   ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr);
1867   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
1868   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
1869   for (i=0; i<len; i++) bb[lrows[i]] = 1;
1870   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1871   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1872   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1873   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1874   if (x) {
1875     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1876     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1877     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1878     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1879   }
1880   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1881   /* remove zeroed rows of off diagonal matrix */
1882   for (i = 0; i < len; ++i) {
1883     row   = lrows[i];
1884     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
1885     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
1886     for (k = 0; k < count; ++k) {
1887       aa[0] = 0.0;
1888       aa   += bs;
1889     }
1890   }
1891   /* loop over all elements of off process part of matrix zeroing removed columns*/
1892   for (i = 0; i < l->B->rmap->N; ++i) {
1893     row = i/bs;
1894     for (j = baij->i[row]; j < baij->i[row+1]; ++j) {
1895       for (k = 0; k < bs; ++k) {
1896         col = bs*baij->j[j] + k;
1897         if (PetscAbsScalar(mask[col])) {
1898           aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
1899           if (b) bb[i] -= aa[0]*xx[col];
1900           aa[0] = 0.0;
1901         }
1902       }
1903     }
1904   }
1905   if (x) {
1906     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1907     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1908   }
1909   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1910   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1911   ierr = PetscFree(lrows);CHKERRQ(ierr);
1912   PetscFunctionReturn(0);
1913 }
1914 
1915 #undef __FUNCT__
1916 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
1917 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1918 {
1919   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1920   PetscErrorCode ierr;
1921 
1922   PetscFunctionBegin;
1923   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1924   PetscFunctionReturn(0);
1925 }
1926 
1927 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat*);
1928 
1929 #undef __FUNCT__
1930 #define __FUNCT__ "MatEqual_MPIBAIJ"
1931 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool  *flag)
1932 {
1933   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1934   Mat            a,b,c,d;
1935   PetscBool      flg;
1936   PetscErrorCode ierr;
1937 
1938   PetscFunctionBegin;
1939   a = matA->A; b = matA->B;
1940   c = matB->A; d = matB->B;
1941 
1942   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1943   if (flg) {
1944     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1945   }
1946   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1947   PetscFunctionReturn(0);
1948 }
1949 
1950 #undef __FUNCT__
1951 #define __FUNCT__ "MatCopy_MPIBAIJ"
1952 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
1953 {
1954   PetscErrorCode ierr;
1955   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1956   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
1957 
1958   PetscFunctionBegin;
1959   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1960   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1961     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1962   } else {
1963     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1964     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1965   }
1966   PetscFunctionReturn(0);
1967 }
1968 
1969 #undef __FUNCT__
1970 #define __FUNCT__ "MatSetUp_MPIBAIJ"
1971 PetscErrorCode MatSetUp_MPIBAIJ(Mat A)
1972 {
1973   PetscErrorCode ierr;
1974 
1975   PetscFunctionBegin;
1976   ierr = MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1977   PetscFunctionReturn(0);
1978 }
1979 
1980 #undef __FUNCT__
1981 #define __FUNCT__ "MatAXPY_MPIBAIJ"
1982 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1983 {
1984   PetscErrorCode ierr;
1985   Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ*)X->data,*yy=(Mat_MPIBAIJ*)Y->data;
1986   PetscBLASInt   bnz,one=1;
1987   Mat_SeqBAIJ    *x,*y;
1988 
1989   PetscFunctionBegin;
1990   if (str == SAME_NONZERO_PATTERN) {
1991     PetscScalar alpha = a;
1992     x    = (Mat_SeqBAIJ*)xx->A->data;
1993     y    = (Mat_SeqBAIJ*)yy->A->data;
1994     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
1995     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
1996     x    = (Mat_SeqBAIJ*)xx->B->data;
1997     y    = (Mat_SeqBAIJ*)yy->B->data;
1998     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
1999     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2000     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2001   } else {
2002     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2003   }
2004   PetscFunctionReturn(0);
2005 }
2006 
2007 #undef __FUNCT__
2008 #define __FUNCT__ "MatRealPart_MPIBAIJ"
2009 PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
2010 {
2011   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
2012   PetscErrorCode ierr;
2013 
2014   PetscFunctionBegin;
2015   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2016   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2017   PetscFunctionReturn(0);
2018 }
2019 
2020 #undef __FUNCT__
2021 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ"
2022 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
2023 {
2024   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
2025   PetscErrorCode ierr;
2026 
2027   PetscFunctionBegin;
2028   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2029   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2030   PetscFunctionReturn(0);
2031 }
2032 
2033 #undef __FUNCT__
2034 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ"
2035 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
2036 {
2037   PetscErrorCode ierr;
2038   IS             iscol_local;
2039   PetscInt       csize;
2040 
2041   PetscFunctionBegin;
2042   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
2043   if (call == MAT_REUSE_MATRIX) {
2044     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
2045     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2046   } else {
2047     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
2048   }
2049   ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
2050   if (call == MAT_INITIAL_MATRIX) {
2051     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
2052     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
2053   }
2054   PetscFunctionReturn(0);
2055 }
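/*
   Illustrative sketch, not part of the library: extracting a parallel
   submatrix; dispatches to MatGetSubMatrix_MPIBAIJ() above, which gathers the
   column index set across processes.  The index sets must be compatible with
   the block size; nloc, rstart, and ncols are placeholders for the local row
   count, first owned row, and desired column count.

     IS  isrow,iscol;
     Mat S;
     ierr = ISCreateStride(PETSC_COMM_WORLD,nloc,rstart,1,&isrow);CHKERRQ(ierr);
     ierr = ISCreateStride(PETSC_COMM_WORLD,ncols,0,1,&iscol);CHKERRQ(ierr);
     ierr = MatGetSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&S);CHKERRQ(ierr);
     ierr = ISDestroy(&isrow);CHKERRQ(ierr);
     ierr = ISDestroy(&iscol);CHKERRQ(ierr);
*/
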
2056 extern PetscErrorCode MatGetSubMatrices_MPIBAIJ_local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,PetscBool*,Mat*);
2057 #undef __FUNCT__
2058 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ_Private"
2059 /*
2060   Not great since it makes two copies of the submatrix: first a SeqBAIJ
2061   on each process, and then the end result assembled by concatenating the
2062   local matrices. Writing it directly would be much like MatGetSubMatrices_MPIBAIJ()
2063 */
2064 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2065 {
2066   PetscErrorCode ierr;
2067   PetscMPIInt    rank,size;
2068   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs;
2069   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol,nrow;
2070   Mat            M,Mreuse;
2071   MatScalar      *vwork,*aa;
2072   MPI_Comm       comm;
2073   IS             isrow_new, iscol_new;
2074   PetscBool      idflag,allrows, allcols;
2075   Mat_SeqBAIJ    *aij;
2076 
2077   PetscFunctionBegin;
2078   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2079   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2080   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2081   /* The compression and expansion should be avoided. They do not flag
2082      errors and might change the indices, hence buggy */
2083   ierr = ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new);CHKERRQ(ierr);
2084   ierr = ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new);CHKERRQ(ierr);
2085 
2086   /* Check for special case: each processor gets entire matrix columns */
2087   ierr = ISIdentity(iscol,&idflag);CHKERRQ(ierr);
2088   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
2089   if (idflag && ncol == mat->cmap->N) allcols = PETSC_TRUE;
2090   else allcols = PETSC_FALSE;
2091 
2092   ierr = ISIdentity(isrow,&idflag);CHKERRQ(ierr);
2093   ierr = ISGetLocalSize(isrow,&nrow);CHKERRQ(ierr);
2094   if (idflag && nrow == mat->rmap->N) allrows = PETSC_TRUE;
2095   else allrows = PETSC_FALSE;
2096 
2097   if (call ==  MAT_REUSE_MATRIX) {
2098     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
2099     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2100     ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_REUSE_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr);
2101   } else {
2102     ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_INITIAL_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr);
2103   }
2104   ierr = ISDestroy(&isrow_new);CHKERRQ(ierr);
2105   ierr = ISDestroy(&iscol_new);CHKERRQ(ierr);
2106   /*
2107       m - number of local rows
2108       n - number of columns (same on all processors)
2109       rstart - first row in new global matrix generated
2110   */
2111   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2112   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2113   m    = m/bs;
2114   n    = n/bs;
2115 
2116   if (call == MAT_INITIAL_MATRIX) {
2117     aij = (Mat_SeqBAIJ*)(Mreuse)->data;
2118     ii  = aij->i;
2119     jj  = aij->j;
2120 
2121     /*
2122         Determine the number of non-zeros in the diagonal and off-diagonal
2123         portions of the matrix in order to do correct preallocation
2124     */
2125 
2126     /* first get start and end of "diagonal" columns */
2127     if (csize == PETSC_DECIDE) {
2128       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2129       if (mglobal == n*bs) { /* square matrix */
2130         nlocal = m;
2131       } else {
2132         nlocal = n/size + ((n % size) > rank);
2133       }
2134     } else {
2135       nlocal = csize/bs;
2136     }
2137     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2138     rstart = rend - nlocal;
2139     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2140 
2141     /* next, compute all the lengths */
2142     ierr  = PetscMalloc2(m+1,&dlens,m+1,&olens);CHKERRQ(ierr);
2143     for (i=0; i<m; i++) {
2144       jend = ii[i+1] - ii[i];
2145       olen = 0;
2146       dlen = 0;
2147       for (j=0; j<jend; j++) {
2148         if (*jj < rstart || *jj >= rend) olen++;
2149         else dlen++;
2150         jj++;
2151       }
2152       olens[i] = olen;
2153       dlens[i] = dlen;
2154     }
2155     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
2156     ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr);
2157     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
2158     ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
2159     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
2160   } else {
2161     PetscInt ml,nl;
2162 
2163     M    = *newmat;
2164     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2165     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2166     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2167     /*
2168          The next two lines are needed so we may call MatSetValuesBlocked_MPIBAIJ() below directly,
2169        rather than the slower MatSetValues().
2170     */
2171     M->was_assembled = PETSC_TRUE;
2172     M->assembled     = PETSC_FALSE;
2173   }
2174   ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2175   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2176   aij  = (Mat_SeqBAIJ*)(Mreuse)->data;
2177   ii   = aij->i;
2178   jj   = aij->j;
2179   aa   = aij->a;
2180   for (i=0; i<m; i++) {
2181     row   = rstart/bs + i;
2182     nz    = ii[i+1] - ii[i];
2183     cwork = jj;     jj += nz;
2184     vwork = aa;     aa += nz*bs*bs;
2185     ierr  = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2186   }
2187 
2188   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2189   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2190   *newmat = M;
2191 
2192   /* save submatrix used in processor for next request */
2193   if (call ==  MAT_INITIAL_MATRIX) {
2194     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2195     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2196   }
2197   PetscFunctionReturn(0);
2198 }
2199 
2200 #undef __FUNCT__
2201 #define __FUNCT__ "MatPermute_MPIBAIJ"
2202 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B)
2203 {
2204   MPI_Comm       comm,pcomm;
2205   PetscInt       clocal_size,nrows;
2206   const PetscInt *rows;
2207   PetscMPIInt    size;
2208   IS             crowp,lcolp;
2209   PetscErrorCode ierr;
2210 
2211   PetscFunctionBegin;
2212   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2213   /* make a collective version of 'rowp' */
2214   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
2215   if (pcomm==comm) {
2216     crowp = rowp;
2217   } else {
2218     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
2219     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
2220     ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr);
2221     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
2222   }
2223   ierr = ISSetPermutation(crowp);CHKERRQ(ierr);
2224   /* make a local version of 'colp' */
2225   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
2226   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
2227   if (size==1) {
2228     lcolp = colp;
2229   } else {
2230     ierr = ISAllGather(colp,&lcolp);CHKERRQ(ierr);
2231   }
2232   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
2233   /* now we just get the submatrix */
2234   ierr = MatGetLocalSize(A,NULL,&clocal_size);CHKERRQ(ierr);
2235   ierr = MatGetSubMatrix_MPIBAIJ_Private(A,crowp,lcolp,clocal_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
2236   /* clean up */
2237   if (pcomm!=comm) {
2238     ierr = ISDestroy(&crowp);CHKERRQ(ierr);
2239   }
2240   if (size>1) {
2241     ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
2242   }
2243   PetscFunctionReturn(0);
2244 }
2245 
2246 #undef __FUNCT__
2247 #define __FUNCT__ "MatGetGhosts_MPIBAIJ"
2248 PetscErrorCode  MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
2249 {
2250   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data;
2251   Mat_SeqBAIJ *B    = (Mat_SeqBAIJ*)baij->B->data;
2252 
2253   PetscFunctionBegin;
2254   if (nghosts) *nghosts = B->nbs;
2255   if (ghosts) *ghosts = baij->garray;
2256   PetscFunctionReturn(0);
2257 }
2258 
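/*
   Illustrative sketch, not part of the library: retrieving the global block
   indices of the ghost (off-process) columns of an assembled MPIBAIJ matrix A;
   dispatches to MatGetGhosts_MPIBAIJ() above.  The returned array is owned by
   the matrix and must not be freed by the caller.

     PetscInt       nghosts;
     const PetscInt *ghosts;
     ierr = MatGetGhosts(A,&nghosts,&ghosts);CHKERRQ(ierr);
*/
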
2259 #undef __FUNCT__
2260 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIBAIJ"
2261 PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat)
2262 {
2263   Mat            B;
2264   Mat_MPIBAIJ    *a  = (Mat_MPIBAIJ*)A->data;
2265   Mat_SeqBAIJ    *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data;
2266   Mat_SeqAIJ     *b;
2267   PetscErrorCode ierr;
2268   PetscMPIInt    size,rank,*recvcounts = 0,*displs = 0;
2269   PetscInt       sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs;
2270   PetscInt       m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf;
2271 
2272   PetscFunctionBegin;
2273   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
2274   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
2275 
2276   /* ----------------------------------------------------------------
2277      Tell every processor the number of nonzeros per row
2278   */
2279   ierr = PetscMalloc1((A->rmap->N/bs),&lens);CHKERRQ(ierr);
2280   for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) {
2281     lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs];
2282   }
2283   sendcount = A->rmap->rend/bs - A->rmap->rstart/bs;
2284   ierr      = PetscMalloc1(2*size,&recvcounts);CHKERRQ(ierr);
2285   displs    = recvcounts + size;
2286   for (i=0; i<size; i++) {
2287     recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs;
2288     displs[i]     = A->rmap->range[i]/bs;
2289   }
2290 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2291   ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2292 #else
2293   ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2294 #endif
2295   /* ---------------------------------------------------------------
2296      Create a sequential AIJ matrix holding the global block nonzero structure
2297   */
2298   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
2299   ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2300   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2301   ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr);
2302   b    = (Mat_SeqAIJ*)B->data;
2303 
2304   /*--------------------------------------------------------------------
2305     Copy my part of matrix column indices over
2306   */
2307   sendcount  = ad->nz + bd->nz;
2308   jsendbuf   = b->j + b->i[rstarts[rank]/bs];
2309   a_jsendbuf = ad->j;
2310   b_jsendbuf = bd->j;
2311   n          = A->rmap->rend/bs - A->rmap->rstart/bs;
2312   cnt        = 0;
2313   for (i=0; i<n; i++) {
2314 
2315     /* put in lower diagonal portion */
2316     m = bd->i[i+1] - bd->i[i];
2317     while (m > 0) {
2318       /* is it above diagonal (in bd (compressed) numbering) */
2319       if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break;
2320       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2321       m--;
2322     }
2323 
2324     /* put in diagonal portion */
2325     for (j=ad->i[i]; j<ad->i[i+1]; j++) {
2326       jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++;
2327     }
2328 
2329     /* put in upper diagonal portion */
2330     while (m-- > 0) {
2331       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2332     }
2333   }
2334   if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt);
2335 
2336   /*--------------------------------------------------------------------
2337     Gather all column indices to all processors
2338   */
2339   for (i=0; i<size; i++) {
2340     recvcounts[i] = 0;
2341     for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) {
2342       recvcounts[i] += lens[j];
2343     }
2344   }
2345   displs[0] = 0;
2346   for (i=1; i<size; i++) {
2347     displs[i] = displs[i-1] + recvcounts[i-1];
2348   }
2349 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2350   ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2351 #else
2352   ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2353 #endif
2354   /*--------------------------------------------------------------------
2355     Assemble the matrix into usable form (note numerical values not yet set)
2356   */
2357   /* set the b->ilen (length of each row) values */
2358   ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr);
2359   /* set the b->i indices */
2360   b->i[0] = 0;
2361   for (i=1; i<=A->rmap->N/bs; i++) {
2362     b->i[i] = b->i[i-1] + lens[i-1];
2363   }
2364   ierr = PetscFree(lens);CHKERRQ(ierr);
2365   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2366   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2367   ierr = PetscFree(recvcounts);CHKERRQ(ierr);
2368 
2369   if (A->symmetric) {
2370     ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2371   } else if (A->hermitian) {
2372     ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr);
2373   } else if (A->structurally_symmetric) {
2374     ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2375   }
2376   *newmat = B;
2377   PetscFunctionReturn(0);
2378 }
2379 
2380 #undef __FUNCT__
2381 #define __FUNCT__ "MatSOR_MPIBAIJ"
2382 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2383 {
2384   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
2385   PetscErrorCode ierr;
2386   Vec            bb1 = 0;
2387 
2388   PetscFunctionBegin;
2389   if (flag == SOR_APPLY_UPPER) {
2390     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2391     PetscFunctionReturn(0);
2392   }
2393 
2394   if (its > 1 || !(flag & SOR_ZERO_INITIAL_GUESS)) {
2395     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2396   }
2397 
2398   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2399     if (flag & SOR_ZERO_INITIAL_GUESS) {
2400       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2401       its--;
2402     }
2403 
2404     while (its--) {
2405       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2406       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2407 
2408       /* update rhs: bb1 = bb - B*x */
2409       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2410       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2411 
2412       /* local sweep */
2413       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2414     }
2415   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
2416     if (flag & SOR_ZERO_INITIAL_GUESS) {
2417       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2418       its--;
2419     }
2420     while (its--) {
2421       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2422       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2423 
2424       /* update rhs: bb1 = bb - B*x */
2425       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2426       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2427 
2428       /* local sweep */
2429       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2430     }
2431   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
2432     if (flag & SOR_ZERO_INITIAL_GUESS) {
2433       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2434       its--;
2435     }
2436     while (its--) {
2437       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2438       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2439 
2440       /* update rhs: bb1 = bb - B*x */
2441       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2442       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2443 
2444       /* local sweep */
2445       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2446     }
2447   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Requested parallel SOR variant not supported");
2448 
2449   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2450   PetscFunctionReturn(0);
2451 }
2452 
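/*
   Illustrative sketch, not part of the library: one processor-local symmetric
   SOR sweep with zero initial guess, roughly as PCSOR requests it in parallel;
   dispatches to MatSOR_MPIBAIJ() above with omega = 1.0 and fshift = 0.0,
   assuming A, b, and x are an assembled matrix and conforming vectors.

     ierr = MatSOR(A,b,1.0,(MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS),0.0,1,1,x);CHKERRQ(ierr);
*/
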
2453 #undef __FUNCT__
2454 #define __FUNCT__ "MatGetColumnNorms_MPIBAIJ"
2455 PetscErrorCode MatGetColumnNorms_MPIBAIJ(Mat A,NormType type,PetscReal *norms)
2456 {
2457   PetscErrorCode ierr;
2458   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)A->data;
2459   PetscInt       N,i,*garray = aij->garray;
2460   PetscInt       ib,jb,bs = A->rmap->bs;
2461   Mat_SeqBAIJ    *a_aij = (Mat_SeqBAIJ*) aij->A->data;
2462   MatScalar      *a_val = a_aij->a;
2463   Mat_SeqBAIJ    *b_aij = (Mat_SeqBAIJ*) aij->B->data;
2464   MatScalar      *b_val = b_aij->a;
2465   PetscReal      *work;
2466 
2467   PetscFunctionBegin;
2468   ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
2469   ierr = PetscCalloc1(N,&work);CHKERRQ(ierr);
2470   if (type == NORM_2) {
2471     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2472       for (jb=0; jb<bs; jb++) {
2473         for (ib=0; ib<bs; ib++) {
2474           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
2475           a_val++;
2476         }
2477       }
2478     }
2479     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2480       for (jb=0; jb<bs; jb++) {
2481         for (ib=0; ib<bs; ib++) {
2482           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val);
2483           b_val++;
2484         }
2485       }
2486     }
2487   } else if (type == NORM_1) {
2488     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2489       for (jb=0; jb<bs; jb++) {
2490         for (ib=0; ib<bs; ib++) {
2491           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
2492           a_val++;
2493         }
2494       }
2495     }
2496     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2497       for (jb=0; jb<bs; jb++) {
2498         for (ib=0; ib<bs; ib++) {
2499           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val);
2500           b_val++;
2501         }
2502       }
2503     }
2504   } else if (type == NORM_INFINITY) {
2505     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2506       for (jb=0; jb<bs; jb++) {
2507         for (ib=0; ib<bs; ib++) {
2508           PetscInt col = A->cmap->rstart + a_aij->j[i] * bs + jb;
2509           work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]);
2510           a_val++;
2511         }
2512       }
2513     }
2514     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2515       for (jb=0; jb<bs; jb++) {
2516         for (ib=0; ib<bs; ib++) {
2517           PetscInt col = garray[b_aij->j[i]] * bs + jb;
2518           work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]);
2519           b_val++;
2520         }
2521       }
2522     }
2523   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
2524   if (type == NORM_INFINITY) {
2525     ierr = MPI_Allreduce(work,norms,N,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2526   } else {
2527     ierr = MPI_Allreduce(work,norms,N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2528   }
2529   ierr = PetscFree(work);CHKERRQ(ierr);
2530   if (type == NORM_2) {
2531     for (i=0; i<N; i++) norms[i] = PetscSqrtReal(norms[i]);
2532   }
2533   PetscFunctionReturn(0);
2534 }
2535 
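/*
   Illustrative sketch, not part of the library: computing all global column
   norms of an assembled MPIBAIJ matrix A; dispatches to
   MatGetColumnNorms_MPIBAIJ() above.  The norms array must have room for every
   global column.

     PetscInt  N;
     PetscReal *norms;
     ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
     ierr = PetscMalloc1(N,&norms);CHKERRQ(ierr);
     ierr = MatGetColumnNorms(A,NORM_2,norms);CHKERRQ(ierr);
     ierr = PetscFree(norms);CHKERRQ(ierr);
*/
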
2536 #undef __FUNCT__
2537 #define __FUNCT__ "MatInvertBlockDiagonal_MPIBAIJ"
2538 PetscErrorCode  MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values)
2539 {
2540   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*) A->data;
2541   PetscErrorCode ierr;
2542 
2543   PetscFunctionBegin;
2544   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2545   PetscFunctionReturn(0);
2546 }
2547 
2548 
2549 /* -------------------------------------------------------------------*/
2550 static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ,
2551                                        MatGetRow_MPIBAIJ,
2552                                        MatRestoreRow_MPIBAIJ,
2553                                        MatMult_MPIBAIJ,
2554                                 /* 4*/ MatMultAdd_MPIBAIJ,
2555                                        MatMultTranspose_MPIBAIJ,
2556                                        MatMultTransposeAdd_MPIBAIJ,
2557                                        0,
2558                                        0,
2559                                        0,
2560                                 /*10*/ 0,
2561                                        0,
2562                                        0,
2563                                        MatSOR_MPIBAIJ,
2564                                        MatTranspose_MPIBAIJ,
2565                                 /*15*/ MatGetInfo_MPIBAIJ,
2566                                        MatEqual_MPIBAIJ,
2567                                        MatGetDiagonal_MPIBAIJ,
2568                                        MatDiagonalScale_MPIBAIJ,
2569                                        MatNorm_MPIBAIJ,
2570                                 /*20*/ MatAssemblyBegin_MPIBAIJ,
2571                                        MatAssemblyEnd_MPIBAIJ,
2572                                        MatSetOption_MPIBAIJ,
2573                                        MatZeroEntries_MPIBAIJ,
2574                                 /*24*/ MatZeroRows_MPIBAIJ,
2575                                        0,
2576                                        0,
2577                                        0,
2578                                        0,
2579                                 /*29*/ MatSetUp_MPIBAIJ,
2580                                        0,
2581                                        0,
2582                                        0,
2583                                        0,
2584                                 /*34*/ MatDuplicate_MPIBAIJ,
2585                                        0,
2586                                        0,
2587                                        0,
2588                                        0,
2589                                 /*39*/ MatAXPY_MPIBAIJ,
2590                                        MatGetSubMatrices_MPIBAIJ,
2591                                        MatIncreaseOverlap_MPIBAIJ,
2592                                        MatGetValues_MPIBAIJ,
2593                                        MatCopy_MPIBAIJ,
2594                                 /*44*/ 0,
2595                                        MatScale_MPIBAIJ,
2596                                        0,
2597                                        0,
2598                                        MatZeroRowsColumns_MPIBAIJ,
2599                                 /*49*/ 0,
2600                                        0,
2601                                        0,
2602                                        0,
2603                                        0,
2604                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2605                                        0,
2606                                        MatSetUnfactored_MPIBAIJ,
2607                                        MatPermute_MPIBAIJ,
2608                                        MatSetValuesBlocked_MPIBAIJ,
2609                                 /*59*/ MatGetSubMatrix_MPIBAIJ,
2610                                        MatDestroy_MPIBAIJ,
2611                                        MatView_MPIBAIJ,
2612                                        0,
2613                                        0,
2614                                 /*64*/ 0,
2615                                        0,
2616                                        0,
2617                                        0,
2618                                        0,
2619                                 /*69*/ MatGetRowMaxAbs_MPIBAIJ,
2620                                        0,
2621                                        0,
2622                                        0,
2623                                        0,
2624                                 /*74*/ 0,
2625                                        MatFDColoringApply_BAIJ,
2626                                        0,
2627                                        0,
2628                                        0,
2629                                 /*79*/ 0,
2630                                        0,
2631                                        0,
2632                                        0,
2633                                        MatLoad_MPIBAIJ,
2634                                 /*84*/ 0,
2635                                        0,
2636                                        0,
2637                                        0,
2638                                        0,
2639                                 /*89*/ 0,
2640                                        0,
2641                                        0,
2642                                        0,
2643                                        0,
2644                                 /*94*/ 0,
2645                                        0,
2646                                        0,
2647                                        0,
2648                                        0,
2649                                 /*99*/ 0,
2650                                        0,
2651                                        0,
2652                                        0,
2653                                        0,
2654                                 /*104*/0,
2655                                        MatRealPart_MPIBAIJ,
2656                                        MatImaginaryPart_MPIBAIJ,
2657                                        0,
2658                                        0,
2659                                 /*109*/0,
2660                                        0,
2661                                        0,
2662                                        0,
2663                                        0,
2664                                 /*114*/MatGetSeqNonzeroStructure_MPIBAIJ,
2665                                        0,
2666                                        MatGetGhosts_MPIBAIJ,
2667                                        0,
2668                                        0,
2669                                 /*119*/0,
2670                                        0,
2671                                        0,
2672                                        0,
2673                                        MatGetMultiProcBlock_MPIBAIJ,
2674                                 /*124*/0,
2675                                        MatGetColumnNorms_MPIBAIJ,
2676                                        MatInvertBlockDiagonal_MPIBAIJ,
2677                                        0,
2678                                        0,
2679                                /*129*/ 0,
2680                                        0,
2681                                        0,
2682                                        0,
2683                                        0,
2684                                /*134*/ 0,
2685                                        0,
2686                                        0,
2687                                        0,
2688                                        0,
2689                                /*139*/ 0,
2690                                        0,
2691                                        0,
2692                                        MatFDColoringSetUp_MPIXAIJ
2693 };
2694 
2695 #undef __FUNCT__
2696 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
2697 PetscErrorCode  MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a)
2698 {
2699   PetscFunctionBegin;
2700   *a = ((Mat_MPIBAIJ*)A->data)->A;
2701   PetscFunctionReturn(0);
2702 }
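
/*
   Usage sketch (illustrative, not part of the library source): the routine
   above is reached through the public MatGetDiagonalBlock() interface.

      Mat A,Ad;                               // A: an assembled MATMPIBAIJ matrix
      ierr = MatGetDiagonalBlock(A,&Ad);CHKERRQ(ierr);
      // Ad is the on-process diagonal block (a MATSEQBAIJ matrix); it is a
      // borrowed reference and must not be destroyed by the caller
*/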
2703 
2704 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*);
2705 
2706 #undef __FUNCT__
2707 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ"
2708 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2709 {
2710   PetscInt       m,rstart,cstart,cend;
2711   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
2712   const PetscInt *JJ    =0;
2713   PetscScalar    *values=0;
2714   PetscBool      roworiented = ((Mat_MPIBAIJ*)B->data)->roworiented;
2715   PetscErrorCode ierr;
2716 
2717   PetscFunctionBegin;
2718   ierr   = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2719   ierr   = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2720   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2721   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2722   ierr   = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2723   m      = B->rmap->n/bs;
2724   rstart = B->rmap->rstart/bs;
2725   cstart = B->cmap->rstart/bs;
2726   cend   = B->cmap->rend/bs;
2727 
2728   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
2729   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
2730   for (i=0; i<m; i++) {
2731     nz = ii[i+1] - ii[i];
2732     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
2733     nz_max = PetscMax(nz_max,nz);
2734     JJ     = jj + ii[i];
2735     for (j=0; j<nz; j++) {
2736       if (*JJ >= cstart) break;
2737       JJ++;
2738     }
2739     d = 0;
2740     for (; j<nz; j++) {
2741       if (*JJ++ >= cend) break;
2742       d++;
2743     }
2744     d_nnz[i] = d;
2745     o_nnz[i] = nz - d;
2746   }
2747   ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2748   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
2749 
2750   values = (PetscScalar*)V;
2751   if (!values) {
2752     ierr = PetscMalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr);
2753     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2754   }
2755   for (i=0; i<m; i++) {
2756     PetscInt          row    = i + rstart;
2757     PetscInt          ncols  = ii[i+1] - ii[i];
2758     const PetscInt    *icols = jj + ii[i];
2759     if (!roworiented) {         /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
2760       const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2761       ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2762     } else {                    /* block ordering does not match so we can only insert one block at a time. */
2763       PetscInt j;
2764       for (j=0; j<ncols; j++) {
2765         const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
2766         ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
2767       }
2768     }
2769   }
2770 
2771   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2772   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2773   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2774   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2775   PetscFunctionReturn(0);
2776 }
2777 
2778 #undef __FUNCT__
2779 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR"
2780 /*@C
2781    MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format
2782    (block compressed row).
2783 
2784    Collective on MPI_Comm
2785 
2786    Input Parameters:
2787 +  A - the matrix
2788 .  bs - the block size
2789 .  i - the indices into j for the start of each local row (starts with zero)
2790 .  j - the column indices for each local row (starts with zero); these must be sorted for each row
2791 -  v - optional values in the matrix
2792 
2793    Level: developer
2794 
2795    Notes: The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED.  For example, C programs
2796    may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
2797    over rows within a block and the last index is over columns within a block.  Fortran programs will likely set
2798    MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
2799    block and the second index is over columns within a block.
2800 
2801 .keywords: matrix, aij, compressed row, sparse, parallel
2802 
2803 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, MatCreateMPIBAIJWithArrays(), MPIBAIJ
2804 @*/
2805 PetscErrorCode  MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2806 {
2807   PetscErrorCode ierr;
2808 
2809   PetscFunctionBegin;
2810   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2811   PetscValidType(B,1);
2812   PetscValidLogicalCollectiveInt(B,bs,2);
2813   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
2814   PetscFunctionReturn(0);
2815 }
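
/*
   Example (a minimal sketch; the 2x2 block CSR data below is hypothetical and
   assumes a single-process run for simplicity): one local block row holding
   blocks in block columns 0 and 2 of a 1 x 3 block matrix.

      Mat         B;
      PetscInt    bs   = 2;
      PetscInt    ii[] = {0,2};                // row pointers: one block row, two blocks
      PetscInt    jj[] = {0,2};                // sorted block column indices
      PetscScalar vv[] = {1,2,3,4, 5,6,7,8};   // bs*bs values per block

      ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
      ierr = MatSetSizes(B,2,6,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
      ierr = MatSetType(B,MATMPIBAIJ);CHKERRQ(ierr);
      ierr = MatMPIBAIJSetPreallocationCSR(B,bs,ii,jj,vv);CHKERRQ(ierr);
*/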
2816 
2817 #undef __FUNCT__
2818 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
2819 PetscErrorCode  MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
2820 {
2821   Mat_MPIBAIJ    *b;
2822   PetscErrorCode ierr;
2823   PetscInt       i;
2824 
2825   PetscFunctionBegin;
2826   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
2827   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2828   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2829   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2830 
2831   if (d_nnz) {
2832     for (i=0; i<B->rmap->n/bs; i++) {
2833       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
2834     }
2835   }
2836   if (o_nnz) {
2837     for (i=0; i<B->rmap->n/bs; i++) {
2838       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
2839     }
2840   }
2841 
2842   b      = (Mat_MPIBAIJ*)B->data;
2843   b->bs2 = bs*bs;
2844   b->mbs = B->rmap->n/bs;
2845   b->nbs = B->cmap->n/bs;
2846   b->Mbs = B->rmap->N/bs;
2847   b->Nbs = B->cmap->N/bs;
2848 
2849   for (i=0; i<=b->size; i++) {
2850     b->rangebs[i] = B->rmap->range[i]/bs;
2851   }
2852   b->rstartbs = B->rmap->rstart/bs;
2853   b->rendbs   = B->rmap->rend/bs;
2854   b->cstartbs = B->cmap->rstart/bs;
2855   b->cendbs   = B->cmap->rend/bs;
2856 
2857   if (!B->preallocated) {
2858     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2859     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2860     ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
2861     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2862     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2863     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2864     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
2865     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2866     ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr);
2867   }
2868 
2869   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2870   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
2871   B->preallocated = PETSC_TRUE;
2872   PetscFunctionReturn(0);
2873 }
2874 
2875 extern PetscErrorCode  MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
2876 extern PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
2877 
2878 #undef __FUNCT__
2879 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj"
2880 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype,MatReuse reuse,Mat *adj)
2881 {
2882   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
2883   PetscErrorCode ierr;
2884   Mat_SeqBAIJ    *d  = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data;
2885   PetscInt       M   = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs;
2886   const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;
2887 
2888   PetscFunctionBegin;
2889   ierr  = PetscMalloc1((M+1),&ii);CHKERRQ(ierr);
2890   ii[0] = 0;
2891   for (i=0; i<M; i++) {
2892     if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]);
2893     if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]);
2894     ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i];
2895     /* remove one from count if matrix has diagonal */
2896     for (j=id[i]; j<id[i+1]; j++) {
2897       if (jd[j] == i) {ii[i+1]--;break;}
2898     }
2899   }
2900   ierr = PetscMalloc1(ii[M],&jj);CHKERRQ(ierr);
2901   cnt  = 0;
2902   for (i=0; i<M; i++) {
2903     for (j=io[i]; j<io[i+1]; j++) {
2904       if (garray[jo[j]] > rstart) break;
2905       jj[cnt++] = garray[jo[j]];
2906     }
2907     for (k=id[i]; k<id[i+1]; k++) {
2908       if (jd[k] != i) {
2909         jj[cnt++] = rstart + jd[k];
2910       }
2911     }
2912     for (; j<io[i+1]; j++) {
2913       jj[cnt++] = garray[jo[j]];
2914     }
2915   }
2916   ierr = MatCreateMPIAdj(PetscObjectComm((PetscObject)B),M,B->cmap->N/B->rmap->bs,ii,jj,NULL,adj);CHKERRQ(ierr);
2917   PetscFunctionReturn(0);
2918 }
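
/*
   Usage sketch (illustrative): the adjacency converter above is normally
   reached through the generic MatConvert() interface, e.g. to hand the block
   graph of the matrix to a partitioner.

      Mat adj;
      ierr = MatConvert(A,MATMPIADJ,MAT_INITIAL_MATRIX,&adj);CHKERRQ(ierr);
*/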
2919 
2920 #include <../src/mat/impls/aij/mpi/mpiaij.h>
2921 
2922 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,MatReuse,Mat*);
2923 
2924 #undef __FUNCT__
2925 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAIJ"
2926 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
2927 {
2928   PetscErrorCode ierr;
2929   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
2930   Mat            B;
2931   Mat_MPIAIJ     *b;
2932 
2933   PetscFunctionBegin;
2934   if (!A->assembled) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Matrix must be assembled");
2935 
2936   ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2937   ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
2938   ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2939   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
2940   ierr = MatMPIAIJSetPreallocation(B,0,NULL,0,NULL);CHKERRQ(ierr);
2941   b    = (Mat_MPIAIJ*) B->data;
2942 
2943   ierr = MatDestroy(&b->A);CHKERRQ(ierr);
2944   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2945   ierr = MatDisAssemble_MPIBAIJ(A);CHKERRQ(ierr);
2946   ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr);
2947   ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr);
2948   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2949   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2950   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2951   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2952   if (reuse == MAT_REUSE_MATRIX) {
2953     ierr = MatHeaderReplace(A,B);CHKERRQ(ierr);
2954   } else {
2955    *newmat = B;
2956   }
2957   PetscFunctionReturn(0);
2958 }
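
/*
   Usage sketch (illustrative): converting a block matrix to point AIJ form
   through the generic interface; with MAT_REUSE_MATRIX the input matrix
   itself is replaced (see the MatHeaderReplace() call above).

      Mat C;
      ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
      // C is an independent MATMPIAIJ copy; destroy it when done
      ierr = MatDestroy(&C);CHKERRQ(ierr);
*/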
2959 
2960 #if defined(PETSC_HAVE_MUMPS)
2961 PETSC_EXTERN PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*);
2962 #endif
2963 
2964 /*MC
2965    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
2966 
2967    Options Database Keys:
2968 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
2969 . -mat_block_size <bs> - set the blocksize used to store the matrix
2970 - -mat_use_hash_table <fact> - set the factor used to compute the size of the hash table
2971 
2972   Level: beginner
2973 
2974 .seealso: MatCreateBAIJ()
2975 M*/
2976 
2977 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,MatType,MatReuse,Mat*);
2978 
2979 #undef __FUNCT__
2980 #define __FUNCT__ "MatCreate_MPIBAIJ"
2981 PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B)
2982 {
2983   Mat_MPIBAIJ    *b;
2984   PetscErrorCode ierr;
2985   PetscBool      flg;
2986 
2987   PetscFunctionBegin;
2988   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
2989   B->data = (void*)b;
2990 
2991   ierr         = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2992   B->assembled = PETSC_FALSE;
2993 
2994   B->insertmode = NOT_SET_VALUES;
2995   ierr          = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
2996   ierr          = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr);
2997 
2998   /* build local table of row and column ownerships */
2999   ierr = PetscMalloc1((b->size+1),&b->rangebs);CHKERRQ(ierr);
3000 
3001   /* build cache for off array entries formed */
3002   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
3003 
3004   b->donotstash  = PETSC_FALSE;
3005   b->colmap      = NULL;
3006   b->garray      = NULL;
3007   b->roworiented = PETSC_TRUE;
3008 
3009   /* stuff used in block assembly */
3010   b->barray = 0;
3011 
3012   /* stuff used for matrix vector multiply */
3013   b->lvec  = 0;
3014   b->Mvctx = 0;
3015 
3016   /* stuff for MatGetRow() */
3017   b->rowindices   = 0;
3018   b->rowvalues    = 0;
3019   b->getrowactive = PETSC_FALSE;
3020 
3021   /* hash table stuff */
3022   b->ht           = 0;
3023   b->hd           = 0;
3024   b->ht_size      = 0;
3025   b->ht_flag      = PETSC_FALSE;
3026   b->ht_fact      = 0;
3027   b->ht_total_ct  = 0;
3028   b->ht_insert_ct = 0;
3029 
3030   /* stuff for MatGetSubMatrices_MPIBAIJ_local() */
3031   b->ijonly = PETSC_FALSE;
3032 
3033   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr);
3034   ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,NULL);CHKERRQ(ierr);
3035   if (flg) {
3036     PetscReal fact = 1.39;
3037     ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
3038     ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
3039     if (fact <= 1.0) fact = 1.39;
3040     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
3041     ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
3042   }
3043   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3044 
3045 #if defined(PETSC_HAVE_MUMPS)
3046   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_baij_mumps);CHKERRQ(ierr);
3047 #endif
3048   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr);
3049   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr);
3050   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C",MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr);
3051   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
3052   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
3053   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
3054   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
3055   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
3056   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
3057   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSetHashTableFactor_C",MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
3058   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpibstrm_C",MatConvert_MPIBAIJ_MPIBSTRM);CHKERRQ(ierr);
3059   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);
3060   PetscFunctionReturn(0);
3061 }
3062 
3063 /*MC
3064    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
3065 
3066    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
3067    and MATMPIBAIJ otherwise.
3068 
3069    Options Database Keys:
3070 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
3071 
3072   Level: beginner
3073 
3074 .seealso: MatCreateBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3075 M*/
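
/*
   Usage sketch (illustrative, with hypothetical sizes): with MATBAIJ the
   concrete type is chosen by the communicator size and can still be
   overridden from the options database.

      Mat A;
      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
      ierr = MatSetType(A,MATBAIJ);CHKERRQ(ierr);
      ierr = MatSetFromOptions(A);CHKERRQ(ierr);   // honors -mat_type baij
*/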
3076 
3077 #undef __FUNCT__
3078 #define __FUNCT__ "MatMPIBAIJSetPreallocation"
3079 /*@C
3080    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
3081    (block compressed row).  For good matrix assembly performance
3082    the user should preallocate the matrix storage by setting the parameters
3083    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3084    performance can be increased by more than a factor of 50.
3085 
3086    Collective on Mat
3087 
3088    Input Parameters:
3089 +  A - the matrix
3090 .  bs   - size of block
3091 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
3092            submatrix  (same for all local rows)
3093 .  d_nnz - array containing the number of block nonzeros in the various block rows
3094            in the diagonal portion of the local submatrix (possibly different for each block
3095            row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry and
3096            set it even if it is zero.
3097 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
3098            submatrix (same for all local rows).
3099 -  o_nnz - array containing the number of block nonzeros in the various block rows of the
3100            off-diagonal portion of the local submatrix (possibly different for
3101            each block row) or NULL.
3102 
3103    If the *_nnz parameter is given then the *_nz parameter is ignored
3104 
3105    Options Database Keys:
3106 +   -mat_block_size - size of the blocks to use
3107 -   -mat_use_hash_table <fact> - set the factor used to compute the size of the hash table
3108 
3109    Notes:
3110    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
3111    then it must be used on all processors that share the object for that argument.
3112 
3113    Storage Information:
3114    For a square global matrix we define each processor's diagonal portion
3115    to be its local rows and the corresponding columns (a square submatrix);
3116    each processor's off-diagonal portion encompasses the remainder of the
3117    local matrix (a rectangular submatrix).
3118 
3119    The user can specify preallocated storage for the diagonal part of
3120    the local submatrix with either d_nz or d_nnz (not both).  Set
3121    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3122    memory allocation.  Likewise, specify preallocated storage for the
3123    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3124 
3125    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3126    the figure below we depict these three local rows and all columns (0-11).
3127 
3128 .vb
3129            0 1 2 3 4 5 6 7 8 9 10 11
3130           --------------------------
3131    row 3  |o o o d d d o o o o  o  o
3132    row 4  |o o o d d d o o o o  o  o
3133    row 5  |o o o d d d o o o o  o  o
3134           --------------------------
3135 .ve
3136 
3137    Thus, any entries in the d locations are stored in the d (diagonal)
3138    submatrix, and any entries in the o locations are stored in the
3139    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3140    stored simply in the MATSEQBAIJ format for compressed row storage.
3141 
3142    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3143    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3144    In general, for PDE problems in which most nonzeros are near the diagonal,
3145    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3146    or you will get TERRIBLE performance; see the users' manual chapter on
3147    matrices.
3148 
3149    You can call MatGetInfo() to get information on how effective the preallocation was;
3150    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3151    You can also run with the option -info and look for messages with the string
3152    malloc in them to see if additional memory allocation was needed.
3153 
3154    Level: intermediate
3155 
3156 .keywords: matrix, block, aij, compressed row, sparse, parallel
3157 
3158 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocationCSR(), PetscSplitOwnership()
3159 @*/
3160 PetscErrorCode  MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3161 {
3162   PetscErrorCode ierr;
3163 
3164   PetscFunctionBegin;
3165   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3166   PetscValidType(B,1);
3167   PetscValidLogicalCollectiveInt(B,bs,2);
3168   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3169   PetscFunctionReturn(0);
3170 }
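
/*
   Example (a sketch with hypothetical sizes): per-block-row preallocation of
   the d (diagonal) and o (off-diagonal) parts described above, for 3 local
   block rows with block size 2.

      Mat      A;
      PetscInt bs = 2,mbs = 3;
      PetscInt d_nnz[3] = {3,2,3};   // block nonzeros per block row, diagonal part
      PetscInt o_nnz[3] = {1,0,2};   // block nonzeros per block row, off-diagonal part

      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,bs*mbs,bs*mbs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
      ierr = MatMPIBAIJSetPreallocation(A,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
*/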
3171 
3172 #undef __FUNCT__
3173 #define __FUNCT__ "MatCreateBAIJ"
3174 /*@C
3175    MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format
3176    (block compressed row).  For good matrix assembly performance
3177    the user should preallocate the matrix storage by setting the parameters
3178    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3179    performance can be increased by more than a factor of 50.
3180 
3181    Collective on MPI_Comm
3182 
3183    Input Parameters:
3184 +  comm - MPI communicator
3185 .  bs   - size of block
3186 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3187            This value should be the same as the local size used in creating the
3188            y vector for the matrix-vector product y = Ax.
3189 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
3190            This value should be the same as the local size used in creating the
3191            x vector for the matrix-vector product y = Ax.
3192 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3193 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3194 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
3195            submatrix  (same for all local rows)
3196 .  d_nnz - array containing the number of nonzero blocks in the various block rows
3197            in the diagonal portion of the local submatrix (possibly different for each block
3198            row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry
3199            and set it even if it is zero.
3200 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3201            submatrix (same for all local rows).
3202 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
3203            off-diagonal portion of the local submatrix (possibly different for
3204            each block row) or NULL.
3205 
3206    Output Parameter:
3207 .  A - the matrix
3208 
3209    Options Database Keys:
3210 +   -mat_block_size - size of the blocks to use
3211 -   -mat_use_hash_table <fact> - set the factor used to compute the size of the hash table
3212 
3213    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3214    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3215    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3216 
3217    Notes:
3218    If the *_nnz parameter is given then the *_nz parameter is ignored
3219 
3220    A nonzero block is any block that has 1 or more nonzeros in it
3221 
3222    The user MUST specify either the local or global matrix dimensions
3223    (possibly both).
3224 
3225    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
3226    then it must be used on all processors that share the object for that argument.
3227 
3228    Storage Information:
3229    For a square global matrix we define each processor's diagonal portion
3230    to be its local rows and the corresponding columns (a square submatrix);
3231    each processor's off-diagonal portion encompasses the remainder of the
3232    local matrix (a rectangular submatrix).
3233 
3234    The user can specify preallocated storage for the diagonal part of
3235    the local submatrix with either d_nz or d_nnz (not both).  Set
3236    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3237    memory allocation.  Likewise, specify preallocated storage for the
3238    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3239 
3240    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3241    the figure below we depict these three local rows and all columns (0-11).
3242 
3243 .vb
3244            0 1 2 3 4 5 6 7 8 9 10 11
3245           --------------------------
3246    row 3  |o o o d d d o o o o  o  o
3247    row 4  |o o o d d d o o o o  o  o
3248    row 5  |o o o d d d o o o o  o  o
3249           --------------------------
3250 .ve
3251 
3252    Thus, any entries in the d locations are stored in the d (diagonal)
3253    submatrix, and any entries in the o locations are stored in the
3254    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3255    stored simply in the MATSEQBAIJ format for compressed row storage.
3256 
3257    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3258    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3259    In general, for PDE problems in which most nonzeros are near the diagonal,
3260    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3261    or you will get TERRIBLE performance; see the users' manual chapter on
3262    matrices.
3263 
3264    Level: intermediate
3265 
3266 .keywords: matrix, block, aij, compressed row, sparse, parallel
3267 
3268 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3269 @*/
3270 PetscErrorCode  MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3271 {
3272   PetscErrorCode ierr;
3273   PetscMPIInt    size;
3274 
3275   PetscFunctionBegin;
3276   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3277   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3278   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3279   if (size > 1) {
3280     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
3281     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3282   } else {
3283     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3284     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3285   }
3286   PetscFunctionReturn(0);
3287 }
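
/*
   Usage sketch (hypothetical sizes): the convenience path above is equivalent
   to MatCreate()/MatSetSizes()/MatSetType() followed by the preallocation
   call matching the communicator size.

      Mat A;
      ierr = MatCreateBAIJ(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DECIDE,100,100,
                           5,NULL,2,NULL,&A);CHKERRQ(ierr);
*/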
3288 
3289 #undef __FUNCT__
3290 #define __FUNCT__ "MatDuplicate_MPIBAIJ"
3291 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3292 {
3293   Mat            mat;
3294   Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
3295   PetscErrorCode ierr;
3296   PetscInt       len=0;
3297 
3298   PetscFunctionBegin;
3299   *newmat = 0;
3300   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
3301   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3302   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3303   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3304 
3305   mat->factortype   = matin->factortype;
3306   mat->preallocated = PETSC_TRUE;
3307   mat->assembled    = PETSC_TRUE;
3308   mat->insertmode   = NOT_SET_VALUES;
3309 
3310   a             = (Mat_MPIBAIJ*)mat->data;
3311   mat->rmap->bs = matin->rmap->bs;
3312   a->bs2        = oldmat->bs2;
3313   a->mbs        = oldmat->mbs;
3314   a->nbs        = oldmat->nbs;
3315   a->Mbs        = oldmat->Mbs;
3316   a->Nbs        = oldmat->Nbs;
3317 
3318   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3319   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3320 
3321   a->size         = oldmat->size;
3322   a->rank         = oldmat->rank;
3323   a->donotstash   = oldmat->donotstash;
3324   a->roworiented  = oldmat->roworiented;
3325   a->rowindices   = 0;
3326   a->rowvalues    = 0;
3327   a->getrowactive = PETSC_FALSE;
3328   a->barray       = 0;
3329   a->rstartbs     = oldmat->rstartbs;
3330   a->rendbs       = oldmat->rendbs;
3331   a->cstartbs     = oldmat->cstartbs;
3332   a->cendbs       = oldmat->cendbs;
3333 
3334   /* hash table stuff */
3335   a->ht           = 0;
3336   a->hd           = 0;
3337   a->ht_size      = 0;
3338   a->ht_flag      = oldmat->ht_flag;
3339   a->ht_fact      = oldmat->ht_fact;
3340   a->ht_total_ct  = 0;
3341   a->ht_insert_ct = 0;
3342 
3343   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
3344   if (oldmat->colmap) {
3345 #if defined(PETSC_USE_CTABLE)
3346     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3347 #else
3348     ierr = PetscMalloc1((a->Nbs),&a->colmap);CHKERRQ(ierr);
3349     ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3350     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3351 #endif
3352   } else a->colmap = 0;
3353 
3354   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
3355     ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr);
3356     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3357     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
3358   } else a->garray = 0;
3359 
3360   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
3361   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3362   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
3363   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3364   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
3365 
3366   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3367   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
3368   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3369   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
3370   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3371   *newmat = mat;
3372   PetscFunctionReturn(0);
3373 }
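
/*
   Usage sketch (illustrative): the routine above is reached through the
   generic MatDuplicate() interface.

      Mat B;
      ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);   // or MAT_DO_NOT_COPY_VALUES
*/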
3374 
3375 #undef __FUNCT__
3376 #define __FUNCT__ "MatLoad_MPIBAIJ"
3377 PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer)
3378 {
3379   PetscErrorCode ierr;
3380   int            fd;
3381   PetscInt       i,nz,j,rstart,rend;
3382   PetscScalar    *vals,*buf;
3383   MPI_Comm       comm;
3384   MPI_Status     status;
3385   PetscMPIInt    rank,size,maxnz;
3386   PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
3387   PetscInt       *locrowlens = NULL,*procsnz = NULL,*browners = NULL;
3388   PetscInt       jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax;
3389   PetscMPIInt    tag    = ((PetscObject)viewer)->tag;
3390   PetscInt       *dlens = NULL,*odlens = NULL,*mask = NULL,*masked1 = NULL,*masked2 = NULL,rowcount,odcount;
3391   PetscInt       dcount,kmax,k,nzcount,tmp,mend,sizesset=1,grows,gcols;
3392 
3393   PetscFunctionBegin;
3394   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3395   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr);
3396   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3397   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3398 
3399   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3400   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3401   if (!rank) {
3402     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3403     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
3404     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
3405   }
3406 
3407   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
3408 
3409   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3410   M    = header[1]; N = header[2];
3411 
3412   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3413   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
3414   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
3415 
3416   /* If global sizes are set, check if they are consistent with that given in the file */
3417   if (sizesset) {
3418     ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
3419   }
3420   if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",M,grows);
3421   if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",N,gcols);
3422 
3423   if (M != N) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Can only do square matrices");
3424 
3425   /*
3426      This code adds extra rows to make sure the number of rows is
3427      divisible by the blocksize
3428   */
3429   Mbs        = M/bs;
3430   extra_rows = bs - M + bs*Mbs;
3431   if (extra_rows == bs) extra_rows = 0;
3432   else                  Mbs++;
3433   if (extra_rows && !rank) {
3434     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3435   }
3436 
3437   /* determine ownership of all rows */
3438   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
3439     mbs = Mbs/size + ((Mbs % size) > rank);
3440     m   = mbs*bs;
3441   } else { /* User set */
3442     m   = newmat->rmap->n;
3443     mbs = m/bs;
3444   }
3445   ierr = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr);
3446   ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3447 
3448   /* process 0 needs enough room for process with most rows */
3449   if (!rank) {
3450     mmax = rowners[1];
3451     for (i=2; i<=size; i++) {
3452       mmax = PetscMax(mmax,rowners[i]);
3453     }
3454     mmax*=bs;
3455   } else mmax = -1;             /* unused, but compiler warns anyway */
3456 
3457   rowners[0] = 0;
3458   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
3459   for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
3460   rstart = rowners[rank];
3461   rend   = rowners[rank+1];
3462 
3463   /* distribute row lengths to all processors */
3464   ierr = PetscMalloc1(m,&locrowlens);CHKERRQ(ierr);
3465   if (!rank) {
3466     mend = m;
3467     if (size == 1) mend = mend - extra_rows;
3468     ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr);
3469     for (j=mend; j<m; j++) locrowlens[j] = 1;
3470     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
3471     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
3472     for (j=0; j<m; j++) {
3473       procsnz[0] += locrowlens[j];
3474     }
3475     for (i=1; i<size; i++) {
3476       mend = browners[i+1] - browners[i];
3477       if (i == size-1) mend = mend - extra_rows;
3478       ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr);
3479       for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
3480       /* calculate the number of nonzeros on each processor */
3481       for (j=0; j<browners[i+1]-browners[i]; j++) {
3482         procsnz[i] += rowlengths[j];
3483       }
3484       ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3485     }
3486     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3487   } else {
3488     ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3489   }
3490 
3491   if (!rank) {
3492     /* determine max buffer needed and allocate it */
3493     maxnz = procsnz[0];
3494     for (i=1; i<size; i++) {
3495       maxnz = PetscMax(maxnz,procsnz[i]);
3496     }
3497     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
3498 
3499     /* read in my part of the matrix column indices  */
3500     nz     = procsnz[0];
3501     ierr   = PetscMalloc1((nz+1),&ibuf);CHKERRQ(ierr);
3502     mycols = ibuf;
3503     if (size == 1) nz -= extra_rows;
3504     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3505     if (size == 1) {
3506       for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i;
3507     }
3508 
3509     /* read in the parts for all other processes (except the last one) and ship them off */
3510     for (i=1; i<size-1; i++) {
3511       nz   = procsnz[i];
3512       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3513       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3514     }
3515     /* read in the stuff for the last proc */
3516     if (size != 1) {
3517       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
3518       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3519       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
3520       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
3521     }
3522     ierr = PetscFree(cols);CHKERRQ(ierr);
3523   } else {
3524     /* determine buffer space needed for message */
3525     nz = 0;
3526     for (i=0; i<m; i++) {
3527       nz += locrowlens[i];
3528     }
3529     ierr   = PetscMalloc1((nz+1),&ibuf);CHKERRQ(ierr);
3530     mycols = ibuf;
3531     /* receive message of column indices */
3532     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3533     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
3534     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
3535   }
3536 
3537   /* loop over local rows, determining number of off diagonal entries */
3538   ierr     = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr);
3539   ierr     = PetscCalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr);
3540   rowcount = 0; nzcount = 0;
3541   for (i=0; i<mbs; i++) {
3542     dcount  = 0;
3543     odcount = 0;
3544     for (j=0; j<bs; j++) {
3545       kmax = locrowlens[rowcount];
3546       for (k=0; k<kmax; k++) {
3547         tmp = mycols[nzcount++]/bs;
3548         if (!mask[tmp]) {
3549           mask[tmp] = 1;
3550           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
3551           else masked1[dcount++] = tmp;
3552         }
3553       }
3554       rowcount++;
3555     }
3556 
3557     dlens[i]  = dcount;
3558     odlens[i] = odcount;
3559 
3560     /* zero out the mask elements we set */
3561     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
3562     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
3563   }
3564 
3565 
3566   if (!sizesset) {
3567     ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3568   }
3569   ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
3570 
3571   if (!rank) {
3572     ierr = PetscMalloc1((maxnz+1),&buf);CHKERRQ(ierr);
3573     /* read in my part of the matrix numerical values  */
3574     nz     = procsnz[0];
3575     vals   = buf;
3576     mycols = ibuf;
3577     if (size == 1) nz -= extra_rows;
3578     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3579     if (size == 1) {
3580       for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0;
3581     }
3582 
3583     /* insert into matrix */
3584     jj = rstart*bs;
3585     for (i=0; i<m; i++) {
3586       ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3587       mycols += locrowlens[i];
3588       vals   += locrowlens[i];
3589       jj++;
3590     }
3591     /* read in other processors (except the last one) and ship out */
3592     for (i=1; i<size-1; i++) {
3593       nz   = procsnz[i];
3594       vals = buf;
3595       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3596       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3597     }
3598     /* the last proc */
3599     if (size != 1) {
3600       nz   = procsnz[i] - extra_rows;
3601       vals = buf;
3602       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3603       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
3604       ierr = MPIULong_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3605     }
3606     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3607   } else {
3608     /* receive numeric values */
3609     ierr = PetscMalloc1((nz+1),&buf);CHKERRQ(ierr);
3610 
3611     /* receive message of values */
3612     vals   = buf;
3613     mycols = ibuf;
3614     ierr   = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3615 
3616     /* insert into matrix */
3617     jj = rstart*bs;
3618     for (i=0; i<m; i++) {
3619       ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3620       mycols += locrowlens[i];
3621       vals   += locrowlens[i];
3622       jj++;
3623     }
3624   }
3625   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
3626   ierr = PetscFree(buf);CHKERRQ(ierr);
3627   ierr = PetscFree(ibuf);CHKERRQ(ierr);
3628   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
3629   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
3630   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
3631   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3632   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3633   PetscFunctionReturn(0);
3634 }
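
/*
   Usage sketch (illustrative; the file name is hypothetical): loading a
   binary matrix into a MATMPIBAIJ, with the block size chosen on the command
   line via -matload_block_size <bs>.

      Mat         A;
      PetscViewer viewer;
      ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
      ierr = MatLoad(A,viewer);CHKERRQ(ierr);
      ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/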
3635 
3636 #undef __FUNCT__
3637 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
3638 /*@
3639    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the hash table.
3640 
3641    Input Parameters:
3642 .  mat  - the matrix
3643 .  fact - factor
3644 
3645    Not Collective, each process can use a different factor
3646 
3647    Level: advanced
3648 
3649   Notes:
3650    This can also be set by the command line option: -mat_use_hash_table <fact>
3651 
3652 .keywords: matrix, hashtable, factor, HT
3653 
3654 .seealso: MatSetOption()
3655 @*/
3656 PetscErrorCode  MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
3657 {
3658   PetscErrorCode ierr;
3659 
3660   PetscFunctionBegin;
3661   ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr);
3662   PetscFunctionReturn(0);
3663 }
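
/*
   Usage sketch (illustrative): the factor only takes effect when hash table
   assembly has been enabled with MatSetOption().

      ierr = MatSetOption(A,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
      ierr = MatMPIBAIJSetHashTableFactor(A,1.6);CHKERRQ(ierr);
*/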
3664 
3665 #undef __FUNCT__
3666 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
3667 PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
3668 {
3669   Mat_MPIBAIJ *baij;
3670 
3671   PetscFunctionBegin;
3672   baij          = (Mat_MPIBAIJ*)mat->data;
3673   baij->ht_fact = fact;
3674   PetscFunctionReturn(0);
3675 }
3676 
3677 #undef __FUNCT__
3678 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
3679 PetscErrorCode  MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
3680 {
3681   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
3682 
3683   PetscFunctionBegin;
3684   *Ad     = a->A;
3685   *Ao     = a->B;
3686   *colmap = a->garray;
3687   PetscFunctionReturn(0);
3688 }
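
/*
   Usage sketch (illustrative): accessing the two sequential pieces and the
   block-column mapping of the off-diagonal part.

      Mat            Ad,Ao;
      const PetscInt *cmap;
      ierr = MatMPIBAIJGetSeqBAIJ(A,&Ad,&Ao,&cmap);CHKERRQ(ierr);
      // cmap[k] is the global block column of local block column k of Ao;
      // Ad and Ao are borrowed references and must not be destroyed
*/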
3689 
3690 /*
3691     Special version for direct calls from Fortran (to eliminate two function call overheads)
3692 */
3693 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3694 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3695 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3696 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3697 #endif
3698 
3699 #undef __FUNCT__
3700 #define __FUNCT__ "matmpibaijsetvaluesblocked"
3701 /*@C
3702   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked()
3703 
3704   Collective on Mat
3705 
3706   Input Parameters:
3707 + mat - the matrix
3708 . min - number of input rows
3709 . im - input rows
3710 . nin - number of input columns
3711 . in - input columns
3712 . v - numerical values input
3713 - addvin - INSERT_VALUES or ADD_VALUES
3714 
3715   Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse.
3716 
3717   Level: advanced
3718 
3719 .seealso:   MatSetValuesBlocked()
3720 @*/
3721 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
3722 {
3723   /* convert input arguments to C version */
3724   Mat        mat  = *matin;
3725   PetscInt   m    = *min, n = *nin;
3726   InsertMode addv = *addvin;
3727 
3728   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
3729   const MatScalar *value;
3730   MatScalar       *barray     = baij->barray;
3731   PetscBool       roworiented = baij->roworiented;
3732   PetscErrorCode  ierr;
3733   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
3734   PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
3735   PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
3736 
3737   PetscFunctionBegin;
3738   /* tasks normally handled by MatSetValuesBlocked() */
3739   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
3740 #if defined(PETSC_USE_DEBUG)
3741   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
3742   if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
3743 #endif
3744   if (mat->assembled) {
3745     mat->was_assembled = PETSC_TRUE;
3746     mat->assembled     = PETSC_FALSE;
3747   }
3748   ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
3749 
3750 
3751   if (!barray) {
3752     ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
3753     baij->barray = barray;
3754   }
3755 
3756   if (roworiented) stepval = (n-1)*bs;
3757   else stepval = (m-1)*bs;
3758 
3759   for (i=0; i<m; i++) {
3760     if (im[i] < 0) continue;
3761 #if defined(PETSC_USE_DEBUG)
3762     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
3763 #endif
3764     if (im[i] >= rstart && im[i] < rend) {
3765       row = im[i] - rstart;
3766       for (j=0; j<n; j++) {
3767         /* If NumCol = 1 then a copy is not required */
3768         if ((roworiented) && (n == 1)) {
3769           barray = (MatScalar*)v + i*bs2;
3770         } else if ((!roworiented) && (m == 1)) {
3771           barray = (MatScalar*)v + j*bs2;
3772         } else { /* Here a copy is required */
3773           if (roworiented) {
3774             value = v + i*(stepval+bs)*bs + j*bs;
3775           } else {
3776             value = v + j*(stepval+bs)*bs + i*bs;
3777           }
3778           for (ii=0; ii<bs; ii++,value+=stepval) {
3779             for (jj=0; jj<bs; jj++) {
3780               *barray++ = *value++;
3781             }
3782           }
3783           barray -=bs2;
3784         }
3785 
3786         if (in[j] >= cstart && in[j] < cend) {
3787           col  = in[j] - cstart;
3788           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
3789         } else if (in[j] < 0) continue;
3790 #if defined(PETSC_USE_DEBUG)
3791         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
3792 #endif
3793         else {
3794           if (mat->was_assembled) {
3795             if (!baij->colmap) {
3796               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
3797             }
3798 
3799 #if defined(PETSC_USE_DEBUG)
3800 #if defined(PETSC_USE_CTABLE)
3801             { PetscInt data;
3802               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
3803               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
3804             }
3805 #else
3806             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
3807 #endif
3808 #endif
3809 #if defined(PETSC_USE_CTABLE)
3810             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
3811             col  = (col - 1)/bs;
3812 #else
3813             col = (baij->colmap[in[j]] - 1)/bs;
3814 #endif
3815             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
3816               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
3817               col  =  in[j];
3818             }
3819           } else col = in[j];
3820           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
3821         }
3822       }
3823     } else {
3824       if (!baij->donotstash) {
3825         if (roworiented) {
3826           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
3827         } else {
3828           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
3829         }
3830       }
3831     }
3832   }
3833 
3834   /* task normally handled by MatSetValuesBlocked() */
3835   ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
3836   PetscFunctionReturn(0);
3837 }
3838 
3839 #undef __FUNCT__
3840 #define __FUNCT__ "MatCreateMPIBAIJWithArrays"
3841 /*@
3842      MatCreateMPIBAIJWithArrays - creates an MPI BAIJ matrix using arrays that contain the local rows
3843          in standard CSR format.
3844 
3845    Collective on MPI_Comm
3846 
3847    Input Parameters:
3848 +  comm - MPI communicator
3849 .  bs - the block size, only a block size of 1 is supported
3850 .  m - number of local rows (Cannot be PETSC_DECIDE)
3851 .  n - This value should be the same as the local size used in creating the
3852        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3853        calculated if N is given) For square matrices n is almost always m.
3854 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3855 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3856 .   i - row indices
3857 .   j - column indices
3858 -   a - matrix values
3859 
3860    Output Parameter:
3861 .   mat - the matrix
3862 
3863    Level: intermediate
3864 
3865    Notes:
3866        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3867      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3868      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3869 
3870      The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3871      the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3872      block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3873      with column-major ordering within blocks.
3874 
3875        The i and j indices are 0 based, and the i array contains the offsets of each local row into the j array (standard CSR row pointers).
3876 
3877 .keywords: matrix, aij, compressed row, sparse, parallel
3878 
3879 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3880           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
3881 @*/
3882 PetscErrorCode  MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3883 {
3884   PetscErrorCode ierr;
3885 
3886   PetscFunctionBegin;
3887   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3888   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3889   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3890   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3891   ierr = MatSetType(*mat,MATMPIBAIJ);CHKERRQ(ierr);
3892   ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
3893   ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
3894   ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
3895   PetscFunctionReturn(0);
3896 }
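
/*
   Example (a sketch with hypothetical CSR data): two local rows per process
   with bs = 1, the only block size supported here.

      Mat         A;
      PetscInt    i[] = {0,2,3};       // CSR row pointers for 2 local rows
      PetscInt    j[] = {0,1,1};       // column indices, sorted within each row
      PetscScalar a[] = {1.0,2.0,3.0};

      ierr = MatCreateMPIBAIJWithArrays(PETSC_COMM_WORLD,1,2,2,PETSC_DETERMINE,PETSC_DETERMINE,
                                        i,j,a,&A);CHKERRQ(ierr);
*/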
3897