xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision b00a91154f763f12aa55f3d53a3f2776f15f49e3)
1 
2 #include <../src/mat/impls/baij/mpi/mpibaij.h>   /*I  "petscmat.h"  I*/
3 #include <petscblaslapack.h>
4 #include <petscsf.h>
5 
6 extern PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
7 extern PetscErrorCode MatDisAssemble_MPIBAIJ(Mat);
8 extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []);
9 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
10 extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
11 extern PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
12 extern PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
13 extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar,Vec,Vec);
14 
15 #undef __FUNCT__
16 #define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ"
17 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
18 {
19   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
20   PetscErrorCode ierr;
21   PetscInt       i,*idxb = 0;
22   PetscScalar    *va,*vb;
23   Vec            vtmp;
24 
25   PetscFunctionBegin;
26   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
27   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
28   if (idx) {
29     for (i=0; i<A->rmap->n; i++) {
30       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
31     }
32   }
33 
34   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
35   if (idx) {ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);}
36   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
37   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
38 
39   for (i=0; i<A->rmap->n; i++) {
40     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
41       va[i] = vb[i];
42       if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);
43     }
44   }
45 
46   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
47   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
48   ierr = PetscFree(idxb);CHKERRQ(ierr);
49   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
50   PetscFunctionReturn(0);
51 }
52 
53 #undef __FUNCT__
54 #define __FUNCT__ "MatStoreValues_MPIBAIJ"
55 PetscErrorCode  MatStoreValues_MPIBAIJ(Mat mat)
56 {
57   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
58   PetscErrorCode ierr;
59 
60   PetscFunctionBegin;
61   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
62   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
63   PetscFunctionReturn(0);
64 }
65 
66 #undef __FUNCT__
67 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
68 PetscErrorCode  MatRetrieveValues_MPIBAIJ(Mat mat)
69 {
70   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
71   PetscErrorCode ierr;
72 
73   PetscFunctionBegin;
74   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
75   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
76   PetscFunctionReturn(0);
77 }
78 
79 /*
80      Local utility routine that creates a mapping from the global column
81    number to the local number in the off-diagonal part of the local
82    storage of the matrix.  This is done in a non scalable way since the
83    length of colmap equals the global matrix length.
84 */
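/*
   For example (array version, without PETSC_USE_CTABLE): with bs = 2 and garray = {3,7},
   the loop below sets colmap[3] = 0*bs+1 = 1 and colmap[7] = 1*bs+1 = 3, leaving every
   other entry 0 to mean "block column not present in B".  A caller such as
   MatSetValues_MPIBAIJ() then recovers the local point column of a global point column c
   via colmap[c/bs] - 1 + c%bs, with colmap[c/bs] - 1 < 0 signalling that the block column
   has not yet been allocated in the off-diagonal part.
*/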
85 #undef __FUNCT__
86 #define __FUNCT__ "MatCreateColmap_MPIBAIJ_Private"
87 PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
88 {
89   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
90   Mat_SeqBAIJ    *B    = (Mat_SeqBAIJ*)baij->B->data;
91   PetscErrorCode ierr;
92   PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;
93 
94   PetscFunctionBegin;
95 #if defined(PETSC_USE_CTABLE)
96   ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
97   for (i=0; i<nbs; i++) {
98     ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr);
99   }
100 #else
101   ierr = PetscMalloc1((baij->Nbs+1),&baij->colmap);CHKERRQ(ierr);
102   ierr = PetscLogObjectMemory((PetscObject)mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
103   ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
104   for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
105 #endif
106   PetscFunctionReturn(0);
107 }
108 
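/*
   The two macros below insert one point value into the local diagonal block A or the
   off-diagonal block B of a SeqBAIJ matrix without the overhead of a function call: the
   block column is located with a short bisection followed by a linear scan of the block
   row, the value is added or overwritten at column-major offset bs*cidx+ridx inside the
   bs x bs block, and the reallocation/shift code runs only when a new block must be inserted.
*/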
109 #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
110   { \
111  \
112     brow = row/bs;  \
113     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
114     rmax = aimax[brow]; nrow = ailen[brow]; \
115     bcol = col/bs; \
116     ridx = row % bs; cidx = col % bs; \
117     low  = 0; high = nrow; \
118     while (high-low > 3) { \
119       t = (low+high)/2; \
120       if (rp[t] > bcol) high = t; \
121       else              low  = t; \
122     } \
123     for (_i=low; _i<high; _i++) { \
124       if (rp[_i] > bcol) break; \
125       if (rp[_i] == bcol) { \
126         bap = ap +  bs2*_i + bs*cidx + ridx; \
127         if (addv == ADD_VALUES) *bap += value;  \
128         else                    *bap  = value;  \
129         goto a_noinsert; \
130       } \
131     } \
132     if (a->nonew == 1) goto a_noinsert; \
133     if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
134     MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
135     N = nrow++ - 1;  \
136     /* shift up all the later entries in this row */ \
137     for (ii=N; ii>=_i; ii--) { \
138       rp[ii+1] = rp[ii]; \
139       ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
140     } \
141     if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
142     rp[_i]                      = bcol;  \
143     ap[bs2*_i + bs*cidx + ridx] = value;  \
144 a_noinsert:; \
145     ailen[brow] = nrow; \
146   }
147 
148 #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
149   { \
150     brow = row/bs;  \
151     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
152     rmax = bimax[brow]; nrow = bilen[brow]; \
153     bcol = col/bs; \
154     ridx = row % bs; cidx = col % bs; \
155     low  = 0; high = nrow; \
156     while (high-low > 3) { \
157       t = (low+high)/2; \
158       if (rp[t] > bcol) high = t; \
159       else              low  = t; \
160     } \
161     for (_i=low; _i<high; _i++) { \
162       if (rp[_i] > bcol) break; \
163       if (rp[_i] == bcol) { \
164         bap = ap +  bs2*_i + bs*cidx + ridx; \
165         if (addv == ADD_VALUES) *bap += value;  \
166         else                    *bap  = value;  \
167         goto b_noinsert; \
168       } \
169     } \
170     if (b->nonew == 1) goto b_noinsert; \
171     if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
172     MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
173     N = nrow++ - 1;  \
174     /* shift up all the later entries in this row */ \
175     for (ii=N; ii>=_i; ii--) { \
176       rp[ii+1] = rp[ii]; \
177       ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
178     } \
179     if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
180     rp[_i]                      = bcol;  \
181     ap[bs2*_i + bs*cidx + ridx] = value;  \
182 b_noinsert:; \
183     bilen[brow] = nrow; \
184   }
185 
186 #undef __FUNCT__
187 #define __FUNCT__ "MatSetValues_MPIBAIJ"
188 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
189 {
190   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
191   MatScalar      value;
192   PetscBool      roworiented = baij->roworiented;
193   PetscErrorCode ierr;
194   PetscInt       i,j,row,col;
195   PetscInt       rstart_orig=mat->rmap->rstart;
196   PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
197   PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;
198 
199   /* Some variables required by the MatSetValues_SeqBAIJ_*_Private() macros above */
200   Mat         A     = baij->A;
201   Mat_SeqBAIJ *a    = (Mat_SeqBAIJ*)(A)->data;
202   PetscInt    *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
203   MatScalar   *aa   =a->a;
204 
205   Mat         B     = baij->B;
206   Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
207   PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
208   MatScalar   *ba   =b->a;
209 
210   PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
211   PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
212   MatScalar *ap,*bap;
213 
214   PetscFunctionBegin;
215   if (v) PetscValidScalarPointer(v,6);
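  /*
     Each (row,col) pair is routed to one of three destinations: the diagonal block A when
     both indices are owned by this process, the off-diagonal block B (with the global block
     column translated through colmap/garray) when only the row is local, or the stash when
     the row belongs to another process and must be communicated during assembly.
  */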
216   for (i=0; i<m; i++) {
217     if (im[i] < 0) continue;
218 #if defined(PETSC_USE_DEBUG)
219     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
220 #endif
221     if (im[i] >= rstart_orig && im[i] < rend_orig) {
222       row = im[i] - rstart_orig;
223       for (j=0; j<n; j++) {
224         if (in[j] >= cstart_orig && in[j] < cend_orig) {
225           col = in[j] - cstart_orig;
226           if (roworiented) value = v[i*n+j];
227           else             value = v[i+j*m];
228           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
229           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
230         } else if (in[j] < 0) continue;
231 #if defined(PETSC_USE_DEBUG)
232         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
233 #endif
234         else {
235           if (mat->was_assembled) {
236             if (!baij->colmap) {
237               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
238             }
239 #if defined(PETSC_USE_CTABLE)
240             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
241             col  = col - 1;
242 #else
243             col = baij->colmap[in[j]/bs] - 1;
244 #endif
245             if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
246               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
247               col  =  in[j];
248               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
249               B    = baij->B;
250               b    = (Mat_SeqBAIJ*)(B)->data;
251               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
252               ba   =b->a;
253             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
254             else col += in[j]%bs;
255           } else col = in[j];
256           if (roworiented) value = v[i*n+j];
257           else             value = v[i+j*m];
258           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
259           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
260         }
261       }
262     } else {
263       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
264       if (!baij->donotstash) {
265         mat->assembled = PETSC_FALSE;
266         if (roworiented) {
267           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
268         } else {
269           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
270         }
271       }
272     }
273   }
274   PetscFunctionReturn(0);
275 }
276 
277 #undef __FUNCT__
278 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
279 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
280 {
281   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
282   const PetscScalar *value;
283   MatScalar         *barray     = baij->barray;
284   PetscBool         roworiented = baij->roworiented;
285   PetscErrorCode    ierr;
286   PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
287   PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
288   PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
289 
290   PetscFunctionBegin;
291   if (!barray) {
292     ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
293     baij->barray = barray;
294   }
295 
296   if (roworiented) stepval = (n-1)*bs;
297   else stepval = (m-1)*bs;
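  /*
     stepval is the gap, in scalars, between the end of one row of a bs x bs block and the
     start of that block's next row inside the caller's array v: consecutive rows of a block
     are stepval+bs apart, i.e. n*bs (row-oriented) or m*bs (column-oriented) scalars.
  */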
298 
299   for (i=0; i<m; i++) {
300     if (im[i] < 0) continue;
301 #if defined(PETSC_USE_DEBUG)
302     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
303 #endif
304     if (im[i] >= rstart && im[i] < rend) {
305       row = im[i] - rstart;
306       for (j=0; j<n; j++) {
307         /* If only one block column (row-oriented) or one block row (column-oriented) is passed, the block is already contiguous and no copy is required */
308         if ((roworiented) && (n == 1)) {
309           barray = (MatScalar*)v + i*bs2;
310         } else if ((!roworiented) && (m == 1)) {
311           barray = (MatScalar*)v + j*bs2;
312         } else { /* Here a copy is required */
313           if (roworiented) {
314             value = v + (i*(stepval+bs) + j)*bs;
315           } else {
316             value = v + (j*(stepval+bs) + i)*bs;
317           }
318           for (ii=0; ii<bs; ii++,value+=bs+stepval) {
319             for (jj=0; jj<bs; jj++) barray[jj] = value[jj];
320             barray += bs;
321           }
322           barray -= bs2;
323         }
324 
325         if (in[j] >= cstart && in[j] < cend) {
326           col  = in[j] - cstart;
327           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
328         } else if (in[j] < 0) continue;
329 #if defined(PETSC_USE_DEBUG)
330         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
331 #endif
332         else {
333           if (mat->was_assembled) {
334             if (!baij->colmap) {
335               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
336             }
337 
338 #if defined(PETSC_USE_DEBUG)
339 #if defined(PETSC_USE_CTABLE)
340             { PetscInt data;
341               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
342               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
343             }
344 #else
345             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
346 #endif
347 #endif
348 #if defined(PETSC_USE_CTABLE)
349             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
350             col  = (col - 1)/bs;
351 #else
352             col = (baij->colmap[in[j]] - 1)/bs;
353 #endif
354             if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
355               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
356               col  =  in[j];
357             } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", bs*im[i], bs*in[j]);
358           } else col = in[j];
359           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
360         }
361       }
362     } else {
363       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
364       if (!baij->donotstash) {
365         if (roworiented) {
366           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
367         } else {
368           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
369         }
370       }
371     }
372   }
373   PetscFunctionReturn(0);
374 }
375 
376 #define HASH_KEY 0.6180339887
377 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
378 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
379 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
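/*
   HASH() is multiplicative hashing: the key is scaled by the fractional part of the golden
   ratio, the integer part is dropped, and the remaining fraction is mapped onto [0,size).
   Illustrative arithmetic: with size = 1000 and key = 123, 123*0.6180339887 = 76.018...,
   fractional part 0.018..., giving slot (PetscInt)(1000*0.018...) = 18.
*/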
380 #undef __FUNCT__
381 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
382 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
383 {
384   Mat_MPIBAIJ    *baij       = (Mat_MPIBAIJ*)mat->data;
385   PetscBool      roworiented = baij->roworiented;
386   PetscErrorCode ierr;
387   PetscInt       i,j,row,col;
388   PetscInt       rstart_orig=mat->rmap->rstart;
389   PetscInt       rend_orig  =mat->rmap->rend,Nbs=baij->Nbs;
390   PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
391   PetscReal      tmp;
392   MatScalar      **HD = baij->hd,value;
393 #if defined(PETSC_USE_DEBUG)
394   PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
395 #endif
396 
397   PetscFunctionBegin;
398   if (v) PetscValidScalarPointer(v,6);
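  /*
     The table is keyed by the global block entry, key = (row/bs)*Nbs + (col/bs) + 1, and
     HD[idx] points directly at that block's bs2 values inside A or B, so the point entry is
     updated in place at offset (col%bs)*bs + (row%bs) (column-major within the block).
  */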
399   for (i=0; i<m; i++) {
400 #if defined(PETSC_USE_DEBUG)
401     if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
402     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
403 #endif
404     row = im[i];
405     if (row >= rstart_orig && row < rend_orig) {
406       for (j=0; j<n; j++) {
407         col = in[j];
408         if (roworiented) value = v[i*n+j];
409         else             value = v[i+j*m];
410         /* Look up into the hash table */
411         key = (row/bs)*Nbs+(col/bs)+1;
412         h1  = HASH(size,key,tmp);
413 
414 
415         idx = h1;
416 #if defined(PETSC_USE_DEBUG)
417         insert_ct++;
418         total_ct++;
419         if (HT[idx] != key) {
420           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
421           if (idx == size) {
422             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
423             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
424           }
425         }
426 #else
427         if (HT[idx] != key) {
428           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
429           if (idx == size) {
430             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
431             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
432           }
433         }
434 #endif
435         /* A HASH table entry is found, so insert the values at the correct address */
436         if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
437         else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
438       }
439     } else if (!baij->donotstash) {
440       if (roworiented) {
441         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
442       } else {
443         ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
444       }
445     }
446   }
447 #if defined(PETSC_USE_DEBUG)
448   baij->ht_total_ct  = total_ct;
449   baij->ht_insert_ct = insert_ct;
450 #endif
451   PetscFunctionReturn(0);
452 }
453 
454 #undef __FUNCT__
455 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
456 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
457 {
458   Mat_MPIBAIJ       *baij       = (Mat_MPIBAIJ*)mat->data;
459   PetscBool         roworiented = baij->roworiented;
460   PetscErrorCode    ierr;
461   PetscInt          i,j,ii,jj,row,col;
462   PetscInt          rstart=baij->rstartbs;
463   PetscInt          rend  =mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
464   PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
465   PetscReal         tmp;
466   MatScalar         **HD = baij->hd,*baij_a;
467   const PetscScalar *v_t,*value;
468 #if defined(PETSC_USE_DEBUG)
469   PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
470 #endif
471 
472   PetscFunctionBegin;
473   if (roworiented) stepval = (n-1)*bs;
474   else stepval = (m-1)*bs;
475 
476   for (i=0; i<m; i++) {
477 #if defined(PETSC_USE_DEBUG)
478     if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
479     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
480 #endif
481     row = im[i];
482     v_t = v + i*nbs2;
483     if (row >= rstart && row < rend) {
484       for (j=0; j<n; j++) {
485         col = in[j];
486 
487         /* Look up into the Hash Table */
488         key = row*Nbs+col+1;
489         h1  = HASH(size,key,tmp);
490 
491         idx = h1;
492 #if defined(PETSC_USE_DEBUG)
493         total_ct++;
494         insert_ct++;
495         if (HT[idx] != key) {
496           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
497           if (idx == size) {
498             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
499             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
500           }
501         }
502 #else
503         if (HT[idx] != key) {
504           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
505           if (idx == size) {
506             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
507             if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
508           }
509         }
510 #endif
511         baij_a = HD[idx];
512         if (roworiented) {
513           /*value = v + i*(stepval+bs)*bs + j*bs;*/
514           /* value = v + (i*(stepval+bs)+j)*bs; */
515           value = v_t;
516           v_t  += bs;
517           if (addv == ADD_VALUES) {
518             for (ii=0; ii<bs; ii++,value+=stepval) {
519               for (jj=ii; jj<bs2; jj+=bs) {
520                 baij_a[jj] += *value++;
521               }
522             }
523           } else {
524             for (ii=0; ii<bs; ii++,value+=stepval) {
525               for (jj=ii; jj<bs2; jj+=bs) {
526                 baij_a[jj] = *value++;
527               }
528             }
529           }
530         } else {
531           value = v + j*(stepval+bs)*bs + i*bs;
532           if (addv == ADD_VALUES) {
533             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
534               for (jj=0; jj<bs; jj++) {
535                 baij_a[jj] += *value++;
536               }
537             }
538           } else {
539             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
540               for (jj=0; jj<bs; jj++) {
541                 baij_a[jj] = *value++;
542               }
543             }
544           }
545         }
546       }
547     } else {
548       if (!baij->donotstash) {
549         if (roworiented) {
550           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
551         } else {
552           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
553         }
554       }
555     }
556   }
557 #if defined(PETSC_USE_DEBUG)
558   baij->ht_total_ct  = total_ct;
559   baij->ht_insert_ct = insert_ct;
560 #endif
561   PetscFunctionReturn(0);
562 }
563 
564 #undef __FUNCT__
565 #define __FUNCT__ "MatGetValues_MPIBAIJ"
566 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
567 {
568   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
569   PetscErrorCode ierr;
570   PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
571   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
572 
573   PetscFunctionBegin;
574   for (i=0; i<m; i++) {
575     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
576     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
577     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
578       row = idxm[i] - bsrstart;
579       for (j=0; j<n; j++) {
580         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
581         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
582         if (idxn[j] >= bscstart && idxn[j] < bscend) {
583           col  = idxn[j] - bscstart;
584           ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
585         } else {
586           if (!baij->colmap) {
587             ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
588           }
589 #if defined(PETSC_USE_CTABLE)
590           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
591           data--;
592 #else
593           data = baij->colmap[idxn[j]/bs]-1;
594 #endif
595           if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
596           else {
597             col  = data + idxn[j]%bs;
598             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
599           }
600         }
601       }
602     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
603   }
604   PetscFunctionReturn(0);
605 }
606 
607 #undef __FUNCT__
608 #define __FUNCT__ "MatNorm_MPIBAIJ"
609 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
610 {
611   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
612   Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
613   PetscErrorCode ierr;
614   PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
615   PetscReal      sum = 0.0;
616   MatScalar      *v;
617 
618   PetscFunctionBegin;
619   if (baij->size == 1) {
620     ierr =  MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
621   } else {
622     if (type == NORM_FROBENIUS) {
623       v  = amat->a;
624       nz = amat->nz*bs2;
625       for (i=0; i<nz; i++) {
626         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
627       }
628       v  = bmat->a;
629       nz = bmat->nz*bs2;
630       for (i=0; i<nz; i++) {
631         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
632       }
633       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
634       *nrm = PetscSqrtReal(*nrm);
635     } else if (type == NORM_1) { /* max column sum */
636       PetscReal *tmp,*tmp2;
637       PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
638       ierr = PetscMalloc2(mat->cmap->N,&tmp,mat->cmap->N,&tmp2);CHKERRQ(ierr);
639       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
640       v    = amat->a; jj = amat->j;
641       for (i=0; i<amat->nz; i++) {
642         for (j=0; j<bs; j++) {
643           col = bs*(cstart + *jj) + j; /* column index */
644           for (row=0; row<bs; row++) {
645             tmp[col] += PetscAbsScalar(*v);  v++;
646           }
647         }
648         jj++;
649       }
650       v = bmat->a; jj = bmat->j;
651       for (i=0; i<bmat->nz; i++) {
652         for (j=0; j<bs; j++) {
653           col = bs*garray[*jj] + j;
654           for (row=0; row<bs; row++) {
655             tmp[col] += PetscAbsScalar(*v); v++;
656           }
657         }
658         jj++;
659       }
660       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
661       *nrm = 0.0;
662       for (j=0; j<mat->cmap->N; j++) {
663         if (tmp2[j] > *nrm) *nrm = tmp2[j];
664       }
665       ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
666     } else if (type == NORM_INFINITY) { /* max row sum */
667       PetscReal *sums;
668       ierr = PetscMalloc1(bs,&sums);CHKERRQ(ierr);
669       sum  = 0.0;
670       for (j=0; j<amat->mbs; j++) {
671         for (row=0; row<bs; row++) sums[row] = 0.0;
672         v  = amat->a + bs2*amat->i[j];
673         nz = amat->i[j+1]-amat->i[j];
674         for (i=0; i<nz; i++) {
675           for (col=0; col<bs; col++) {
676             for (row=0; row<bs; row++) {
677               sums[row] += PetscAbsScalar(*v); v++;
678             }
679           }
680         }
681         v  = bmat->a + bs2*bmat->i[j];
682         nz = bmat->i[j+1]-bmat->i[j];
683         for (i=0; i<nz; i++) {
684           for (col=0; col<bs; col++) {
685             for (row=0; row<bs; row++) {
686               sums[row] += PetscAbsScalar(*v); v++;
687             }
688           }
689         }
690         for (row=0; row<bs; row++) {
691           if (sums[row] > sum) sum = sums[row];
692         }
693       }
694       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
695       ierr = PetscFree(sums);CHKERRQ(ierr);
696     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for this norm yet");
697   }
698   PetscFunctionReturn(0);
699 }
700 
701 /*
702   Creates and populates the hash table.
703   This table is created only once.
704   If new entries need to be added to the matrix
705   then the hash table has to be destroyed and
706   recreated.
707 */
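/*
   A sketch of the scheme used below: every nonzero block of A and B is given the key
   row*Nbs + col + 1 (global block indices, shifted by one so that 0 marks an empty slot).
   Starting from HASH(ht_size,key) the table is probed linearly with wraparound until a free
   slot is found; HT[] records the key and HD[] a pointer to the block's bs2 values, which is
   what the _HT variants of MatSetValues later write through.
*/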
708 #undef __FUNCT__
709 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
710 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
711 {
712   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
713   Mat            A     = baij->A,B=baij->B;
714   Mat_SeqBAIJ    *a    = (Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)B->data;
715   PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
716   PetscErrorCode ierr;
717   PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
718   PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
719   PetscInt       *HT,key;
720   MatScalar      **HD;
721   PetscReal      tmp;
722 #if defined(PETSC_USE_INFO)
723   PetscInt ct=0,max=0;
724 #endif
725 
726   PetscFunctionBegin;
727   if (baij->ht) PetscFunctionReturn(0);
728 
729   baij->ht_size = (PetscInt)(factor*nz);
730   ht_size       = baij->ht_size;
731 
732   /* Allocate Memory for Hash Table */
733   ierr = PetscCalloc2(ht_size,&baij->hd,ht_size,&baij->ht);CHKERRQ(ierr);
734   HD   = baij->hd;
735   HT   = baij->ht;
736 
737   /* Loop Over A */
738   for (i=0; i<a->mbs; i++) {
739     for (j=ai[i]; j<ai[i+1]; j++) {
740       row = i+rstart;
741       col = aj[j]+cstart;
742 
743       key = row*Nbs + col + 1;
744       h1  = HASH(ht_size,key,tmp);
745       for (k=0; k<ht_size; k++) {
746         if (!HT[(h1+k)%ht_size]) {
747           HT[(h1+k)%ht_size] = key;
748           HD[(h1+k)%ht_size] = a->a + j*bs2;
749           break;
750 #if defined(PETSC_USE_INFO)
751         } else {
752           ct++;
753 #endif
754         }
755       }
756 #if defined(PETSC_USE_INFO)
757       if (k> max) max = k;
758 #endif
759     }
760   }
761   /* Loop Over B */
762   for (i=0; i<b->mbs; i++) {
763     for (j=bi[i]; j<bi[i+1]; j++) {
764       row = i+rstart;
765       col = garray[bj[j]];
766       key = row*Nbs + col + 1;
767       h1  = HASH(ht_size,key,tmp);
768       for (k=0; k<ht_size; k++) {
769         if (!HT[(h1+k)%ht_size]) {
770           HT[(h1+k)%ht_size] = key;
771           HD[(h1+k)%ht_size] = b->a + j*bs2;
772           break;
773 #if defined(PETSC_USE_INFO)
774         } else {
775           ct++;
776 #endif
777         }
778       }
779 #if defined(PETSC_USE_INFO)
780       if (k> max) max = k;
781 #endif
782     }
783   }
784 
785   /* Print Summary */
786 #if defined(PETSC_USE_INFO)
787   for (i=0,j=0; i<ht_size; i++) {
788     if (HT[i]) j++;
789   }
790   ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
791 #endif
792   PetscFunctionReturn(0);
793 }
794 
795 #undef __FUNCT__
796 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
797 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
798 {
799   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
800   PetscErrorCode ierr;
801   PetscInt       nstash,reallocs;
802   InsertMode     addv;
803 
804   PetscFunctionBegin;
805   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
806 
807   /* make sure all processors are either in INSERTMODE or ADDMODE */
808   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
809   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
810   mat->insertmode = addv; /* in case this processor had no cache */
811 
812   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
813   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
814   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
815   ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
816   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
817   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
818   PetscFunctionReturn(0);
819 }
820 
821 #undef __FUNCT__
822 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
823 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
824 {
825   Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
826   Mat_SeqBAIJ    *a   =(Mat_SeqBAIJ*)baij->A->data;
827   PetscErrorCode ierr;
828   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
829   PetscInt       *row,*col;
830   PetscBool      r1,r2,r3,other_disassembled;
831   MatScalar      *val;
832   InsertMode     addv = mat->insertmode;
833   PetscMPIInt    n;
834 
835   PetscFunctionBegin;
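  /*
     Assembly proceeds in stages: drain the scalar and block stashes received from other
     processes into the matrix, assemble the diagonal block A, agree across processes on
     whether anyone disassembled (in which case all must), build the scatter for
     matrix-vector products on first final assembly, assemble B, and finally create the
     optional hash table for MatSetValues() if it was requested.
  */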
836   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
837   if (!baij->donotstash && !mat->nooffprocentries) {
838     while (1) {
839       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
840       if (!flg) break;
841 
842       for (i=0; i<n;) {
843         /* Now identify the consecutive vals belonging to the same row */
844         for (j=i,rstart=row[j]; j<n; j++) {
845           if (row[j] != rstart) break;
846         }
847         if (j < n) ncols = j-i;
848         else       ncols = n-i;
849         /* Now assemble all these values with a single function call */
850         ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
851         i    = j;
852       }
853     }
854     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
855     /* Now process the block-stash. Since the values are stashed column-oriented,
856        temporarily clear the roworiented flags, and restore the original flags
857        after the MatSetValuesBlocked() calls below */
858     r1 = baij->roworiented;
859     r2 = a->roworiented;
860     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
861 
862     baij->roworiented = PETSC_FALSE;
863     a->roworiented    = PETSC_FALSE;
864 
865     (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
866     while (1) {
867       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
868       if (!flg) break;
869 
870       for (i=0; i<n;) {
871         /* Now identify the consecutive vals belonging to the same row */
872         for (j=i,rstart=row[j]; j<n; j++) {
873           if (row[j] != rstart) break;
874         }
875         if (j < n) ncols = j-i;
876         else       ncols = n-i;
877         ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
878         i    = j;
879       }
880     }
881     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
882 
883     baij->roworiented = r1;
884     a->roworiented    = r2;
885 
886     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
887   }
888 
889   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
890   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
891 
892   /* determine if any processor has disassembled; if so we must
893      also disassemble ourselves, in order that we may reassemble. */
894   /*
895      if the nonzero structure of submatrix B cannot change then we know that
896      no processor disassembled, thus we can skip this step
897   */
898   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
899     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
900     if (mat->was_assembled && !other_disassembled) {
901       ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
902     }
903   }
904 
905   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
906     ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
907   }
908   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
909   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
910 
911 #if defined(PETSC_USE_INFO)
912   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
913     ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);
914 
915     baij->ht_total_ct  = 0;
916     baij->ht_insert_ct = 0;
917   }
918 #endif
919   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
920     ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
921 
922     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
923     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
924   }
925 
926   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
927 
928   baij->rowvalues = 0;
929   PetscFunctionReturn(0);
930 }
931 
932 #include <petscdraw.h>
933 #undef __FUNCT__
934 #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
935 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
936 {
937   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
938   PetscErrorCode    ierr;
939   PetscMPIInt       size = baij->size,rank = baij->rank;
940   PetscInt          bs   = mat->rmap->bs;
941   PetscBool         iascii,isdraw;
942   PetscViewer       sviewer;
943   PetscViewerFormat format;
944 
945   PetscFunctionBegin;
946   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
947   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
948   if (iascii) {
949     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
950     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
951       MatInfo info;
952       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
953       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
954       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
955       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
956                                                 rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
957       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
958       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
959       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
960       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
961       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
962       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
963       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
964       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
965       PetscFunctionReturn(0);
966     } else if (format == PETSC_VIEWER_ASCII_INFO) {
967       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
968       PetscFunctionReturn(0);
969     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
970       PetscFunctionReturn(0);
971     }
972   }
973 
974   if (isdraw) {
975     PetscDraw draw;
976     PetscBool isnull;
977     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
978     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
979   }
980 
981   if (size == 1) {
982     ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
983     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
984   } else {
985     /* assemble the entire matrix onto first processor. */
986     Mat         A;
987     Mat_SeqBAIJ *Aloc;
988     PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
989     MatScalar   *a;
990 
991     /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
992     /* Perhaps this should be the type of mat? */
993     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
994     if (!rank) {
995       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
996     } else {
997       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
998     }
999     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
1000     ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
1001     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1002     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
1003 
1004     /* copy over the A part */
1005     Aloc = (Mat_SeqBAIJ*)baij->A->data;
1006     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1007     ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);
1008 
1009     for (i=0; i<mbs; i++) {
1010       rvals[0] = bs*(baij->rstartbs + i);
1011       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1012       for (j=ai[i]; j<ai[i+1]; j++) {
1013         col = (baij->cstartbs+aj[j])*bs;
1014         for (k=0; k<bs; k++) {
1015           ierr      = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1016           col++; a += bs;
1017         }
1018       }
1019     }
1020     /* copy over the B part */
1021     Aloc = (Mat_SeqBAIJ*)baij->B->data;
1022     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1023     for (i=0; i<mbs; i++) {
1024       rvals[0] = bs*(baij->rstartbs + i);
1025       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1026       for (j=ai[i]; j<ai[i+1]; j++) {
1027         col = baij->garray[aj[j]]*bs;
1028         for (k=0; k<bs; k++) {
1029           ierr      = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1030           col++; a += bs;
1031         }
1032       }
1033     }
1034     ierr = PetscFree(rvals);CHKERRQ(ierr);
1035     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1036     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1037     /*
1038        Everyone has to call to draw the matrix since the graphics waits are
1039        synchronized across all processors that share the PetscDraw object
1040     */
1041     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1042     if (!rank) {
1043       ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1044       /* Set the type name to MATMPIBAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqBAIJ_ASCII()*/
1045       PetscStrcpy(((PetscObject)((Mat_MPIBAIJ*)(A->data))->A)->type_name,MATMPIBAIJ);
1046       ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1047     }
1048     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1049     ierr = MatDestroy(&A);CHKERRQ(ierr);
1050   }
1051   PetscFunctionReturn(0);
1052 }
1053 
1054 #undef __FUNCT__
1055 #define __FUNCT__ "MatView_MPIBAIJ_Binary"
1056 static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
1057 {
1058   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)mat->data;
1059   Mat_SeqBAIJ    *A = (Mat_SeqBAIJ*)a->A->data;
1060   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)a->B->data;
1061   PetscErrorCode ierr;
1062   PetscInt       i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen;
1063   PetscInt       *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll;
1064   int            fd;
1065   PetscScalar    *column_values;
1066   FILE           *file;
1067   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1068   PetscInt       message_count,flowcontrolcount;
1069 
1070   PetscFunctionBegin;
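  /*
     Layout written below: a 4-entry header (MAT_FILE_CLASSID, global rows, global columns,
     total number of nonzeros), then one length per row, then the column indices of every
     nonzero, then the numerical values.  Process 0 gathers each piece from the other
     processes under flow control and writes it to the file.
  */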
1071   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
1072   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
1073   nz   = bs2*(A->nz + B->nz);
1074   rlen = mat->rmap->n;
1075   if (!rank) {
1076     header[0] = MAT_FILE_CLASSID;
1077     header[1] = mat->rmap->N;
1078     header[2] = mat->cmap->N;
1079 
1080     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1081     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1082     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1083     /* get largest number of rows any processor has */
1084     range = mat->rmap->range;
1085     for (i=1; i<size; i++) {
1086       rlen = PetscMax(rlen,range[i+1] - range[i]);
1087     }
1088   } else {
1089     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1090   }
1091 
1092   ierr = PetscMalloc1((rlen/bs),&crow_lens);CHKERRQ(ierr);
1093   /* compute the number of nonzero blocks in each block row  */
1094   for (i=0; i<a->mbs; i++) {
1095     crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1096   }
1097   /* store the row lengths to the file */
1098   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1099   if (!rank) {
1100     MPI_Status status;
1101     ierr = PetscMalloc1(rlen,&row_lens);CHKERRQ(ierr);
1102     rlen = (range[1] - range[0])/bs;
1103     for (i=0; i<rlen; i++) {
1104       for (j=0; j<bs; j++) {
1105         row_lens[i*bs+j] = bs*crow_lens[i];
1106       }
1107     }
1108     ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1109     for (i=1; i<size; i++) {
1110       rlen = (range[i+1] - range[i])/bs;
1111       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1112       ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1113       for (k=0; k<rlen; k++) {
1114         for (j=0; j<bs; j++) {
1115           row_lens[k*bs+j] = bs*crow_lens[k];
1116         }
1117       }
1118       ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1119     }
1120     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1121     ierr = PetscFree(row_lens);CHKERRQ(ierr);
1122   } else {
1123     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1124     ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1125     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1126   }
1127   ierr = PetscFree(crow_lens);CHKERRQ(ierr);
1128 
1129   /* load up the local column indices. Include them for all rows, not just one per block row, since process 0 does not have the
1130      information needed to expand them from a block row to each of its rows. This does require more communication, but still no more than
1131      the communication needed for the nonzero values  */
1132   nzmax = nz; /*  space the largest processor needs */
1133   ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1134   ierr  = PetscMalloc1(nzmax,&column_indices);CHKERRQ(ierr);
1135   cnt   = 0;
1136   for (i=0; i<a->mbs; i++) {
1137     pcnt = cnt;
1138     for (j=B->i[i]; j<B->i[i+1]; j++) {
1139       if ((col = garray[B->j[j]]) > cstart) break;
1140       for (l=0; l<bs; l++) {
1141         column_indices[cnt++] = bs*col+l;
1142       }
1143     }
1144     for (k=A->i[i]; k<A->i[i+1]; k++) {
1145       for (l=0; l<bs; l++) {
1146         column_indices[cnt++] = bs*(A->j[k] + cstart)+l;
1147       }
1148     }
1149     for (; j<B->i[i+1]; j++) {
1150       for (l=0; l<bs; l++) {
1151         column_indices[cnt++] = bs*garray[B->j[j]]+l;
1152       }
1153     }
1154     len = cnt - pcnt;
1155     for (k=1; k<bs; k++) {
1156       ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr);
1157       cnt += len;
1158     }
1159   }
1160   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1161 
1162   /* store the columns to the file */
1163   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1164   if (!rank) {
1165     MPI_Status status;
1166     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1167     for (i=1; i<size; i++) {
1168       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1169       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1170       ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1171       ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1172     }
1173     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1174   } else {
1175     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1176     ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1177     ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1178     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1179   }
1180   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1181 
1182   /* load up the numerical values */
1183   ierr = PetscMalloc1(nzmax,&column_values);CHKERRQ(ierr);
1184   cnt  = 0;
1185   for (i=0; i<a->mbs; i++) {
1186     rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]);
1187     for (j=B->i[i]; j<B->i[i+1]; j++) {
1188       if (garray[B->j[j]] > cstart) break;
1189       for (l=0; l<bs; l++) {
1190         for (ll=0; ll<bs; ll++) {
1191           column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1192         }
1193       }
1194       cnt += bs;
1195     }
1196     for (k=A->i[i]; k<A->i[i+1]; k++) {
1197       for (l=0; l<bs; l++) {
1198         for (ll=0; ll<bs; ll++) {
1199           column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll];
1200         }
1201       }
1202       cnt += bs;
1203     }
1204     for (; j<B->i[i+1]; j++) {
1205       for (l=0; l<bs; l++) {
1206         for (ll=0; ll<bs; ll++) {
1207           column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1208         }
1209       }
1210       cnt += bs;
1211     }
1212     cnt += (bs-1)*rlen;
1213   }
1214   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1215 
1216   /* store the column values to the file */
1217   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1218   if (!rank) {
1219     MPI_Status status;
1220     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1221     for (i=1; i<size; i++) {
1222       ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
1223       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1224       ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
1225       ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1226     }
1227     ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
1228   } else {
1229     ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
1230     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1231     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1232     ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
1233   }
1234   ierr = PetscFree(column_values);CHKERRQ(ierr);
1235 
1236   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1237   if (file) {
1238     fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
1239   }
1240   PetscFunctionReturn(0);
1241 }
1242 
1243 #undef __FUNCT__
1244 #define __FUNCT__ "MatView_MPIBAIJ"
1245 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
1246 {
1247   PetscErrorCode ierr;
1248   PetscBool      iascii,isdraw,issocket,isbinary;
1249 
1250   PetscFunctionBegin;
1251   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1252   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1253   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1254   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1255   if (iascii || isdraw || issocket) {
1256     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1257   } else if (isbinary) {
1258     ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1259   }
1260   PetscFunctionReturn(0);
1261 }
1262 
1263 #undef __FUNCT__
1264 #define __FUNCT__ "MatDestroy_MPIBAIJ"
1265 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
1266 {
1267   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1268   PetscErrorCode ierr;
1269 
1270   PetscFunctionBegin;
1271 #if defined(PETSC_USE_LOG)
1272   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
1273 #endif
1274   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1275   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
1276   ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
1277   ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
1278 #if defined(PETSC_USE_CTABLE)
1279   ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
1280 #else
1281   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
1282 #endif
1283   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
1284   ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
1285   ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
1286   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
1287   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
1288   ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);
1289   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
1290   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1291 
1292   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1293   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1294   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1295   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
1296   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1297   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1298   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1299   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C",NULL);CHKERRQ(ierr);
1300   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1301   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C",NULL);CHKERRQ(ierr);
1302   PetscFunctionReturn(0);
1303 }
1304 
1305 #undef __FUNCT__
1306 #define __FUNCT__ "MatMult_MPIBAIJ"
1307 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1308 {
1309   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1310   PetscErrorCode ierr;
1311   PetscInt       nt;
1312 
1313   PetscFunctionBegin;
1314   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1315   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1316   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1317   if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
1318   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1319   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1320   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1321   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1322   PetscFunctionReturn(0);
1323 }
1324 
1325 #undef __FUNCT__
1326 #define __FUNCT__ "MatMultAdd_MPIBAIJ"
1327 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1328 {
1329   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1330   PetscErrorCode ierr;
1331 
1332   PetscFunctionBegin;
1333   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1334   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1335   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1336   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1337   PetscFunctionReturn(0);
1338 }
1339 
1340 #undef __FUNCT__
1341 #define __FUNCT__ "MatMultTranspose_MPIBAIJ"
1342 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1343 {
1344   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1345   PetscErrorCode ierr;
1346   PetscBool      merged;
1347 
1348   PetscFunctionBegin;
1349   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1350   /* do nondiagonal part */
1351   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1352   if (!merged) {
1353     /* send it on its way */
1354     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1355     /* do local part */
1356     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1357     /* receive remote parts: note this assumes the values are not actually */
1358     /* inserted in yy until the next line */
1359     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1360   } else {
1361     /* do local part */
1362     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1363     /* send it on its way */
1364     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1365     /* the values were actually received in the Begin(), but we still need to call the End(), which is a no-op here */
1366     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1367   }
1368   PetscFunctionReturn(0);
1369 }
1370 
1371 #undef __FUNCT__
1372 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
1373 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1374 {
1375   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1376   PetscErrorCode ierr;
1377 
1378   PetscFunctionBegin;
1379   /* do nondiagonal part */
1380   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1381   /* send it on its way */
1382   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1383   /* do local part */
1384   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1385   /* receive remote parts: note this assumes the values are not actually */
1386   /* inserted into zz until the VecScatterEnd() below, which is true for this */
1387   /* implementation but perhaps not always true. */
1388   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1389   PetscFunctionReturn(0);
1390 }
1391 
1392 /*
1393   This only works correctly for square matrices where the subblock A->A is the
1394    diagonal block
1395 */
1396 #undef __FUNCT__
1397 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
1398 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1399 {
1400   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1401   PetscErrorCode ierr;
1402 
1403   PetscFunctionBegin;
1404   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1405   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1406   PetscFunctionReturn(0);
1407 }
1408 
1409 #undef __FUNCT__
1410 #define __FUNCT__ "MatScale_MPIBAIJ"
1411 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
1412 {
1413   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1414   PetscErrorCode ierr;
1415 
1416   PetscFunctionBegin;
1417   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1418   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1419   PetscFunctionReturn(0);
1420 }
1421 
1422 #undef __FUNCT__
1423 #define __FUNCT__ "MatGetRow_MPIBAIJ"
1424 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1425 {
1426   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
1427   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1428   PetscErrorCode ierr;
1429   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1430   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
1431   PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;
1432 
1433   PetscFunctionBegin;
1434   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1435   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1436   mat->getrowactive = PETSC_TRUE;
1437 
1438   if (!mat->rowvalues && (idx || v)) {
1439     /*
1440         allocate enough space to hold information from the longest row.
1441     */
1442     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1443     PetscInt    max = 1,mbs = mat->mbs,tmp;
1444     for (i=0; i<mbs; i++) {
1445       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1446       if (max < tmp) max = tmp;
1447     }
1448     ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr);
1449   }
1450   lrow = row - brstart;
1451 
1452   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1453   if (!v)   {pvA = 0; pvB = 0;}
1454   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1455   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1456   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1457   nztot = nzA + nzB;
1458 
1459   cmap = mat->garray;
1460   if (v  || idx) {
1461     if (nztot) {
1462       /* Sort by increasing column numbers, assuming A and B already sorted */
1463       PetscInt imark = -1;
1464       if (v) {
1465         *v = v_p = mat->rowvalues;
1466         for (i=0; i<nzB; i++) {
1467           if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
1468           else break;
1469         }
1470         imark = i;
1471         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1472         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1473       }
1474       if (idx) {
1475         *idx = idx_p = mat->rowindices;
1476         if (imark > -1) {
1477           for (i=0; i<imark; i++) {
1478             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1479           }
1480         } else {
1481           for (i=0; i<nzB; i++) {
1482             if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1483             else break;
1484           }
1485           imark = i;
1486         }
1487         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1488         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1489       }
1490     } else {
1491       if (idx) *idx = 0;
1492       if (v)   *v   = 0;
1493     }
1494   }
1495   *nz  = nztot;
1496   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1497   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1498   PetscFunctionReturn(0);
1499 }
1500 
1501 #undef __FUNCT__
1502 #define __FUNCT__ "MatRestoreRow_MPIBAIJ"
1503 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1504 {
1505   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1506 
1507   PetscFunctionBegin;
1508   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1509   baij->getrowactive = PETSC_FALSE;
1510   PetscFunctionReturn(0);
1511 }
1512 
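
/*
   A usage sketch (not part of the original source) for the MatGetRow()/MatRestoreRow() pair
   implemented above.  The implementation serves only locally owned rows and allows a single
   active row at a time, so each row must be restored before the next one is requested.  The
   helper name is illustrative only.
*/
static PetscErrorCode ExampleSumLocalRows(Mat A,PetscScalar *sum)
{
  PetscErrorCode    ierr;
  PetscInt          row,rstart,rend,ncols,j;
  const PetscInt    *cols;
  const PetscScalar *vals;

  PetscFunctionBegin;
  *sum = 0.0;
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  for (row=rstart; row<rend; row++) {
    ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
    for (j=0; j<ncols; j++) *sum += vals[j];
    ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr); /* restore before requesting the next row */
  }
  PetscFunctionReturn(0);
}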
1513 #undef __FUNCT__
1514 #define __FUNCT__ "MatZeroEntries_MPIBAIJ"
1515 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1516 {
1517   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1518   PetscErrorCode ierr;
1519 
1520   PetscFunctionBegin;
1521   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1522   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1523   PetscFunctionReturn(0);
1524 }
1525 
1526 #undef __FUNCT__
1527 #define __FUNCT__ "MatGetInfo_MPIBAIJ"
1528 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1529 {
1530   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
1531   Mat            A  = a->A,B = a->B;
1532   PetscErrorCode ierr;
1533   PetscReal      isend[5],irecv[5];
1534 
1535   PetscFunctionBegin;
1536   info->block_size = (PetscReal)matin->rmap->bs;
1537 
1538   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1539 
1540   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1541   isend[3] = info->memory;  isend[4] = info->mallocs;
1542 
1543   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1544 
1545   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1546   isend[3] += info->memory;  isend[4] += info->mallocs;
1547 
1548   if (flag == MAT_LOCAL) {
1549     info->nz_used      = isend[0];
1550     info->nz_allocated = isend[1];
1551     info->nz_unneeded  = isend[2];
1552     info->memory       = isend[3];
1553     info->mallocs      = isend[4];
1554   } else if (flag == MAT_GLOBAL_MAX) {
1555     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1556 
1557     info->nz_used      = irecv[0];
1558     info->nz_allocated = irecv[1];
1559     info->nz_unneeded  = irecv[2];
1560     info->memory       = irecv[3];
1561     info->mallocs      = irecv[4];
1562   } else if (flag == MAT_GLOBAL_SUM) {
1563     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1564 
1565     info->nz_used      = irecv[0];
1566     info->nz_allocated = irecv[1];
1567     info->nz_unneeded  = irecv[2];
1568     info->memory       = irecv[3];
1569     info->mallocs      = irecv[4];
1570   } else SETERRQ1(PetscObjectComm((PetscObject)matin),PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1571   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1572   info->fill_ratio_needed = 0;
1573   info->factor_mallocs    = 0;
1574   PetscFunctionReturn(0);
1575 }
1576 
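
/*
   A sketch (not part of the original source) of querying the information assembled above:
   MAT_LOCAL reports this process' counts while MAT_GLOBAL_SUM and MAT_GLOBAL_MAX reduce them
   over the communicator.  The helper name is illustrative only.
*/
static PetscErrorCode ExampleReportNonzeros(Mat A)
{
  PetscErrorCode ierr;
  MatInfo        info;

  PetscFunctionBegin;
  ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
  ierr = PetscPrintf(PetscObjectComm((PetscObject)A),"nonzeros used %g, allocated %g\n",(double)info.nz_used,(double)info.nz_allocated);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}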
1577 #undef __FUNCT__
1578 #define __FUNCT__ "MatSetOption_MPIBAIJ"
1579 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg)
1580 {
1581   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1582   PetscErrorCode ierr;
1583 
1584   PetscFunctionBegin;
1585   switch (op) {
1586   case MAT_NEW_NONZERO_LOCATIONS:
1587   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1588   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1589   case MAT_KEEP_NONZERO_PATTERN:
1590   case MAT_NEW_NONZERO_LOCATION_ERR:
1591     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1592     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1593     break;
1594   case MAT_ROW_ORIENTED:
1595     a->roworiented = flg;
1596 
1597     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1598     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1599     break;
1600   case MAT_NEW_DIAGONALS:
1601     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1602     break;
1603   case MAT_IGNORE_OFF_PROC_ENTRIES:
1604     a->donotstash = flg;
1605     break;
1606   case MAT_USE_HASH_TABLE:
1607     a->ht_flag = flg;
1608     break;
1609   case MAT_SYMMETRIC:
1610   case MAT_STRUCTURALLY_SYMMETRIC:
1611   case MAT_HERMITIAN:
1612   case MAT_SYMMETRY_ETERNAL:
1613     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1614     break;
1615   default:
1616     SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"unknown option %d",op);
1617   }
1618   PetscFunctionReturn(0);
1619 }
1620 
1621 #undef __FUNCT__
1622 #define __FUNCT__ "MatTranspose_MPIBAIJ"
1623 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
1624 {
1625   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
1626   Mat_SeqBAIJ    *Aloc;
1627   Mat            B;
1628   PetscErrorCode ierr;
1629   PetscInt       M =A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
1630   PetscInt       bs=A->rmap->bs,mbs=baij->mbs;
1631   MatScalar      *a;
1632 
1633   PetscFunctionBegin;
1634   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1635   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1636     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1637     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1638     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1639     /* Do not know preallocation information, but must set block size */
1640     ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,NULL,PETSC_DECIDE,NULL);CHKERRQ(ierr);
1641   } else {
1642     B = *matout;
1643   }
1644 
1645   /* copy over the A part */
1646   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1647   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1648   ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);
1649 
1650   for (i=0; i<mbs; i++) {
1651     rvals[0] = bs*(baij->rstartbs + i);
1652     for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1653     for (j=ai[i]; j<ai[i+1]; j++) {
1654       col = (baij->cstartbs+aj[j])*bs;
1655       for (k=0; k<bs; k++) {
1656         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1657 
1658         col++; a += bs;
1659       }
1660     }
1661   }
1662   /* copy over the B part */
1663   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1664   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1665   for (i=0; i<mbs; i++) {
1666     rvals[0] = bs*(baij->rstartbs + i);
1667     for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1668     for (j=ai[i]; j<ai[i+1]; j++) {
1669       col = baij->garray[aj[j]]*bs;
1670       for (k=0; k<bs; k++) {
1671         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1672         col++;
1673         a += bs;
1674       }
1675     }
1676   }
1677   ierr = PetscFree(rvals);CHKERRQ(ierr);
1678   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1679   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1680 
1681   if (reuse == MAT_INITIAL_MATRIX || *matout != A) *matout = B;
1682   else {
1683     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
1684   }
1685   PetscFunctionReturn(0);
1686 }
1687 
1688 #undef __FUNCT__
1689 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
1690 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1691 {
1692   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1693   Mat            a     = baij->A,b = baij->B;
1694   PetscErrorCode ierr;
1695   PetscInt       s1,s2,s3;
1696 
1697   PetscFunctionBegin;
1698   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1699   if (rr) {
1700     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1701     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1702     /* Overlap communication with computation. */
1703     ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1704   }
1705   if (ll) {
1706     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1707     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1708     ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
1709   }
1710   /* scale the diagonal block */
1711   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1712 
1713   if (rr) {
1714     /* Do a scatter end and then right scale the off-diagonal block */
1715     ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1716     ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
1717   }
1718   PetscFunctionReturn(0);
1719 }
1720 
1721 #undef __FUNCT__
1722 #define __FUNCT__ "MatZeroRows_MPIBAIJ"
1723 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
1724 {
1725   Mat_MPIBAIJ   *l      = (Mat_MPIBAIJ *) A->data;
1726   PetscInt      *owners = A->rmap->range;
1727   PetscInt       n      = A->rmap->n;
1728   PetscMPIInt    size   = l->size;
1729   PetscSF        sf;
1730   PetscInt      *lrows;
1731   PetscSFNode   *rrows;
1732   PetscInt       lastidx = -1, r, p = 0, len = 0;
1733   PetscErrorCode ierr;
1734 
1735   PetscFunctionBegin;
1736   /* Create SF where leaves are input rows and roots are owned rows */
1737   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
1738   for (r = 0; r < n; ++r) lrows[r] = -1;
1739   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
1740   for (r = 0; r < N; ++r) {
1741     const PetscInt idx   = rows[r];
1742     PetscBool      found = PETSC_FALSE;
1743     /* Trick for efficient searching for sorted rows */
1744     if (lastidx > idx) p = 0;
1745     lastidx = idx;
1746     for (; p < size; ++p) {
1747       if (idx >= owners[p] && idx < owners[p+1]) {
1748         rrows[r].rank  = p;
1749         rrows[r].index = rows[r] - owners[p];
1750         found = PETSC_TRUE;
1751         break;
1752       }
1753     }
1754     if (!found) SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %d not found in matrix distribution", idx);
1755   }
1756   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
1757   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
1758   /* Collect flags for rows to be zeroed */
1759   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1760   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1761   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1762   /* Compress and put in row numbers */
1763   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
1764   /* fix right hand side if needed */
1765   if (x && b) {
1766     const PetscScalar *xx;
1767     PetscScalar       *bb;
1768 
1769     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
1770     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1771     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
1772     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
1773     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1774   }
1775 
1776   /* actually zap the local rows */
1777   /*
1778         Zero the required rows. If the "diagonal block" of the matrix
1779      is square and the user wishes to set the diagonal, we use separate
1780      code so that MatSetValues() is not called for each diagonal entry;
1781      doing so would allocate new memory for every entry, causing many
1782      mallocs and slowing things down.
1783   */
1784   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1785   ierr = MatZeroRows_SeqBAIJ(l->B,len,lrows,0.0,0,0);CHKERRQ(ierr);
1786   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
1787     ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,diag,0,0);CHKERRQ(ierr);
1788   } else if (diag != 0.0) {
1789     ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,0,0);CHKERRQ(ierr);
1790     if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1791        MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1792     for (r = 0; r < len; ++r) {
1793       const PetscInt row = lrows[r] + A->rmap->rstart;
1794       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
1795     }
1796     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1797     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1798   } else {
1799     ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,0,0);CHKERRQ(ierr);
1800   }
1801   ierr = PetscFree(lrows);CHKERRQ(ierr);
1802   PetscFunctionReturn(0);
1803 }
1804 
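
/*
   A caller-side sketch (not part of the original source) of the path implemented above: zero
   a set of global rows, put 1.0 on their diagonal entries, and fix the right-hand side from a
   vector of known values, as is typically done for Dirichlet boundary conditions.  The helper
   name and the assumption that `rows' holds valid global row indices are illustrative only;
   MatZeroRows() is the public PETSc entry point that dispatches to MatZeroRows_MPIBAIJ().
*/
static PetscErrorCode ExampleZeroDirichletRows(Mat A,PetscInt nrows,const PetscInt rows[],Vec xknown,Vec b)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* b[rows[i]] becomes 1.0*xknown[rows[i]]; the off-diagonal entries of those rows are zeroed */
  ierr = MatZeroRows(A,nrows,rows,1.0,xknown,b);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}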
1805 #undef __FUNCT__
1806 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
1807 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1808 {
1809   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1810   PetscErrorCode ierr;
1811 
1812   PetscFunctionBegin;
1813   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1814   PetscFunctionReturn(0);
1815 }
1816 
1817 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat*);
1818 
1819 #undef __FUNCT__
1820 #define __FUNCT__ "MatEqual_MPIBAIJ"
1821 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool  *flag)
1822 {
1823   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1824   Mat            a,b,c,d;
1825   PetscBool      flg;
1826   PetscErrorCode ierr;
1827 
1828   PetscFunctionBegin;
1829   a = matA->A; b = matA->B;
1830   c = matB->A; d = matB->B;
1831 
1832   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1833   if (flg) {
1834     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1835   }
1836   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1837   PetscFunctionReturn(0);
1838 }
1839 
1840 #undef __FUNCT__
1841 #define __FUNCT__ "MatCopy_MPIBAIJ"
1842 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
1843 {
1844   PetscErrorCode ierr;
1845   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1846   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
1847 
1848   PetscFunctionBegin;
1849   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1850   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1851     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1852   } else {
1853     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1854     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1855   }
1856   PetscFunctionReturn(0);
1857 }
1858 
1859 #undef __FUNCT__
1860 #define __FUNCT__ "MatSetUp_MPIBAIJ"
1861 PetscErrorCode MatSetUp_MPIBAIJ(Mat A)
1862 {
1863   PetscErrorCode ierr;
1864 
1865   PetscFunctionBegin;
1866   ierr =  MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1867   PetscFunctionReturn(0);
1868 }
1869 
1870 #undef __FUNCT__
1871 #define __FUNCT__ "MatAXPY_MPIBAIJ"
1872 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1873 {
1874   PetscErrorCode ierr;
1875   Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ*)X->data,*yy=(Mat_MPIBAIJ*)Y->data;
1876   PetscBLASInt   bnz,one=1;
1877   Mat_SeqBAIJ    *x,*y;
1878 
1879   PetscFunctionBegin;
1880   if (str == SAME_NONZERO_PATTERN) {
1881     PetscScalar alpha = a;
1882     x    = (Mat_SeqBAIJ*)xx->A->data;
1883     y    = (Mat_SeqBAIJ*)yy->A->data;
1884     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
1885     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
1886     x    = (Mat_SeqBAIJ*)xx->B->data;
1887     y    = (Mat_SeqBAIJ*)yy->B->data;
1888     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
1889     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
1890   } else {
1891     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1892   }
1893   PetscFunctionReturn(0);
1894 }
1895 
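
/*
   A sketch (not part of the original source) of the call that reaches the fast BLAS path
   above: when both matrices are known to share the same nonzero pattern, passing
   SAME_NONZERO_PATTERN lets the values of the diagonal and off-diagonal blocks be combined
   with a single axpy each; any other MatStructure falls back to MatAXPY_Basic().  The helper
   name is illustrative only.
*/
static PetscErrorCode ExampleAXPYSamePattern(Mat Y,PetscScalar a,Mat X)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatAXPY(Y,a,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* Y = Y + a*X */
  PetscFunctionReturn(0);
}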
1896 #undef __FUNCT__
1897 #define __FUNCT__ "MatRealPart_MPIBAIJ"
1898 PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
1899 {
1900   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1901   PetscErrorCode ierr;
1902 
1903   PetscFunctionBegin;
1904   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1905   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1906   PetscFunctionReturn(0);
1907 }
1908 
1909 #undef __FUNCT__
1910 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ"
1911 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
1912 {
1913   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1914   PetscErrorCode ierr;
1915 
1916   PetscFunctionBegin;
1917   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1918   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1919   PetscFunctionReturn(0);
1920 }
1921 
1922 #undef __FUNCT__
1923 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ"
1924 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
1925 {
1926   PetscErrorCode ierr;
1927   IS             iscol_local;
1928   PetscInt       csize;
1929 
1930   PetscFunctionBegin;
1931   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
1932   if (call == MAT_REUSE_MATRIX) {
1933     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
1934     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
1935   } else {
1936     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
1937   }
1938   ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
1939   if (call == MAT_INITIAL_MATRIX) {
1940     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
1941     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
1942   }
1943   PetscFunctionReturn(0);
1944 }
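
/*
   A caller-side sketch (not part of the original source) of extracting a parallel submatrix
   through the routine above.  It assumes a square matrix with matching row and column layouts
   and that the selected rows and columns are block aligned (whole blocks of size bs), since
   the implementation compresses the point indices to block indices.  The helper name and the
   choice of a leading stride of locally owned rows are illustrative only.
*/
static PetscErrorCode ExampleGetLeadingSubMatrix(Mat A,PetscInt nlocal,Mat *Asub)
{
  PetscErrorCode ierr;
  PetscInt       rstart,rend;
  IS             isrow,iscol;

  PetscFunctionBegin;
  ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
  /* nlocal should be a multiple of the block size so that whole blocks are selected */
  ierr = ISCreateStride(PetscObjectComm((PetscObject)A),nlocal,rstart,1,&isrow);CHKERRQ(ierr);
  ierr = ISCreateStride(PetscObjectComm((PetscObject)A),nlocal,rstart,1,&iscol);CHKERRQ(ierr);
  ierr = MatGetSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,Asub);CHKERRQ(ierr);
  ierr = ISDestroy(&isrow);CHKERRQ(ierr);
  ierr = ISDestroy(&iscol);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}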
1945 extern PetscErrorCode MatGetSubMatrices_MPIBAIJ_local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,PetscBool*,Mat*);
1946 #undef __FUNCT__
1947 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ_Private"
1948 /*
1949   Not ideal since it makes two copies of the submatrix: first a local SeqBAIJ on
1950   each process, and then the end result obtained by concatenating the local matrices.
1951   Writing it directly would be much like MatGetSubMatrices_MPIBAIJ()
1952 */
1953 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
1954 {
1955   PetscErrorCode ierr;
1956   PetscMPIInt    rank,size;
1957   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs;
1958   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol,nrow;
1959   Mat            M,Mreuse;
1960   MatScalar      *vwork,*aa;
1961   MPI_Comm       comm;
1962   IS             isrow_new, iscol_new;
1963   PetscBool      idflag,allrows, allcols;
1964   Mat_SeqBAIJ    *aij;
1965 
1966   PetscFunctionBegin;
1967   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
1968   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1969   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1970   /* The compression and expansion should be avoided. It does not report
1971      errors and might change the indices, hence it is potentially buggy */
1972   ierr = ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new);CHKERRQ(ierr);
1973   ierr = ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new);CHKERRQ(ierr);
1974 
1975   /* Check for special case: each processor gets entire matrix columns */
1976   ierr = ISIdentity(iscol,&idflag);CHKERRQ(ierr);
1977   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
1978   if (idflag && ncol == mat->cmap->N) allcols = PETSC_TRUE;
1979   else allcols = PETSC_FALSE;
1980 
1981   ierr = ISIdentity(isrow,&idflag);CHKERRQ(ierr);
1982   ierr = ISGetLocalSize(isrow,&nrow);CHKERRQ(ierr);
1983   if (idflag && nrow == mat->rmap->N) allrows = PETSC_TRUE;
1984   else allrows = PETSC_FALSE;
1985 
1986   if (call ==  MAT_REUSE_MATRIX) {
1987     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
1988     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
1989     ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_REUSE_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr);
1990   } else {
1991     ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_INITIAL_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr);
1992   }
1993   ierr = ISDestroy(&isrow_new);CHKERRQ(ierr);
1994   ierr = ISDestroy(&iscol_new);CHKERRQ(ierr);
1995   /*
1996       m - number of local rows
1997       n - number of columns (same on all processors)
1998       rstart - first row in new global matrix generated
1999   */
2000   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2001   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2002   m    = m/bs;
2003   n    = n/bs;
2004 
2005   if (call == MAT_INITIAL_MATRIX) {
2006     aij = (Mat_SeqBAIJ*)(Mreuse)->data;
2007     ii  = aij->i;
2008     jj  = aij->j;
2009 
2010     /*
2011         Determine the number of non-zeros in the diagonal and off-diagonal
2012         portions of the matrix in order to do correct preallocation
2013     */
2014 
2015     /* first get start and end of "diagonal" columns */
2016     if (csize == PETSC_DECIDE) {
2017       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2018       if (mglobal == n*bs) { /* square matrix */
2019         nlocal = m;
2020       } else {
2021         nlocal = n/size + ((n % size) > rank);
2022       }
2023     } else {
2024       nlocal = csize/bs;
2025     }
2026     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2027     rstart = rend - nlocal;
2028     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2029 
2030     /* next, compute all the lengths */
2031     ierr  = PetscMalloc2(m+1,&dlens,m+1,&olens);CHKERRQ(ierr);
2032     for (i=0; i<m; i++) {
2033       jend = ii[i+1] - ii[i];
2034       olen = 0;
2035       dlen = 0;
2036       for (j=0; j<jend; j++) {
2037         if (*jj < rstart || *jj >= rend) olen++;
2038         else dlen++;
2039         jj++;
2040       }
2041       olens[i] = olen;
2042       dlens[i] = dlen;
2043     }
2044     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
2045     ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr);
2046     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
2047     ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
2048     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
2049   } else {
2050     PetscInt ml,nl;
2051 
2052     M    = *newmat;
2053     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2054     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2055     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2056     /*
2057          The next two lines are needed so we may call MatSetValuesBlocked_MPIBAIJ() below directly,
2058        rather than the slower MatSetValuesBlocked().
2059     */
2060     M->was_assembled = PETSC_TRUE;
2061     M->assembled     = PETSC_FALSE;
2062   }
2063   ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2064   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2065   aij  = (Mat_SeqBAIJ*)(Mreuse)->data;
2066   ii   = aij->i;
2067   jj   = aij->j;
2068   aa   = aij->a;
2069   for (i=0; i<m; i++) {
2070     row   = rstart/bs + i;
2071     nz    = ii[i+1] - ii[i];
2072     cwork = jj;     jj += nz;
2073     vwork = aa;     aa += nz*bs*bs;
2074     ierr  = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2075   }
2076 
2077   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2078   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2079   *newmat = M;
2080 
2081   /* save submatrix used in processor for next request */
2082   if (call ==  MAT_INITIAL_MATRIX) {
2083     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2084     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2085   }
2086   PetscFunctionReturn(0);
2087 }
2088 
2089 #undef __FUNCT__
2090 #define __FUNCT__ "MatPermute_MPIBAIJ"
2091 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B)
2092 {
2093   MPI_Comm       comm,pcomm;
2094   PetscInt       clocal_size,nrows;
2095   const PetscInt *rows;
2096   PetscMPIInt    size;
2097   IS             crowp,lcolp;
2098   PetscErrorCode ierr;
2099 
2100   PetscFunctionBegin;
2101   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2102   /* make a collective version of 'rowp' */
2103   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
2104   if (pcomm==comm) {
2105     crowp = rowp;
2106   } else {
2107     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
2108     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
2109     ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr);
2110     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
2111   }
2112   ierr = ISSetPermutation(crowp);CHKERRQ(ierr);
2113   /* make a local version of 'colp' */
2114   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
2115   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
2116   if (size==1) {
2117     lcolp = colp;
2118   } else {
2119     ierr = ISAllGather(colp,&lcolp);CHKERRQ(ierr);
2120   }
2121   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
2122   /* now we just get the submatrix */
2123   ierr = MatGetLocalSize(A,NULL,&clocal_size);CHKERRQ(ierr);
2124   ierr = MatGetSubMatrix_MPIBAIJ_Private(A,crowp,lcolp,clocal_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
2125   /* clean up */
2126   if (pcomm!=comm) {
2127     ierr = ISDestroy(&crowp);CHKERRQ(ierr);
2128   }
2129   if (size>1) {
2130     ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
2131   }
2132   PetscFunctionReturn(0);
2133 }
2134 
2135 #undef __FUNCT__
2136 #define __FUNCT__ "MatGetGhosts_MPIBAIJ"
2137 PetscErrorCode  MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
2138 {
2139   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data;
2140   Mat_SeqBAIJ *B    = (Mat_SeqBAIJ*)baij->B->data;
2141 
2142   PetscFunctionBegin;
2143   if (nghosts) *nghosts = B->nbs;
2144   if (ghosts) *ghosts = baij->garray;
2145   PetscFunctionReturn(0);
2146 }
2147 
2148 #undef __FUNCT__
2149 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIBAIJ"
2150 PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat)
2151 {
2152   Mat            B;
2153   Mat_MPIBAIJ    *a  = (Mat_MPIBAIJ*)A->data;
2154   Mat_SeqBAIJ    *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data;
2155   Mat_SeqAIJ     *b;
2156   PetscErrorCode ierr;
2157   PetscMPIInt    size,rank,*recvcounts = 0,*displs = 0;
2158   PetscInt       sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs;
2159   PetscInt       m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf;
2160 
2161   PetscFunctionBegin;
2162   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
2163   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
2164 
2165   /* ----------------------------------------------------------------
2166      Tell every processor the number of nonzeros per row
2167   */
2168   ierr = PetscMalloc1((A->rmap->N/bs),&lens);CHKERRQ(ierr);
2169   for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) {
2170     lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs];
2171   }
2172   sendcount = A->rmap->rend/bs - A->rmap->rstart/bs;
2173   ierr      = PetscMalloc1(2*size,&recvcounts);CHKERRQ(ierr);
2174   displs    = recvcounts + size;
2175   for (i=0; i<size; i++) {
2176     recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs;
2177     displs[i]     = A->rmap->range[i]/bs;
2178   }
2179 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2180   ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2181 #else
2182   ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2183 #endif
2184   /* ---------------------------------------------------------------
2185      Create the sequential matrix of the same type as the local block diagonal
2186   */
2187   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
2188   ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2189   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2190   ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr);
2191   b    = (Mat_SeqAIJ*)B->data;
2192 
2193   /*--------------------------------------------------------------------
2194     Copy my part of matrix column indices over
2195   */
2196   sendcount  = ad->nz + bd->nz;
2197   jsendbuf   = b->j + b->i[rstarts[rank]/bs];
2198   a_jsendbuf = ad->j;
2199   b_jsendbuf = bd->j;
2200   n          = A->rmap->rend/bs - A->rmap->rstart/bs;
2201   cnt        = 0;
2202   for (i=0; i<n; i++) {
2203 
2204     /* put in lower diagonal portion */
2205     m = bd->i[i+1] - bd->i[i];
2206     while (m > 0) {
2207       /* is it above diagonal (in bd (compressed) numbering) */
2208       if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break;
2209       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2210       m--;
2211     }
2212 
2213     /* put in diagonal portion */
2214     for (j=ad->i[i]; j<ad->i[i+1]; j++) {
2215       jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++;
2216     }
2217 
2218     /* put in upper diagonal portion */
2219     while (m-- > 0) {
2220       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2221     }
2222   }
2223   if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt);
2224 
2225   /*--------------------------------------------------------------------
2226     Gather all column indices to all processors
2227   */
2228   for (i=0; i<size; i++) {
2229     recvcounts[i] = 0;
2230     for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) {
2231       recvcounts[i] += lens[j];
2232     }
2233   }
2234   displs[0] = 0;
2235   for (i=1; i<size; i++) {
2236     displs[i] = displs[i-1] + recvcounts[i-1];
2237   }
2238 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2239   ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2240 #else
2241   ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2242 #endif
2243   /*--------------------------------------------------------------------
2244     Assemble the matrix into usable form (note the numerical values are not yet set)
2245   */
2246   /* set the b->ilen (length of each row) values */
2247   ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr);
2248   /* set the b->i indices */
2249   b->i[0] = 0;
2250   for (i=1; i<=A->rmap->N/bs; i++) {
2251     b->i[i] = b->i[i-1] + lens[i-1];
2252   }
2253   ierr = PetscFree(lens);CHKERRQ(ierr);
2254   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2255   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2256   ierr = PetscFree(recvcounts);CHKERRQ(ierr);
2257 
2258   if (A->symmetric) {
2259     ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2260   } else if (A->hermitian) {
2261     ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr);
2262   } else if (A->structurally_symmetric) {
2263     ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2264   }
2265   *newmat = B;
2266   PetscFunctionReturn(0);
2267 }
2268 
2269 #undef __FUNCT__
2270 #define __FUNCT__ "MatSOR_MPIBAIJ"
2271 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2272 {
2273   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
2274   PetscErrorCode ierr;
2275   Vec            bb1 = 0;
2276 
2277   PetscFunctionBegin;
2278   if (flag == SOR_APPLY_UPPER) {
2279     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2280     PetscFunctionReturn(0);
2281   }
2282 
2283   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) {
2284     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2285   }
2286 
2287   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2288     if (flag & SOR_ZERO_INITIAL_GUESS) {
2289       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2290       its--;
2291     }
2292 
2293     while (its--) {
2294       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2295       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2296 
2297       /* update rhs: bb1 = bb - B*x */
2298       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2299       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2300 
2301       /* local sweep */
2302       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2303     }
2304   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
2305     if (flag & SOR_ZERO_INITIAL_GUESS) {
2306       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2307       its--;
2308     }
2309     while (its--) {
2310       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2311       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2312 
2313       /* update rhs: bb1 = bb - B*x */
2314       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2315       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2316 
2317       /* local sweep */
2318       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2319     }
2320   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
2321     if (flag & SOR_ZERO_INITIAL_GUESS) {
2322       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2323       its--;
2324     }
2325     while (its--) {
2326       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2327       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2328 
2329       /* update rhs: bb1 = bb - B*x */
2330       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2331       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2332 
2333       /* local sweep */
2334       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2335     }
2336   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR variant requested is not supported");
2337 
2338   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2339   PetscFunctionReturn(0);
2340 }
2341 
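
/*
   A sketch (not part of the original source) of invoking the routine above directly through
   the public MatSOR() interface: one processor-local symmetric sweep with a zero initial
   guess, the kind of call a block Jacobi/SOR smoother makes.  The helper name and the choice
   of omega = 1.0, fshift = 0.0 and a single (local) iteration are illustrative only.
*/
static PetscErrorCode ExampleLocalSORSweep(Mat A,Vec b,Vec x)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSOR(A,b,1.0,(MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS),0.0,1,1,x);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}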
2342 #undef __FUNCT__
2343 #define __FUNCT__ "MatGetColumnNorms_MPIBAIJ"
2344 PetscErrorCode MatGetColumnNorms_MPIBAIJ(Mat A,NormType type,PetscReal *norms)
2345 {
2346   PetscErrorCode ierr;
2347   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)A->data;
2348   PetscInt       N,i,*garray = aij->garray;
2349   PetscInt       ib,jb,bs = A->rmap->bs;
2350   Mat_SeqBAIJ    *a_aij = (Mat_SeqBAIJ*) aij->A->data;
2351   MatScalar      *a_val = a_aij->a;
2352   Mat_SeqBAIJ    *b_aij = (Mat_SeqBAIJ*) aij->B->data;
2353   MatScalar      *b_val = b_aij->a;
2354   PetscReal      *work;
2355 
2356   PetscFunctionBegin;
2357   ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
2358   ierr = PetscCalloc1(N,&work);CHKERRQ(ierr);
2359   if (type == NORM_2) {
2360     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2361       for (jb=0; jb<bs; jb++) {
2362         for (ib=0; ib<bs; ib++) {
2363           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
2364           a_val++;
2365         }
2366       }
2367     }
2368     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2369       for (jb=0; jb<bs; jb++) {
2370         for (ib=0; ib<bs; ib++) {
2371           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val);
2372           b_val++;
2373         }
2374       }
2375     }
2376   } else if (type == NORM_1) {
2377     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2378       for (jb=0; jb<bs; jb++) {
2379         for (ib=0; ib<bs; ib++) {
2380           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
2381           a_val++;
2382         }
2383       }
2384     }
2385     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2386       for (jb=0; jb<bs; jb++) {
2387         for (ib=0; ib<bs; ib++) {
2388           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val);
2389           b_val++;
2390         }
2391       }
2392     }
2393   } else if (type == NORM_INFINITY) {
2394     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2395       for (jb=0; jb<bs; jb++) {
2396         for (ib=0; ib<bs; ib++) {
2397           PetscInt col = A->cmap->rstart + a_aij->j[i] * bs + jb;
2398           work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]);
2399           a_val++;
2400         }
2401       }
2402     }
2403     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2404       for (jb=0; jb<bs; jb++) {
2405         for (ib=0; ib<bs; ib++) {
2406           PetscInt col = garray[b_aij->j[i]] * bs + jb;
2407           work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]);
2408           b_val++;
2409         }
2410       }
2411     }
2412   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
2413   if (type == NORM_INFINITY) {
2414     ierr = MPI_Allreduce(work,norms,N,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2415   } else {
2416     ierr = MPI_Allreduce(work,norms,N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2417   }
2418   ierr = PetscFree(work);CHKERRQ(ierr);
2419   if (type == NORM_2) {
2420     for (i=0; i<N; i++) norms[i] = PetscSqrtReal(norms[i]);
2421   }
2422   PetscFunctionReturn(0);
2423 }
2424 
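
/*
   A sketch (not part of the original source) of using the column norm routine above through
   the public MatGetColumnNorms() interface; `norms' must provide one entry per global column.
   The helper name is illustrative only.
*/
static PetscErrorCode ExampleColumnTwoNorms(Mat A,PetscReal **norms)
{
  PetscErrorCode ierr;
  PetscInt       N;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
  ierr = PetscMalloc1(N,norms);CHKERRQ(ierr);
  ierr = MatGetColumnNorms(A,NORM_2,*norms);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}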
2425 #undef __FUNCT__
2426 #define __FUNCT__ "MatInvertBlockDiagonal_MPIBAIJ"
2427 PetscErrorCode  MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values)
2428 {
2429   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*) A->data;
2430   PetscErrorCode ierr;
2431 
2432   PetscFunctionBegin;
2433   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2434   PetscFunctionReturn(0);
2435 }
2436 
2437 
2438 /* -------------------------------------------------------------------*/
2439 static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ,
2440                                        MatGetRow_MPIBAIJ,
2441                                        MatRestoreRow_MPIBAIJ,
2442                                        MatMult_MPIBAIJ,
2443                                 /* 4*/ MatMultAdd_MPIBAIJ,
2444                                        MatMultTranspose_MPIBAIJ,
2445                                        MatMultTransposeAdd_MPIBAIJ,
2446                                        0,
2447                                        0,
2448                                        0,
2449                                 /*10*/ 0,
2450                                        0,
2451                                        0,
2452                                        MatSOR_MPIBAIJ,
2453                                        MatTranspose_MPIBAIJ,
2454                                 /*15*/ MatGetInfo_MPIBAIJ,
2455                                        MatEqual_MPIBAIJ,
2456                                        MatGetDiagonal_MPIBAIJ,
2457                                        MatDiagonalScale_MPIBAIJ,
2458                                        MatNorm_MPIBAIJ,
2459                                 /*20*/ MatAssemblyBegin_MPIBAIJ,
2460                                        MatAssemblyEnd_MPIBAIJ,
2461                                        MatSetOption_MPIBAIJ,
2462                                        MatZeroEntries_MPIBAIJ,
2463                                 /*24*/ MatZeroRows_MPIBAIJ,
2464                                        0,
2465                                        0,
2466                                        0,
2467                                        0,
2468                                 /*29*/ MatSetUp_MPIBAIJ,
2469                                        0,
2470                                        0,
2471                                        0,
2472                                        0,
2473                                 /*34*/ MatDuplicate_MPIBAIJ,
2474                                        0,
2475                                        0,
2476                                        0,
2477                                        0,
2478                                 /*39*/ MatAXPY_MPIBAIJ,
2479                                        MatGetSubMatrices_MPIBAIJ,
2480                                        MatIncreaseOverlap_MPIBAIJ,
2481                                        MatGetValues_MPIBAIJ,
2482                                        MatCopy_MPIBAIJ,
2483                                 /*44*/ 0,
2484                                        MatScale_MPIBAIJ,
2485                                        0,
2486                                        0,
2487                                        0,
2488                                 /*49*/ 0,
2489                                        0,
2490                                        0,
2491                                        0,
2492                                        0,
2493                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2494                                        0,
2495                                        MatSetUnfactored_MPIBAIJ,
2496                                        MatPermute_MPIBAIJ,
2497                                        MatSetValuesBlocked_MPIBAIJ,
2498                                 /*59*/ MatGetSubMatrix_MPIBAIJ,
2499                                        MatDestroy_MPIBAIJ,
2500                                        MatView_MPIBAIJ,
2501                                        0,
2502                                        0,
2503                                 /*64*/ 0,
2504                                        0,
2505                                        0,
2506                                        0,
2507                                        0,
2508                                 /*69*/ MatGetRowMaxAbs_MPIBAIJ,
2509                                        0,
2510                                        0,
2511                                        0,
2512                                        0,
2513                                 /*74*/ 0,
2514                                        MatFDColoringApply_BAIJ,
2515                                        0,
2516                                        0,
2517                                        0,
2518                                 /*79*/ 0,
2519                                        0,
2520                                        0,
2521                                        0,
2522                                        MatLoad_MPIBAIJ,
2523                                 /*84*/ 0,
2524                                        0,
2525                                        0,
2526                                        0,
2527                                        0,
2528                                 /*89*/ 0,
2529                                        0,
2530                                        0,
2531                                        0,
2532                                        0,
2533                                 /*94*/ 0,
2534                                        0,
2535                                        0,
2536                                        0,
2537                                        0,
2538                                 /*99*/ 0,
2539                                        0,
2540                                        0,
2541                                        0,
2542                                        0,
2543                                 /*104*/0,
2544                                        MatRealPart_MPIBAIJ,
2545                                        MatImaginaryPart_MPIBAIJ,
2546                                        0,
2547                                        0,
2548                                 /*109*/0,
2549                                        0,
2550                                        0,
2551                                        0,
2552                                        0,
2553                                 /*114*/MatGetSeqNonzeroStructure_MPIBAIJ,
2554                                        0,
2555                                        MatGetGhosts_MPIBAIJ,
2556                                        0,
2557                                        0,
2558                                 /*119*/0,
2559                                        0,
2560                                        0,
2561                                        0,
2562                                        MatGetMultiProcBlock_MPIBAIJ,
2563                                 /*124*/0,
2564                                        MatGetColumnNorms_MPIBAIJ,
2565                                        MatInvertBlockDiagonal_MPIBAIJ,
2566                                        0,
2567                                        0,
2568                                /*129*/ 0,
2569                                        0,
2570                                        0,
2571                                        0,
2572                                        0,
2573                                /*134*/ 0,
2574                                        0,
2575                                        0,
2576                                        0,
2577                                        0,
2578                                /*139*/ 0,
2579                                        0,
2580                                        0,
2581                                        MatFDColoringSetUp_MPIXAIJ
2582 };
2583 
2584 #undef __FUNCT__
2585 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
2586 PetscErrorCode  MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a)
2587 {
2588   PetscFunctionBegin;
2589   *a = ((Mat_MPIBAIJ*)A->data)->A;
2590   PetscFunctionReturn(0);
2591 }
2592 
2593 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*);
2594 
2595 #undef __FUNCT__
2596 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ"
2597 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2598 {
2599   PetscInt       m,rstart,cstart,cend;
2600   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
2601   const PetscInt *JJ    =0;
2602   PetscScalar    *values=0;
2603   PetscBool      roworiented = ((Mat_MPIBAIJ*)B->data)->roworiented;
2604   PetscErrorCode ierr;
2605 
2606   PetscFunctionBegin;
2607   ierr   = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2608   ierr   = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2609   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2610   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2611   ierr   = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2612   m      = B->rmap->n/bs;
2613   rstart = B->rmap->rstart/bs;
2614   cstart = B->cmap->rstart/bs;
2615   cend   = B->cmap->rend/bs;
2616 
2617   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
2618   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
2619   for (i=0; i<m; i++) {
2620     nz = ii[i+1] - ii[i];
2621     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
2622     nz_max = PetscMax(nz_max,nz);
2623     JJ     = jj + ii[i];
2624     for (j=0; j<nz; j++) {
2625       if (*JJ >= cstart) break;
2626       JJ++;
2627     }
2628     d = 0;
2629     for (; j<nz; j++) {
2630       if (*JJ++ >= cend) break;
2631       d++;
2632     }
2633     d_nnz[i] = d;
2634     o_nnz[i] = nz - d;
2635   }
2636   ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2637   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
2638 
2639   values = (PetscScalar*)V;
2640   if (!values) {
2641     ierr = PetscMalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr);
2642     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2643   }
2644   for (i=0; i<m; i++) {
2645     PetscInt          row    = i + rstart;
2646     PetscInt          ncols  = ii[i+1] - ii[i];
2647     const PetscInt    *icols = jj + ii[i];
2648     if (!roworiented) {         /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
2649       const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2650       ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2651     } else {                    /* block ordering does not match so we can only insert one block at a time. */
2652       PetscInt j;
2653       for (j=0; j<ncols; j++) {
2654         const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
2655         ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
2656       }
2657     }
2658   }
2659 
2660   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2661   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2662   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2663   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2664   PetscFunctionReturn(0);
2665 }
2666 
2667 #undef __FUNCT__
2668 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR"
2669 /*@C
2670    MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format
2671    (block compressed row).
2672 
2673    Collective on MPI_Comm
2674 
2675    Input Parameters:
2676 +  A - the matrix
2677 .  bs - the block size
2678 .  i - the indices into j for the start of each local row (starts with zero)
2679 .  j - the column indices for each local row (starts with zero); these must be sorted for each row
2680 -  v - optional values in the matrix
2681 
2682    Level: developer
2683 
2684    Notes: The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED.  For example, C programs
2685    may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] in which the second index is
2686    over rows within a block and the last index is over columns within a block.  Fortran programs will likely set
2687    MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
2688    block and the second index is over columns within a block.
2689 
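   A minimal usage sketch; B is assumed to have already been created, sized, and given the
   type MATMPIBAIJ, and the block size, row layout, and values below are illustrative only:

.vb
   PetscInt    bs    = 2;
   PetscInt    ii[3] = {0,2,3};                        /* 2 local block rows: 2 blocks, then 1 block   */
   PetscInt    jj[3] = {0,2,1};                        /* global block columns, sorted within each row */
   PetscScalar v[12] = {1,2,3,4, 5,6,7,8, 9,10,11,12}; /* bs*bs values per block, or pass NULL         */

   MatMPIBAIJSetPreallocationCSR(B,bs,ii,jj,v);
.ve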
2690 .keywords: matrix, aij, compressed row, sparse, parallel
2691 
2692 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, MatCreateMPIBAIJWithArrays(), MPIBAIJ
2693 @*/
2694 PetscErrorCode  MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2695 {
2696   PetscErrorCode ierr;
2697 
2698   PetscFunctionBegin;
2699   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2700   PetscValidType(B,1);
2701   PetscValidLogicalCollectiveInt(B,bs,2);
2702   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
2703   PetscFunctionReturn(0);
2704 }
2705 
2706 #undef __FUNCT__
2707 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
2708 PetscErrorCode  MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
2709 {
2710   Mat_MPIBAIJ    *b;
2711   PetscErrorCode ierr;
2712   PetscInt       i;
2713 
2714   PetscFunctionBegin;
2715   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2716   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2717   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2718   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2719   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2720 
2721   if (d_nnz) {
2722     for (i=0; i<B->rmap->n/bs; i++) {
2723       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
2724     }
2725   }
2726   if (o_nnz) {
2727     for (i=0; i<B->rmap->n/bs; i++) {
2728       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
2729     }
2730   }
2731 
2732   b      = (Mat_MPIBAIJ*)B->data;
2733   b->bs2 = bs*bs;
2734   b->mbs = B->rmap->n/bs;
2735   b->nbs = B->cmap->n/bs;
2736   b->Mbs = B->rmap->N/bs;
2737   b->Nbs = B->cmap->N/bs;
2738 
2739   for (i=0; i<=b->size; i++) {
2740     b->rangebs[i] = B->rmap->range[i]/bs;
2741   }
2742   b->rstartbs = B->rmap->rstart/bs;
2743   b->rendbs   = B->rmap->rend/bs;
2744   b->cstartbs = B->cmap->rstart/bs;
2745   b->cendbs   = B->cmap->rend/bs;
2746 
2747   if (!B->preallocated) {
2748     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2749     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2750     ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
2751     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2752     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2753     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2754     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
2755     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2756     ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr);
2757   }
2758 
2759   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2760   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
2761   B->preallocated = PETSC_TRUE;
2762   PetscFunctionReturn(0);
2763 }
2764 
2765 extern PetscErrorCode  MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
2766 extern PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
2767 
2768 #undef __FUNCT__
2769 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj"
2770 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype,MatReuse reuse,Mat *adj)
2771 {
2772   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
2773   PetscErrorCode ierr;
2774   Mat_SeqBAIJ    *d  = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data;
2775   PetscInt       M   = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs;
2776   const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;
2777 
2778   PetscFunctionBegin;
2779   ierr  = PetscMalloc1((M+1),&ii);CHKERRQ(ierr);
2780   ii[0] = 0;
2781   for (i=0; i<M; i++) {
2782     if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]);
2783     if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]);
2784     ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i];
2785     /* remove one from the count if the row contains its diagonal entry, since the diagonal is skipped when filling jj below */
2786     for (j=id[i]; j<id[i+1]; j++) {
2787       if (jd[j] == i) {ii[i+1]--;break;}
2788     }
2789   }
2790   ierr = PetscMalloc1(ii[M],&jj);CHKERRQ(ierr);
2791   cnt  = 0;
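  /* Fill jj for each block row in ascending global block-column order: off-diagonal blocks
     to the left of the diagonal part, then the local (diagonal) blocks with the diagonal
     entry itself skipped, then the remaining off-diagonal blocks. */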
2792   for (i=0; i<M; i++) {
2793     for (j=io[i]; j<io[i+1]; j++) {
2794       if (garray[jo[j]] > rstart) break;
2795       jj[cnt++] = garray[jo[j]];
2796     }
2797     for (k=id[i]; k<id[i+1]; k++) {
2798       if (jd[k] != i) {
2799         jj[cnt++] = rstart + jd[k];
2800       }
2801     }
2802     for (; j<io[i+1]; j++) {
2803       jj[cnt++] = garray[jo[j]];
2804     }
2805   }
2806   ierr = MatCreateMPIAdj(PetscObjectComm((PetscObject)B),M,B->cmap->N/B->rmap->bs,ii,jj,NULL,adj);CHKERRQ(ierr);
2807   PetscFunctionReturn(0);
2808 }
2809 
2810 #include <../src/mat/impls/aij/mpi/mpiaij.h>
2811 
2812 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,MatReuse,Mat*);
2813 
2814 #undef __FUNCT__
2815 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAIJ"
2816 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
2817 {
2818   PetscErrorCode ierr;
2819   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
2820   Mat            B;
2821   Mat_MPIAIJ     *b;
2822 
2823   PetscFunctionBegin;
2824   if (!A->assembled) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Matrix must be assembled");
2825 
2826   ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2827   ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
2828   ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2829   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
2830   ierr = MatMPIAIJSetPreallocation(B,0,NULL,0,NULL);CHKERRQ(ierr);
2831   b    = (Mat_MPIAIJ*) B->data;
2832 
2833   ierr = MatDestroy(&b->A);CHKERRQ(ierr);
2834   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2835   ierr = MatDisAssemble_MPIBAIJ(A);CHKERRQ(ierr);
2836   ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr);
2837   ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr);
2838   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2839   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2840   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2841   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2842   if (reuse == MAT_REUSE_MATRIX) {
2843     ierr = MatHeaderReplace(A,B);CHKERRQ(ierr);
2844   } else {
2845    *newmat = B;
2846   }
2847   PetscFunctionReturn(0);
2848 }
2849 
2850 #if defined(PETSC_HAVE_MUMPS)
2851 PETSC_EXTERN PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*);
2852 #endif
2853 
2854 /*MC
2855    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
2856 
2857    Options Database Keys:
2858 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
2859 . -mat_block_size <bs> - set the blocksize used to store the matrix
2860 - -mat_use_hash_table <fact> - use a hash table to save memory during matrix assembly; <fact> is the hash table size factor
2861 
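   Notes: A minimal creation sketch driven by the options database (sizes and preallocation
   counts are illustrative; run with -mat_type mpibaij):

.vb
   Mat A;
   MatCreate(PETSC_COMM_WORLD,&A);
   MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);
   MatSetFromOptions(A);
   MatMPIBAIJSetPreallocation(A,2,5,NULL,2,NULL);
.ve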
2862   Level: beginner
2863 
2864 .seealso: MatCreateMPIBAIJ
2865 M*/
2866 
2867 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,MatType,MatReuse,Mat*);
2868 
2869 #undef __FUNCT__
2870 #define __FUNCT__ "MatCreate_MPIBAIJ"
2871 PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B)
2872 {
2873   Mat_MPIBAIJ    *b;
2874   PetscErrorCode ierr;
2875   PetscBool      flg;
2876 
2877   PetscFunctionBegin;
2878   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
2879   B->data = (void*)b;
2880 
2881   ierr         = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2882   B->assembled = PETSC_FALSE;
2883 
2884   B->insertmode = NOT_SET_VALUES;
2885   ierr          = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
2886   ierr          = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr);
2887 
2888   /* build local table of row and column ownerships */
2889   ierr = PetscMalloc1((b->size+1),&b->rangebs);CHKERRQ(ierr);
2890 
2891   /* build cache for off array entries formed */
2892   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
2893 
2894   b->donotstash  = PETSC_FALSE;
2895   b->colmap      = NULL;
2896   b->garray      = NULL;
2897   b->roworiented = PETSC_TRUE;
2898 
2899   /* stuff used in block assembly */
2900   b->barray = 0;
2901 
2902   /* stuff used for matrix vector multiply */
2903   b->lvec  = 0;
2904   b->Mvctx = 0;
2905 
2906   /* stuff for MatGetRow() */
2907   b->rowindices   = 0;
2908   b->rowvalues    = 0;
2909   b->getrowactive = PETSC_FALSE;
2910 
2911   /* hash table stuff */
2912   b->ht           = 0;
2913   b->hd           = 0;
2914   b->ht_size      = 0;
2915   b->ht_flag      = PETSC_FALSE;
2916   b->ht_fact      = 0;
2917   b->ht_total_ct  = 0;
2918   b->ht_insert_ct = 0;
2919 
2920   /* stuff for MatGetSubMatrices_MPIBAIJ_local() */
2921   b->ijonly = PETSC_FALSE;
2922 
2923   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr);
2924   ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,NULL);CHKERRQ(ierr);
2925   if (flg) {
2926     PetscReal fact = 1.39;
2927     ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
2928     ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
2929     if (fact <= 1.0) fact = 1.39;
2930     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
2931     ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
2932   }
2933   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2934 
2935 #if defined(PETSC_HAVE_MUMPS)
2936   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_baij_mumps);CHKERRQ(ierr);
2937 #endif
2938   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr);
2939   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr);
2940   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C",MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr);
2941   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
2942   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
2943   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
2944   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
2945   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
2946   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
2947   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSetHashTableFactor_C",MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
2948   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpibstrm_C",MatConvert_MPIBAIJ_MPIBSTRM);CHKERRQ(ierr);
2949   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);
2950   PetscFunctionReturn(0);
2951 }
2952 
2953 /*MC
2954    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
2955 
2956    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
2957    and MATMPIBAIJ otherwise.
2958 
2959    Options Database Keys:
2960 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
2961 
2962   Level: beginner
2963 
2964 .seealso: MatCreateBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
2965 M*/
2966 
2967 #undef __FUNCT__
2968 #define __FUNCT__ "MatMPIBAIJSetPreallocation"
2969 /*@C
2970    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
2971    (block compressed row).  For good matrix assembly performance
2972    the user should preallocate the matrix storage by setting the parameters
2973    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2974    performance can be increased by more than a factor of 50.
2975 
2976    Collective on Mat
2977 
2978    Input Parameters:
2979 +  A - the matrix
2980 .  bs   - size of block
2981 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2982            submatrix  (same for all local rows)
2983 .  d_nnz - array containing the number of block nonzeros in the various block rows
2984            in the diagonal portion of the local submatrix (possibly different for each block
2985            row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry and
2986            set it even if it is zero.
2987 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2988            submatrix (same for all local rows).
2989 -  o_nnz - array containing the number of block nonzeros in the various block rows of the
2990            off-diagonal portion of the local submatrix (possibly different for
2991            each block row) or NULL.
2992 
2993    If the *_nnz parameter is given then the *_nz parameter is ignored
2994 
2995    Options Database Keys:
2996 +   -mat_block_size - size of the blocks to use
2997 -   -mat_use_hash_table <fact> - use a hash table to save memory during matrix assembly; <fact> is the hash table size factor
2998 
2999    Notes:
3000    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
3001    then it must be used on all processors that share the object for that argument.
3002 
3003    Storage Information:
3004    For a square global matrix we define each processor's diagonal portion
3005    to be its local rows and the corresponding columns (a square submatrix);
3006    each processor's off-diagonal portion encompasses the remainder of the
3007    local matrix (a rectangular submatrix).
3008 
3009    The user can specify preallocated storage for the diagonal part of
3010    the local submatrix with either d_nz or d_nnz (not both).  Set
3011    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3012    memory allocation.  Likewise, specify preallocated storage for the
3013    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3014 
3015    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3016    the figure below we depict these three local rows and all columns (0-11).
3017 
3018 .vb
3019            0 1 2 3 4 5 6 7 8 9 10 11
3020           --------------------------
3021    row 3  |o o o d d d o o o o  o  o
3022    row 4  |o o o d d d o o o o  o  o
3023    row 5  |o o o d d d o o o o  o  o
3024           --------------------------
3025 .ve
3026 
3027    Thus, any entries in the d locations are stored in the d (diagonal)
3028    submatrix, and any entries in the o locations are stored in the
3029    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3030    stored simply in the MATSEQBAIJ format for compressed row storage.
3031 
3032    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3033    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3034    In general, for PDE problems in which most nonzeros are near the diagonal,
3035    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3036    or you will get TERRIBLE performance; see the users' manual chapter on
3037    matrices.
3038 
3039    You can call MatGetInfo() to get information on how effective the preallocation was;
3040    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3041    You can also run with the option -info and look for messages with the string
3042    malloc in them to see if additional memory allocation was needed.
3043 
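   For instance, if every location depicted above held a nonzero and bs were 1, the owning
   process would pass d_nnz = {3,3,3} and o_nnz = {9,9,9}.  A minimal call sequence follows
   (sizes and counts are illustrative; error checking omitted):

.vb
   Mat      A;
   PetscInt bs = 3, mlocal = 2;                 /* 2 local block rows of size 3      */
   PetscInt d_nnz[2] = {2,1}, o_nnz[2] = {1,2}; /* blocks per block row in each part */

   MatCreate(PETSC_COMM_WORLD,&A);
   MatSetSizes(A,mlocal*bs,mlocal*bs,PETSC_DETERMINE,PETSC_DETERMINE);
   MatSetType(A,MATMPIBAIJ);
   MatMPIBAIJSetPreallocation(A,bs,0,d_nnz,0,o_nnz);
.ve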
3044    Level: intermediate
3045 
3046 .keywords: matrix, block, aij, compressed row, sparse, parallel
3047 
3048 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocationCSR(), PetscSplitOwnership()
3049 @*/
3050 PetscErrorCode  MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3051 {
3052   PetscErrorCode ierr;
3053 
3054   PetscFunctionBegin;
3055   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3056   PetscValidType(B,1);
3057   PetscValidLogicalCollectiveInt(B,bs,2);
3058   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3059   PetscFunctionReturn(0);
3060 }
3061 
3062 #undef __FUNCT__
3063 #define __FUNCT__ "MatCreateBAIJ"
3064 /*@C
3065    MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format
3066    (block compressed row).  For good matrix assembly performance
3067    the user should preallocate the matrix storage by setting the parameters
3068    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3069    performance can be increased by more than a factor of 50.
3070 
3071    Collective on MPI_Comm
3072 
3073    Input Parameters:
3074 +  comm - MPI communicator
3075 .  bs   - size of block
3076 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3077            This value should be the same as the local size used in creating the
3078            y vector for the matrix-vector product y = Ax.
3079 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
3080            This value should be the same as the local size used in creating the
3081            x vector for the matrix-vector product y = Ax.
3082 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3083 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3084 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
3085            submatrix  (same for all local rows)
3086 .  d_nnz - array containing the number of nonzero blocks in the various block rows
3087            in the diagonal portion of the local submatrix (possibly different for each block
3088            row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry
3089            and set it even if it is zero.
3090 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3091            submatrix (same for all local rows).
3092 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
3093            off-diagonal portion of the local submatrix (possibly different for
3094            each block row) or NULL.
3095 
3096    Output Parameter:
3097 .  A - the matrix
3098 
3099    Options Database Keys:
3100 +   -mat_block_size - size of the blocks to use
3101 -   -mat_use_hash_table <fact> - use a hash table to save memory during matrix assembly; <fact> is the hash table size factor
3102 
3103    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3104    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3105    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3106 
3107    Notes:
3108    If the *_nnz parameter is given then the *_nz parameter is ignored
3109 
3110    A nonzero block is any block that has 1 or more nonzeros in it
3111 
3112    The user MUST specify either the local or global matrix dimensions
3113    (possibly both).
3114 
3115    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
3116    then it must be used on all processors that share the object for that argument.
3117 
3118    Storage Information:
3119    For a square global matrix we define each processor's diagonal portion
3120    to be its local rows and the corresponding columns (a square submatrix);
3121    each processor's off-diagonal portion encompasses the remainder of the
3122    local matrix (a rectangular submatrix).
3123 
3124    The user can specify preallocated storage for the diagonal part of
3125    the local submatrix with either d_nz or d_nnz (not both).  Set
3126    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3127    memory allocation.  Likewise, specify preallocated storage for the
3128    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3129 
3130    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3131    the figure below we depict these three local rows and all columns (0-11).
3132 
3133 .vb
3134            0 1 2 3 4 5 6 7 8 9 10 11
3135           --------------------------
3136    row 3  |o o o d d d o o o o  o  o
3137    row 4  |o o o d d d o o o o  o  o
3138    row 5  |o o o d d d o o o o  o  o
3139           --------------------------
3140 .ve
3141 
3142    Thus, any entries in the d locations are stored in the d (diagonal)
3143    submatrix, and any entries in the o locations are stored in the
3144    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3145    stored simply in the MATSEQBAIJ format for compressed row storage.
3146 
3147    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3148    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3149    In general, for PDE problems in which most nonzeros are near the diagonal,
3150    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3151    or you will get TERRIBLE performance; see the users' manual chapter on
3152    matrices.
3153 
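   A minimal usage sketch (block size and preallocation counts are illustrative; error
   checking omitted):

.vb
   Mat A;
   /* block size 2, four local block rows and columns, global sizes computed from the local
      ones; at most 3 diagonal-part and 1 off-diagonal-part blocks per block row */
   MatCreateBAIJ(PETSC_COMM_WORLD,2,4*2,4*2,PETSC_DETERMINE,PETSC_DETERMINE,3,NULL,1,NULL,&A);
   /* ... MatSetValuesBlocked()/MatSetValues(), MatAssemblyBegin()/MatAssemblyEnd() ... */
   MatDestroy(&A);
.ve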
3154    Level: intermediate
3155 
3156 .keywords: matrix, block, aij, compressed row, sparse, parallel
3157 
3158 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3159 @*/
3160 PetscErrorCode  MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3161 {
3162   PetscErrorCode ierr;
3163   PetscMPIInt    size;
3164 
3165   PetscFunctionBegin;
3166   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3167   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3168   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3169   if (size > 1) {
3170     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
3171     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3172   } else {
3173     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3174     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3175   }
3176   PetscFunctionReturn(0);
3177 }
3178 
3179 #undef __FUNCT__
3180 #define __FUNCT__ "MatDuplicate_MPIBAIJ"
3181 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3182 {
3183   Mat            mat;
3184   Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
3185   PetscErrorCode ierr;
3186   PetscInt       len=0;
3187 
3188   PetscFunctionBegin;
3189   *newmat = 0;
3190   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
3191   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3192   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3193   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3194 
3195   mat->factortype   = matin->factortype;
3196   mat->preallocated = PETSC_TRUE;
3197   mat->assembled    = PETSC_TRUE;
3198   mat->insertmode   = NOT_SET_VALUES;
3199 
3200   a             = (Mat_MPIBAIJ*)mat->data;
3201   mat->rmap->bs = matin->rmap->bs;
3202   a->bs2        = oldmat->bs2;
3203   a->mbs        = oldmat->mbs;
3204   a->nbs        = oldmat->nbs;
3205   a->Mbs        = oldmat->Mbs;
3206   a->Nbs        = oldmat->Nbs;
3207 
3208   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3209   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3210 
3211   a->size         = oldmat->size;
3212   a->rank         = oldmat->rank;
3213   a->donotstash   = oldmat->donotstash;
3214   a->roworiented  = oldmat->roworiented;
3215   a->rowindices   = 0;
3216   a->rowvalues    = 0;
3217   a->getrowactive = PETSC_FALSE;
3218   a->barray       = 0;
3219   a->rstartbs     = oldmat->rstartbs;
3220   a->rendbs       = oldmat->rendbs;
3221   a->cstartbs     = oldmat->cstartbs;
3222   a->cendbs       = oldmat->cendbs;
3223 
3224   /* hash table stuff */
3225   a->ht           = 0;
3226   a->hd           = 0;
3227   a->ht_size      = 0;
3228   a->ht_flag      = oldmat->ht_flag;
3229   a->ht_fact      = oldmat->ht_fact;
3230   a->ht_total_ct  = 0;
3231   a->ht_insert_ct = 0;
3232 
3233   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
3234   if (oldmat->colmap) {
3235 #if defined(PETSC_USE_CTABLE)
3236     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3237 #else
3238     ierr = PetscMalloc1((a->Nbs),&a->colmap);CHKERRQ(ierr);
3239     ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3240     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3241 #endif
3242   } else a->colmap = 0;
3243 
3244   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
3245     ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr);
3246     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3247     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
3248   } else a->garray = 0;
3249 
3250   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
3251   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3252   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
3253   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3254   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
3255 
3256   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3257   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
3258   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3259   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
3260   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3261   *newmat = mat;
3262   PetscFunctionReturn(0);
3263 }
3264 
3265 #undef __FUNCT__
3266 #define __FUNCT__ "MatLoad_MPIBAIJ"
3267 PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer)
3268 {
3269   PetscErrorCode ierr;
3270   int            fd;
3271   PetscInt       i,nz,j,rstart,rend;
3272   PetscScalar    *vals,*buf;
3273   MPI_Comm       comm;
3274   MPI_Status     status;
3275   PetscMPIInt    rank,size,maxnz;
3276   PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
3277   PetscInt       *locrowlens = NULL,*procsnz = NULL,*browners = NULL;
3278   PetscInt       jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax;
3279   PetscMPIInt    tag    = ((PetscObject)viewer)->tag;
3280   PetscInt       *dlens = NULL,*odlens = NULL,*mask = NULL,*masked1 = NULL,*masked2 = NULL,rowcount,odcount;
3281   PetscInt       dcount,kmax,k,nzcount,tmp,mend,sizesset=1,grows,gcols;
3282 
3283   PetscFunctionBegin;
3284   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3285   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr);
3286   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3287   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3288 
3289   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3290   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3291   if (!rank) {
3292     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3293     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
3294     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3295   }
3296 
3297   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
3298 
3299   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3300   M    = header[1]; N = header[2];
3301 
3302   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3303   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
3304   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
3305 
3306   /* If global sizes are set, check if they are consistent with that given in the file */
3307   if (sizesset) {
3308     ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
3309   }
3310   if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",M,grows);
3311   if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",N,gcols);
3312 
3313   if (M != N) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Can only do square matrices");
3314 
3315   /*
3316      This code adds extra rows to make sure the number of rows is
3317      divisible by the blocksize
3318   */
3319   Mbs        = M/bs;
3320   extra_rows = bs - M + bs*Mbs;
3321   if (extra_rows == bs) extra_rows = 0;
3322   else                  Mbs++;
3323   if (extra_rows && !rank) {
3324     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3325   }
3326 
3327   /* determine ownership of all rows */
3328   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
3329     mbs = Mbs/size + ((Mbs % size) > rank);
3330     m   = mbs*bs;
3331   } else { /* User set */
3332     m   = newmat->rmap->n;
3333     mbs = m/bs;
3334   }
3335   ierr = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr);
3336   ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3337 
3338   /* process 0 needs enough room for process with most rows */
3339   if (!rank) {
3340     mmax = rowners[1];
3341     for (i=2; i<=size; i++) {
3342       mmax = PetscMax(mmax,rowners[i]);
3343     }
3344     mmax*=bs;
3345   } else mmax = -1;             /* unused, but compiler warns anyway */
3346 
3347   rowners[0] = 0;
3348   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
3349   for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
3350   rstart = rowners[rank];
3351   rend   = rowners[rank+1];
3352 
3353   /* distribute row lengths to all processors */
3354   ierr = PetscMalloc1(m,&locrowlens);CHKERRQ(ierr);
3355   if (!rank) {
3356     mend = m;
3357     if (size == 1) mend = mend - extra_rows;
3358     ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr);
3359     for (j=mend; j<m; j++) locrowlens[j] = 1;
3360     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
3361     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
3362     for (j=0; j<m; j++) {
3363       procsnz[0] += locrowlens[j];
3364     }
3365     for (i=1; i<size; i++) {
3366       mend = browners[i+1] - browners[i];
3367       if (i == size-1) mend = mend - extra_rows;
3368       ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr);
3369       for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
3370       /* calculate the number of nonzeros on each processor */
3371       for (j=0; j<browners[i+1]-browners[i]; j++) {
3372         procsnz[i] += rowlengths[j];
3373       }
3374       ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3375     }
3376     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3377   } else {
3378     ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3379   }
3380 
3381   if (!rank) {
3382     /* determine max buffer needed and allocate it */
3383     maxnz = procsnz[0];
3384     for (i=1; i<size; i++) {
3385       maxnz = PetscMax(maxnz,procsnz[i]);
3386     }
3387     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
3388 
3389     /* read in my part of the matrix column indices  */
3390     nz     = procsnz[0];
3391     ierr   = PetscMalloc1((nz+1),&ibuf);CHKERRQ(ierr);
3392     mycols = ibuf;
3393     if (size == 1) nz -= extra_rows;
3394     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3395     if (size == 1) {
3396       for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i;
3397     }
3398 
3399     /* read in the column indices for every other process (except the last) and ship them off */
3400     for (i=1; i<size-1; i++) {
3401       nz   = procsnz[i];
3402       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3403       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3404     }
3405     /* read in the stuff for the last proc */
3406     if (size != 1) {
3407       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
3408       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3409       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
3410       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
3411     }
3412     ierr = PetscFree(cols);CHKERRQ(ierr);
3413   } else {
3414     /* determine buffer space needed for message */
3415     nz = 0;
3416     for (i=0; i<m; i++) {
3417       nz += locrowlens[i];
3418     }
3419     ierr   = PetscMalloc1((nz+1),&ibuf);CHKERRQ(ierr);
3420     mycols = ibuf;
3421     /* receive message of column indices*/
3422     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3423     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
3424     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
3425   }
3426 
3427   /* loop over local rows, determining number of off diagonal entries */
3428   ierr     = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr);
3429   ierr     = PetscCalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr);
3430   rowcount = 0; nzcount = 0;
3431   for (i=0; i<mbs; i++) {
3432     dcount  = 0;
3433     odcount = 0;
3434     for (j=0; j<bs; j++) {
3435       kmax = locrowlens[rowcount];
3436       for (k=0; k<kmax; k++) {
3437         tmp = mycols[nzcount++]/bs;
3438         if (!mask[tmp]) {
3439           mask[tmp] = 1;
3440           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
3441           else masked1[dcount++] = tmp;
3442         }
3443       }
3444       rowcount++;
3445     }
3446 
3447     dlens[i]  = dcount;
3448     odlens[i] = odcount;
3449 
3450     /* zero out the mask elements we set */
3451     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
3452     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
3453   }
3454 
3455 
3456   if (!sizesset) {
3457     ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3458   }
3459   ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
3460 
3461   if (!rank) {
3462     ierr = PetscMalloc1((maxnz+1),&buf);CHKERRQ(ierr);
3463     /* read in my part of the matrix numerical values  */
3464     nz     = procsnz[0];
3465     vals   = buf;
3466     mycols = ibuf;
3467     if (size == 1) nz -= extra_rows;
3468     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3469     if (size == 1) {
3470       for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0;
3471     }
3472 
3473     /* insert into matrix */
3474     jj = rstart*bs;
3475     for (i=0; i<m; i++) {
3476       ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3477       mycols += locrowlens[i];
3478       vals   += locrowlens[i];
3479       jj++;
3480     }
3481     /* read in other processors (except the last one) and ship out */
3482     for (i=1; i<size-1; i++) {
3483       nz   = procsnz[i];
3484       vals = buf;
3485       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3486       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3487     }
3488     /* the last proc */
3489     if (size != 1) {
3490       nz   = procsnz[i] - extra_rows;
3491       vals = buf;
3492       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3493       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
3494       ierr = MPIULong_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3495     }
3496     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3497   } else {
3498     /* receive numeric values */
3499     ierr = PetscMalloc1((nz+1),&buf);CHKERRQ(ierr);
3500 
3501     /* receive message of values*/
3502     vals   = buf;
3503     mycols = ibuf;
3504     ierr   = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3505 
3506     /* insert into matrix */
3507     jj = rstart*bs;
3508     for (i=0; i<m; i++) {
3509       ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3510       mycols += locrowlens[i];
3511       vals   += locrowlens[i];
3512       jj++;
3513     }
3514   }
3515   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
3516   ierr = PetscFree(buf);CHKERRQ(ierr);
3517   ierr = PetscFree(ibuf);CHKERRQ(ierr);
3518   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
3519   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
3520   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
3521   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3522   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3523   PetscFunctionReturn(0);
3524 }
3525 
3526 #undef __FUNCT__
3527 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
3528 /*@
3529    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
3530 
3531    Input Parameters:
3532 +  mat  - the matrix
3533 -  fact - factor
3534 
3535    Not Collective, each process can use a different factor
3536 
3537    Level: advanced
3538 
3539   Notes:
3540    This can also be set by the command line option: -mat_use_hash_table <fact>
3541 
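   A short sketch (the factor value is illustrative); the hash table is requested with
   MatSetOption(), typically before the matrix is assembled so the factor takes effect:

.vb
   MatSetOption(A,MAT_USE_HASH_TABLE,PETSC_TRUE);
   MatMPIBAIJSetHashTableFactor(A,1.6);
.ve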
3542 .keywords: matrix, hashtable, factor, HT
3543 
3544 .seealso: MatSetOption()
3545 @*/
3546 PetscErrorCode  MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
3547 {
3548   PetscErrorCode ierr;
3549 
3550   PetscFunctionBegin;
3551   ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr);
3552   PetscFunctionReturn(0);
3553 }
3554 
3555 #undef __FUNCT__
3556 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
3557 PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
3558 {
3559   Mat_MPIBAIJ *baij;
3560 
3561   PetscFunctionBegin;
3562   baij          = (Mat_MPIBAIJ*)mat->data;
3563   baij->ht_fact = fact;
3564   PetscFunctionReturn(0);
3565 }
3566 
3567 #undef __FUNCT__
3568 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
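/* Returns the diagonal part Ad and the off-diagonal part Ao of the parallel matrix A, plus
   colmap = garray, the map from Ao's local block columns to global block column indices. */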
3569 PetscErrorCode  MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
3570 {
3571   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
3572 
3573   PetscFunctionBegin;
3574   *Ad     = a->A;
3575   *Ao     = a->B;
3576   *colmap = a->garray;
3577   PetscFunctionReturn(0);
3578 }
3579 
3580 /*
3581     Special version for direct calls from Fortran (to eliminate two function call overheads)
3582 */
3583 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3584 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3585 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3586 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3587 #endif
3588 
3589 #undef __FUNCT__
3590 #define __FUNCT__ "matmpibaijsetvaluesblocked"
3591 /*@C
3592   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked()
3593 
3594   Collective on Mat
3595 
3596   Input Parameters:
3597 + mat - the matrix
3598 . min - number of input rows
3599 . im - input rows
3600 . nin - number of input columns
3601 . in - input columns
3602 . v - numerical values input
3603 - addvin - INSERT_VALUES or ADD_VALUES
3604 
3605   Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse.
3606 
3607   Level: advanced
3608 
3609 .seealso:   MatSetValuesBlocked()
3610 @*/
3611 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
3612 {
3613   /* convert input arguments to C version */
3614   Mat        mat  = *matin;
3615   PetscInt   m    = *min, n = *nin;
3616   InsertMode addv = *addvin;
3617 
3618   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
3619   const MatScalar *value;
3620   MatScalar       *barray     = baij->barray;
3621   PetscBool       roworiented = baij->roworiented;
3622   PetscErrorCode  ierr;
3623   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
3624   PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
3625   PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
3626 
3627   PetscFunctionBegin;
3628   /* tasks normally handled by MatSetValuesBlocked() */
3629   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
3630 #if defined(PETSC_USE_DEBUG)
3631   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
3632   if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
3633 #endif
3634   if (mat->assembled) {
3635     mat->was_assembled = PETSC_TRUE;
3636     mat->assembled     = PETSC_FALSE;
3637   }
3638   ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
3639 
3640 
3641   if (!barray) {
3642     ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
3643     baij->barray = barray;
3644   }
3645 
3646   if (roworiented) stepval = (n-1)*bs;
3647   else stepval = (m-1)*bs;
3648 
3649   for (i=0; i<m; i++) {
3650     if (im[i] < 0) continue;
3651 #if defined(PETSC_USE_DEBUG)
3652     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
3653 #endif
3654     if (im[i] >= rstart && im[i] < rend) {
3655       row = im[i] - rstart;
3656       for (j=0; j<n; j++) {
3657         /* If NumCol = 1 then a copy is not required */
3658         if ((roworiented) && (n == 1)) {
3659           barray = (MatScalar*)v + i*bs2;
3660         } else if ((!roworiented) && (m == 1)) {
3661           barray = (MatScalar*)v + j*bs2;
3662         } else { /* Here a copy is required */
3663           if (roworiented) {
3664             value = v + i*(stepval+bs)*bs + j*bs;
3665           } else {
3666             value = v + j*(stepval+bs)*bs + i*bs;
3667           }
3668           for (ii=0; ii<bs; ii++,value+=stepval) {
3669             for (jj=0; jj<bs; jj++) {
3670               *barray++ = *value++;
3671             }
3672           }
3673           barray -=bs2;
3674         }
3675 
3676         if (in[j] >= cstart && in[j] < cend) {
3677           col  = in[j] - cstart;
3678           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
3679         } else if (in[j] < 0) continue;
3680 #if defined(PETSC_USE_DEBUG)
3681         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
3682 #endif
3683         else {
3684           if (mat->was_assembled) {
3685             if (!baij->colmap) {
3686               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
3687             }
3688 
3689 #if defined(PETSC_USE_DEBUG)
3690 #if defined(PETSC_USE_CTABLE)
3691             { PetscInt data;
3692               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
3693               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
3694             }
3695 #else
3696             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
3697 #endif
3698 #endif
3699 #if defined(PETSC_USE_CTABLE)
3700             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
3701             col  = (col - 1)/bs;
3702 #else
3703             col = (baij->colmap[in[j]] - 1)/bs;
3704 #endif
3705             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
3706               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
3707               col  =  in[j];
3708             }
3709           } else col = in[j];
3710           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
3711         }
3712       }
3713     } else {
3714       if (!baij->donotstash) {
3715         if (roworiented) {
3716           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
3717         } else {
3718           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
3719         }
3720       }
3721     }
3722   }
3723 
3724   /* task normally handled by MatSetValuesBlocked() */
3725   ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
3726   PetscFunctionReturn(0);
3727 }
3728 
3729 #undef __FUNCT__
3730 #define __FUNCT__ "MatCreateMPIBAIJWithArrays"
3731 /*@
3732      MatCreateMPIBAIJWithArrays - creates an MPI BAIJ matrix using arrays that contain the local rows
3733          in standard CSR format.
3734 
3735    Collective on MPI_Comm
3736 
3737    Input Parameters:
3738 +  comm - MPI communicator
3739 .  bs - the block size, only a block size of 1 is supported
3740 .  m - number of local rows (Cannot be PETSC_DECIDE)
3741 .  n - This value should be the same as the local size used in creating the
3742        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3743        calculated if N is given) For square matrices n is almost always m.
3744 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3745 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3746 .   i - row indices
3747 .   j - column indices
3748 -   a - matrix values
3749 
3750    Output Parameter:
3751 .   mat - the matrix
3752 
3753    Level: intermediate
3754 
3755    Notes:
3756        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3757      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3758      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3759 
3760      The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3761      the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3762      block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3763      with column-major ordering within blocks.
3764 
3765        The i and j indices are 0 based, and the entries of i are offsets into the local j array.
3766 
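   A minimal sketch in which each process contributes two rows (bs must be 1 here; the row
   and column layout shown is illustrative only):

.vb
   PetscInt    i[3] = {0,2,3};          /* row 0 has 2 entries, row 1 has 1 entry */
   PetscInt    j[3] = {0,1,1};          /* global column indices                  */
   PetscScalar a[3] = {2.0,-1.0,2.0};
   Mat         A;

   MatCreateMPIBAIJWithArrays(PETSC_COMM_WORLD,1,2,2,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,&A);
.ve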
3767 .keywords: matrix, aij, compressed row, sparse, parallel
3768 
3769 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3770           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
3771 @*/
3772 PetscErrorCode  MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3773 {
3774   PetscErrorCode ierr;
3775 
3776   PetscFunctionBegin;
3777   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3778   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3779   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3780   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3781   ierr = MatSetType(*mat,MATMPIBAIJ);CHKERRQ(ierr);
3782   ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
3783   ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
3784   ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
3785   PetscFunctionReturn(0);
3786 }
3787