xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision ff1a2e19e92ca3eb486258317b2f290407e2b2d1)

#include <../src/mat/impls/baij/mpi/mpibaij.h>   /*I  "petscmat.h"  I*/
#include <petscblaslapack.h>

extern PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
extern PetscErrorCode DisAssemble_MPIBAIJ(Mat);
extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []);
extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
extern PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
extern PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar,Vec,Vec);

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ"
PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;}
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);}
  ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++){
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {va[i] = vb[i]; if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);}
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);
  ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
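
/*
   Usage sketch (illustration only, not part of the original source): v receives
   the largest |a_ij| of each local row, idx[] the global column at which it
   occurs.  The surrounding setup code is an assumption for illustration.
*/
#if 0
{
  Vec      v;
  PetscInt *idx;
  ierr = VecCreateMPI(((PetscObject)A)->comm,A->rmap->n,PETSC_DETERMINE,&v);CHKERRQ(ierr);
  ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idx);CHKERRQ(ierr);
  ierr = MatGetRowMaxAbs(A,v,idx);CHKERRQ(ierr); /* dispatches to the routine above for MPIBAIJ */
  ierr = PetscFree(idx);CHKERRQ(ierr);
  ierr = VecDestroy(&v);CHKERRQ(ierr);
}
#endif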

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPIBAIJ"
PetscErrorCode  MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
PetscErrorCode  MatRetrieveValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*
     Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  This is done in a non-scalable way, since the
   length of colmap equals the global number of block columns.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIBAIJ_Private"
PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
  for (i=0; i<nbs; i++){
    ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}
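
/*
   Sketch (illustration only, compiled out): translating a global column index
   gcol into a local column index of the off-diagonal block B with the colmap
   built above.  Stored values are shifted by one so that zero can mean
   "block column not present"; the helper name is an assumption, not PETSc API.
*/
#if 0
static PetscInt ColmapGlobalToLocal_Sketch(Mat_MPIBAIJ *baij,PetscInt gcol,PetscInt bs)
{
  PetscInt lcol;
#if defined (PETSC_USE_CTABLE)
  PetscTableFind(baij->colmap,gcol/bs + 1,&lcol); /* error code ignored in this sketch */
  lcol--;                                         /* negative: block column not in B  */
#else
  lcol = baij->colmap[gcol/bs] - 1;
#endif
  if (lcol >= 0) lcol += gcol % bs;               /* add the offset within the block  */
  return lcol;
}
#endif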

#define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto a_noinsert; \
        } \
      } \
      if (a->nonew == 1) goto a_noinsert; \
      if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
      MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      a_noinsert:; \
    ailen[brow] = nrow; \
}

#define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto b_noinsert; \
        } \
      } \
      if (b->nonew == 1) goto b_noinsert; \
      if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
      MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
      CHKMEMQ;\
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      b_noinsert:; \
    bilen[brow] = nrow; \
}
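
/*
   Addressing used by the two macros above (illustrative recap): the blocks of
   a block row are stored contiguously, bs2 = bs*bs scalars each, column-major
   inside a block.  So scalar entry (row,col) lands in block column
   bcol = col/bs of block row brow = row/bs, at

       a[bs2*_i + bs*(col%bs) + (row%bs)]

   where _i is the position of bcol within block row brow.  The macros first
   bisect (while high-low > 3) and then scan linearly for bcol.
*/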

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ"
PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  MatScalar      value;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig=mat->cmap->rend,bs=mat->rmap->bs;

  /* Some variables required by the MatSetValues_SeqBAIJ_{A,B}_Private() macros */
  Mat            A = baij->A;
  Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)(A)->data;
  PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar      *aa=a->a;

  Mat            B = baij->B;
  Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(B)->data;
  PetscInt       *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar      *ba=b->a;

  PetscInt       *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt       low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar      *ap,*bap;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      row = im[i] - rstart_orig;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig){
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B = baij->B;
              b = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba=b->a;
            } else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
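
/*
   Typical calling sequence (illustrative sketch; the indices and value below
   are assumptions): entries whose rows live on another process are stashed by
   the routine above and only communicated during assembly.
*/
#if 0
{
  PetscInt    row = 0,col = 0;  /* global indices; may be owned by another rank */
  PetscScalar val = 1.0;
  ierr = MatSetValues(mat,1,&row,1,&col,&val,ADD_VALUES);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); /* ships the stash */
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
}
#endif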

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  const PetscScalar *value;
  MatScalar         *barray=baij->barray;
  PetscBool         roworiented = baij->roworiented;
  PetscErrorCode    ierr;
  PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
  PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + (i*(stepval+bs) + j)*bs;
          } else {
            value = v + (j*(stepval+bs) + i)*bs;
          }
          for (ii=0; ii<bs; ii++,value+=bs+stepval) {
            for (jj=0; jj<bs; jj++) {
              barray[jj]  = value[jj];
            }
            barray += bs;
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend) {
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
        else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined (PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
            }
          }
          else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
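
/*
   Layout handled above (illustrative recap; the stride arithmetic follows from
   the code): for m block rows and n block columns, v holds m*bs by n*bs
   scalars.  When roworiented, block (i,j) starts at v + (i*(stepval+bs)+j)*bs
   with row stride stepval+bs = n*bs; the copy loop packs the strided block
   into the contiguous bs2-sized buffer barray, which is then handed to
   MatSetValuesBlocked_SeqBAIJ().
*/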

#define HASH_KEY 0.6180339887
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
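/*
   Example of the multiplicative (golden-ratio) hash above, illustration only:
   keys are formed from (block row, block column) pairs with a +1 shift so that
   key 0 never occurs, since 0 marks an empty slot in the table below.
*/
#if 0
{
  PetscReal tmp;
  PetscInt  key = row*Nbs + col + 1;   /* block indices -> nonzero key        */
  PetscInt  h1  = HASH(size,key,tmp);  /* first probe in [0,size); collisions
                                          are resolved by linear probing      */
}
#endif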
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig=mat->rmap->rend,Nbs=baij->Nbs;
  PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
  PetscReal      tmp;
  MatScalar      **HD = baij->hd,value;
#if defined(PETSC_USE_DEBUG)
  PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j=0; j<n; j++) {
        col = in[j];
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        /* Look up in the hash table */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#endif
        /* A HASH table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscBool         roworiented = baij->roworiented;
  PetscErrorCode    ierr;
  PetscInt          i,j,ii,jj,row,col;
  PetscInt          rstart=baij->rstartbs;
  PetscInt          rend=mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
  PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  PetscReal         tmp;
  MatScalar         **HD = baij->hd,*baij_a;
  const PetscScalar *v_t,*value;
#if defined(PETSC_USE_DEBUG)
  PetscInt          total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
#endif
    row   = im[i];
    v_t   = v + i*nbs2;
    if (row >= rstart && row < rend) {
      for (j=0; j<n; j++) {
        col = in[j];

        /* Look up into the Hash Table */
        key = row*Nbs+col+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        total_ct++;
        insert_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#endif
        baij_a = HD[idx];
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t  += bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj]  += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj]  = *value++;
              }
            }
          }
        } else {
          value = v + j*(stepval+bs)*bs + i*bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj]  += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj]  = *value++;
              }
            }
          }
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIBAIJ"
PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
  PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          col = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIBAIJ"
PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      nz = amat->nz*bs2;
      for (i=0; i<nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      nz = bmat->nz*bs2;
      for (i=0; i<nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      *nrm = PetscSqrtReal(*nrm);
    } else if (type == NORM_1) { /* max column sum */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
      ierr = PetscMalloc2(mat->cmap->N,PetscReal,&tmp,mat->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      v = amat->a; jj = amat->j;
      for (i=0; i<amat->nz; i++) {
        for (j=0; j<bs; j++){
          col = bs*(cstart + *jj) + j; /* column index */
          for (row=0; row<bs; row++){
            tmp[col] += PetscAbsScalar(*v);  v++;
          }
        }
        jj++;
      }
      v = bmat->a; jj = bmat->j;
      for (i=0; i<bmat->nz; i++) {
        for (j=0; j<bs; j++){
          col = bs*garray[*jj] + j;
          for (row=0; row<bs; row++){
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      *nrm = 0.0;
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row sum */
      PetscReal *sums;
      ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr);
      sum = 0.0;
      for (j=0; j<amat->mbs; j++) {
        for (row=0; row<bs; row++) sums[row] = 0.0;
        v = amat->a + bs2*amat->i[j];
        nz = amat->i[j+1]-amat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++){
            for (row=0; row<bs; row++){
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        v = bmat->a + bs2*bmat->i[j];
        nz = bmat->i[j+1]-bmat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++){
            for (row=0; row<bs; row++){
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        for (row=0; row<bs; row++){
          if (sums[row] > sum) sum = sums[row];
        }
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
      ierr = PetscFree(sums);CHKERRQ(ierr);
    } else SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for this norm yet");
  }
  PetscFunctionReturn(0);
}
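
/*
   Norms computed above (recap, grounded in the code): NORM_FROBENIUS is
   sqrt(sum_ij |a_ij|^2), with the local partial sums combined by MPIU_SUM;
   NORM_1 is the maximum column sum max_j sum_i |a_ij|, accumulated in a
   (non-scalable) work array of length cmap->N and reduced by MPIU_SUM;
   NORM_INFINITY is the maximum row sum max_i sum_j |a_ij|, with the local
   maxima combined by MPIU_MAX.
*/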

/*
  Creates and fills the hash table.  The table is created only once;
  if new entries need to be added to the matrix, then the hash table
  has to be destroyed and recreated.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            A = baij->A,B=baij->B;
  Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
  PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
  PetscErrorCode ierr;
  PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
  PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
  PetscInt       *HT,key;
  MatScalar      **HD;
  PetscReal      tmp;
#if defined(PETSC_USE_INFO)
  PetscInt       ct=0,max=0;
#endif

  PetscFunctionBegin;
  if (baij->ht) PetscFunctionReturn(0);

  baij->ht_size = (PetscInt)(factor*nz);
  ht_size       = baij->ht_size;

  /* Allocate memory for the hash table */
  ierr = PetscMalloc2(ht_size,MatScalar*,&baij->hd,ht_size,PetscInt,&baij->ht);CHKERRQ(ierr);
  ierr = PetscMemzero(baij->hd,ht_size*sizeof(MatScalar*));CHKERRQ(ierr);
  ierr = PetscMemzero(baij->ht,ht_size*sizeof(PetscInt));CHKERRQ(ierr);
  HD   = baij->hd;
  HT   = baij->ht;

  /* Loop over A */
  for (i=0; i<a->mbs; i++) {
    for (j=ai[i]; j<ai[i+1]; j++) {
      row = i+rstart;
      col = aj[j]+cstart;

      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++){
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = a->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }
  /* Loop over B */
  for (i=0; i<b->mbs; i++) {
    for (j=bi[i]; j<bi[i+1]; j++) {
      row = i+rstart;
      col = garray[bj[j]];
      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++){
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = b->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }

  /* Print summary */
#if defined(PETSC_USE_INFO)
  for (i=0,j=0; i<ht_size; i++) {
    if (HT[i]) {j++;}
  }
  ierr = PetscInfo2(mat,"Average search = %5.2f, max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}
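
/*
   Probe sequence on lookup (sketch, matching the insertion loops above):
   start at h1 and scan linearly, wrapping around the table once.  This is
   illustration only; the actual lookups in the *_HT routines unroll the wrap
   into two scans.
*/
#if 0
{
  PetscInt k,idx = -1;
  for (k=0; k<ht_size; k++) {
    if (HT[(h1+k)%ht_size] == key) { idx = (h1+k)%ht_size; break; }
  }
  /* on success HD[idx] points at the bs2 scalars of the stored block */
}
#endif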

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (baij->donotstash || mat->nooffprocentries) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted, others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscBool      r1,r2,r3,other_disassembled;
  MatScalar      *val;
  InsertMode     addv = mat->insertmode;
  PetscMPIInt    n;

  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!baij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flags to PETSC_FALSE, and restore the original flags
       after MatSetValues() */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;
    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this step
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(baij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  baij->rowvalues = 0;
  PetscFunctionReturn(0);
}
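
/*
   Recap of the assembly path above: the scalar stash and the block stash are
   drained into local MatSetValues calls, every rank disassembles if any rank
   did (the MPI_PROD reduction over was_assembled), the scatter context for
   MatMult is built on the first final assembly, and when the hash-table flag
   is set the setvalues function pointers are switched to the *_HT variants
   once the table has been created.
*/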

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = baij->size,rank = baij->rank;
  PetscInt          bs = mat->rmap->bs;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
             rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar   *a;

    /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ*)baij->B->data;
    ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      /* Set the type name to MATMPIBAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqBAIJ_ASCII() */
      ierr = PetscStrcpy(((PetscObject)((Mat_MPIBAIJ*)(A->data))->A)->type_name,MATMPIBAIJ);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_Binary"
static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *A = (Mat_SeqBAIJ*)a->A->data;
  Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)a->B->data;
  PetscErrorCode ierr;
  PetscInt       i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen;
  PetscInt       *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll;
  int            fd;
  PetscScalar    *column_values;
  FILE           *file;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  PetscInt       message_count,flowcontrolcount;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
  nz   = bs2*(A->nz + B->nz);
  rlen = mat->rmap->n;
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    range = mat->rmap->range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }

  ierr = PetscMalloc((rlen/bs)*sizeof(PetscInt),&crow_lens);CHKERRQ(ierr);
  /* compute lengths of each row  */
  for (i=0; i<a->mbs; i++) {
    crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }
  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscMalloc(rlen*sizeof(PetscInt),&row_lens);CHKERRQ(ierr);
    rlen = (range[1] - range[0])/bs;
    for (i=0; i<rlen; i++) {
      for (j=0; j<bs; j++) {
        row_lens[i*bs+j] = bs*crow_lens[i];
      }
    }
    ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = (range[i+1] - range[i])/bs;
      ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      for (k=0; k<rlen; k++) {
        for (j=0; j<bs; j++) {
          row_lens[k*bs+j] = bs*crow_lens[k];
        }
      }
      ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
    ierr = PetscFree(row_lens);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
    ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(crow_lens);CHKERRQ(ierr);

  /* load up the local column indices; include them for all rows, not just one per block row, since process 0
     does not have the information needed to expand a block row into its individual rows. This requires more
     communication, but still no more than the communication needed for the nonzero values */
  nzmax = nz; /* space the largest processor needs */
  ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
  ierr = PetscMalloc(nzmax*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<a->mbs; i++) {
    pcnt = cnt;
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      for (l=0; l<bs; l++) {
        column_indices[cnt++] = bs*col+l;
      }
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      for (l=0; l<bs; l++) {
        column_indices[cnt++] = bs*(A->j[k] + cstart)+l;
      }
    }
    for (; j<B->i[i+1]; j++) {
      for (l=0; l<bs; l++) {
        column_indices[cnt++] = bs*garray[B->j[j]]+l;
      }
    }
    len = cnt - pcnt;
    for (k=1; k<bs; k++) {
      ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr);
      cnt += len;
    }
  }
  if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);

  /* store the columns to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the numerical values */
  ierr = PetscMalloc(nzmax*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt = 0;
  for (i=0; i<a->mbs; i++) {
    rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]);
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      for (l=0; l<bs; l++) {
        for (ll=0; ll<bs; ll++) {
          column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
        }
      }
      cnt += bs;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      for (l=0; l<bs; l++) {
        for (ll=0; ll<bs; ll++) {
          column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll];
        }
      }
      cnt += bs;
    }
    for (; j<B->i[i+1]; j++) {
      for (l=0; l<bs; l++) {
        for (ll=0; ll<bs; ll++) {
          column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
        }
      }
      cnt += bs;
    }
    cnt += (bs-1)*rlen;
  }
  if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);

  /* store the column values to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);

  ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
  if (file) {
    fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ"
PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii || isdraw || issocket) {
    ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(((PetscObject)mat)->comm,PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIBAIJ"
PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1322 
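/*
   MatMult_MPIBAIJ() computes yy = A*xx by overlapping communication with
   computation: the scatter of the needed off-process (ghost) entries of xx
   into a->lvec is begun, the diagonal-block product a->A*xx is computed
   while the messages are in flight, and after the scatter completes the
   off-diagonal contribution a->B*lvec is added into yy.
*/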
1323 #undef __FUNCT__
1324 #define __FUNCT__ "MatMult_MPIBAIJ"
1325 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1326 {
1327   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1328   PetscErrorCode ierr;
1329   PetscInt       nt;
1330 
1331   PetscFunctionBegin;
1332   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1333   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1334   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1335   if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
1336   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1337   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1338   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1339   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1340   PetscFunctionReturn(0);
1341 }
1342 
1343 #undef __FUNCT__
1344 #define __FUNCT__ "MatMultAdd_MPIBAIJ"
1345 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1346 {
1347   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1348   PetscErrorCode ierr;
1349 
1350   PetscFunctionBegin;
1351   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1352   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1353   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1354   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1355   PetscFunctionReturn(0);
1356 }
1357 
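/*
   MatMultTranspose_MPIBAIJ() computes yy = A^T*xx as the local product
   a->A^T*xx plus a reverse scatter-add of a->B^T*xx: the transpose of the
   off-diagonal block produces contributions destined for other processes,
   which are accumulated into yy with ADD_VALUES in SCATTER_REVERSE mode.
   The merged branch below handles scatter contexts where the communication
   completes inside VecScatterBegin() itself.
*/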
1358 #undef __FUNCT__
1359 #define __FUNCT__ "MatMultTranspose_MPIBAIJ"
1360 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1361 {
1362   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1363   PetscErrorCode ierr;
1364   PetscBool      merged;
1365 
1366   PetscFunctionBegin;
1367   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1368   /* do nondiagonal part */
1369   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1370   if (!merged) {
1371     /* send it on its way */
1372     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1373     /* do local part */
1374     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1375     /* receive remote parts: note this assumes the values are not actually */
1376     /* inserted in yy until the next line */
1377     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1378   } else {
1379     /* do local part */
1380     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1381     /* send it on its way */
1382     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1383     /* values were actually received in the Begin() but we still need to call this no-op End() */
1384     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1385   }
1386   PetscFunctionReturn(0);
1387 }
1388 
1389 #undef __FUNCT__
1390 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
1391 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1392 {
1393   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1394   PetscErrorCode ierr;
1395 
1396   PetscFunctionBegin;
1397   /* do nondiagonal part */
1398   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1399   /* send it on its way */
1400   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1401   /* do local part */
1402   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1403   /* receive remote parts: note this assumes the values are not actually */
1404   /* inserted in zz until the VecScatterEnd() below, which is true for this */
1405   /* implementation but perhaps not for every scatter implementation. */
1406   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1407   PetscFunctionReturn(0);
1408 }
1409 
1410 /*
1411   This only works correctly for square matrices where the subblock A->A is the
1412    diagonal block
1413 */
1414 #undef __FUNCT__
1415 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
1416 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1417 {
1418   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1419   PetscErrorCode ierr;
1420 
1421   PetscFunctionBegin;
1422   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1423   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1424   PetscFunctionReturn(0);
1425 }
1426 
1427 #undef __FUNCT__
1428 #define __FUNCT__ "MatScale_MPIBAIJ"
1429 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
1430 {
1431   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1432   PetscErrorCode ierr;
1433 
1434   PetscFunctionBegin;
1435   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1436   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1437   PetscFunctionReturn(0);
1438 }
1439 
1440 #undef __FUNCT__
1441 #define __FUNCT__ "MatGetRow_MPIBAIJ"
1442 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1443 {
1444   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
1445   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1446   PetscErrorCode ierr;
1447   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1448   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
1449   PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;
1450 
1451   PetscFunctionBegin;
1452   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1453   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1454   mat->getrowactive = PETSC_TRUE;
1455 
1456   if (!mat->rowvalues && (idx || v)) {
1457     /*
1458         allocate enough space to hold information from the longest row.
1459     */
1460     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1461     PetscInt     max = 1,mbs = mat->mbs,tmp;
1462     for (i=0; i<mbs; i++) {
1463       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1464       if (max < tmp) { max = tmp; }
1465     }
1466     ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1467   }
1468   lrow = row - brstart;
1469 
1470   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1471   if (!v)   {pvA = 0; pvB = 0;}
1472   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1473   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1474   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1475   nztot = nzA + nzB;
1476 
1477   cmap  = mat->garray;
1478   if (v  || idx) {
1479     if (nztot) {
1480       /* Sort by increasing column numbers, assuming A and B already sorted */
1481       PetscInt imark = -1;
1482       if (v) {
1483         *v = v_p = mat->rowvalues;
1484         for (i=0; i<nzB; i++) {
1485           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1486           else break;
1487         }
1488         imark = i;
1489         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1490         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1491       }
1492       if (idx) {
1493         *idx = idx_p = mat->rowindices;
1494         if (imark > -1) {
1495           for (i=0; i<imark; i++) {
1496             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1497           }
1498         } else {
1499           for (i=0; i<nzB; i++) {
1500             if (cmap[cworkB[i]/bs] < cstart)
1501               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1502             else break;
1503           }
1504           imark = i;
1505         }
1506         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1507         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1508       }
1509     } else {
1510       if (idx) *idx = 0;
1511       if (v)   *v   = 0;
1512     }
1513   }
1514   *nz = nztot;
1515   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1516   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1517   PetscFunctionReturn(0);
1518 }
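
/*
   Example usage (a minimal sketch): iterating over the locally owned rows of
   an assembled matrix A; each row must be restored before the next MatGetRow()
   call since the work arrays allocated above are reused for every row.

     PetscInt          row,rstart,rend,ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       ... use cols[] and vals[] ...
       ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     }
*/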
1519 
1520 #undef __FUNCT__
1521 #define __FUNCT__ "MatRestoreRow_MPIBAIJ"
1522 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1523 {
1524   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1525 
1526   PetscFunctionBegin;
1527   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1528   baij->getrowactive = PETSC_FALSE;
1529   PetscFunctionReturn(0);
1530 }
1531 
1532 #undef __FUNCT__
1533 #define __FUNCT__ "MatZeroEntries_MPIBAIJ"
1534 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1535 {
1536   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1537   PetscErrorCode ierr;
1538 
1539   PetscFunctionBegin;
1540   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1541   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1542   PetscFunctionReturn(0);
1543 }
1544 
1545 #undef __FUNCT__
1546 #define __FUNCT__ "MatGetInfo_MPIBAIJ"
1547 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1548 {
1549   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
1550   Mat            A = a->A,B = a->B;
1551   PetscErrorCode ierr;
1552   PetscReal      isend[5],irecv[5];
1553 
1554   PetscFunctionBegin;
1555   info->block_size     = (PetscReal)matin->rmap->bs;
1556   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1557   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1558   isend[3] = info->memory;  isend[4] = info->mallocs;
1559   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1560   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1561   isend[3] += info->memory;  isend[4] += info->mallocs;
1562   if (flag == MAT_LOCAL) {
1563     info->nz_used      = isend[0];
1564     info->nz_allocated = isend[1];
1565     info->nz_unneeded  = isend[2];
1566     info->memory       = isend[3];
1567     info->mallocs      = isend[4];
1568   } else if (flag == MAT_GLOBAL_MAX) {
1569     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1570     info->nz_used      = irecv[0];
1571     info->nz_allocated = irecv[1];
1572     info->nz_unneeded  = irecv[2];
1573     info->memory       = irecv[3];
1574     info->mallocs      = irecv[4];
1575   } else if (flag == MAT_GLOBAL_SUM) {
1576     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1577     info->nz_used      = irecv[0];
1578     info->nz_allocated = irecv[1];
1579     info->nz_unneeded  = irecv[2];
1580     info->memory       = irecv[3];
1581     info->mallocs      = irecv[4];
1582   } else {
1583     SETERRQ1(((PetscObject)matin)->comm,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1584   }
1585   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1586   info->fill_ratio_needed = 0;
1587   info->factor_mallocs    = 0;
1588   PetscFunctionReturn(0);
1589 }
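
/*
   Example usage (a minimal sketch): querying the storage statistics summed
   over all processes; MAT_LOCAL and MAT_GLOBAL_MAX are handled analogously.

     MatInfo info;
     ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(PETSC_COMM_WORLD,"nz used %g, allocated %g\n",info.nz_used,info.nz_allocated);CHKERRQ(ierr);
*/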
1590 
1591 #undef __FUNCT__
1592 #define __FUNCT__ "MatSetOption_MPIBAIJ"
1593 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool  flg)
1594 {
1595   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1596   PetscErrorCode ierr;
1597 
1598   PetscFunctionBegin;
1599   switch (op) {
1600   case MAT_NEW_NONZERO_LOCATIONS:
1601   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1602   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1603   case MAT_KEEP_NONZERO_PATTERN:
1604   case MAT_NEW_NONZERO_LOCATION_ERR:
1605     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1606     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1607     break;
1608   case MAT_ROW_ORIENTED:
1609     a->roworiented = flg;
1610     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1611     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1612     break;
1613   case MAT_NEW_DIAGONALS:
1614     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1615     break;
1616   case MAT_IGNORE_OFF_PROC_ENTRIES:
1617     a->donotstash = flg;
1618     break;
1619   case MAT_USE_HASH_TABLE:
1620     a->ht_flag = flg;
1621     break;
1622   case MAT_SYMMETRIC:
1623   case MAT_STRUCTURALLY_SYMMETRIC:
1624   case MAT_HERMITIAN:
1625   case MAT_SYMMETRY_ETERNAL:
1626     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1627     break;
1628   default:
1629     SETERRQ1(((PetscObject)A)->comm,PETSC_ERR_SUP,"unknown option %d",op);
1630   }
1631   PetscFunctionReturn(0);
1632 }
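
/*
   Example usage (a minimal sketch): turning on the hash-table insertion path
   and discarding entries destined for other processes before the MatSetValues()
   calls begin.

     ierr = MatSetOption(A,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
     ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
*/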
1633 
1634 #undef __FUNCT__
1635 #define __FUNCT__ "MatTranspose_MPIBAIJ"
1636 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
1637 {
1638   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
1639   Mat_SeqBAIJ    *Aloc;
1640   Mat            B;
1641   PetscErrorCode ierr;
1642   PetscInt       M=A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
1643   PetscInt       bs=A->rmap->bs,mbs=baij->mbs;
1644   MatScalar      *a;
1645 
1646   PetscFunctionBegin;
1647   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1648   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1649     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
1650     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1651     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1652     /* Do not know preallocation information, but must set block size */
1653     ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,PETSC_NULL,PETSC_DECIDE,PETSC_NULL);CHKERRQ(ierr);
1654   } else {
1655     B = *matout;
1656   }
1657 
1658   /* copy over the A part */
1659   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1660   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1661   ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
1662 
1663   for (i=0; i<mbs; i++) {
1664     rvals[0] = bs*(baij->rstartbs + i);
1665     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1666     for (j=ai[i]; j<ai[i+1]; j++) {
1667       col = (baij->cstartbs+aj[j])*bs;
1668       for (k=0; k<bs; k++) {
1669         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1670         col++; a += bs;
1671       }
1672     }
1673   }
1674   /* copy over the B part */
1675   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1676   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1677   for (i=0; i<mbs; i++) {
1678     rvals[0] = bs*(baij->rstartbs + i);
1679     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1680     for (j=ai[i]; j<ai[i+1]; j++) {
1681       col = baij->garray[aj[j]]*bs;
1682       for (k=0; k<bs; k++) {
1683         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1684         col++; a += bs;
1685       }
1686     }
1687   }
1688   ierr = PetscFree(rvals);CHKERRQ(ierr);
1689   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1690   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1691 
1692   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1693     *matout = B;
1694   } else {
1695     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
1696   }
1697   PetscFunctionReturn(0);
1698 }
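
/*
   Example usage (a minimal sketch): forming an explicit transpose, or
   transposing a square matrix in place by passing the matrix itself.

     Mat At;
     ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
     ierr = MatTranspose(A,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);     in-place, square matrices only
*/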
1699 
1700 #undef __FUNCT__
1701 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
1702 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1703 {
1704   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1705   Mat            a = baij->A,b = baij->B;
1706   PetscErrorCode ierr;
1707   PetscInt       s1,s2,s3;
1708 
1709   PetscFunctionBegin;
1710   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1711   if (rr) {
1712     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1713     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1714     /* Overlap communication with computation. */
1715     ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1716   }
1717   if (ll) {
1718     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1719     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1720     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1721   }
1722   /* scale  the diagonal block */
1723   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1724 
1725   if (rr) {
1726     /* Do a scatter end and then right scale the off-diagonal block */
1727     ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1728     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1729   }
1730 
1731   PetscFunctionReturn(0);
1732 }
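
/*
   Example usage (a minimal sketch): scaling A <- diag(l)*A*diag(r); pass
   PETSC_NULL for a side that should be left unscaled. Here l must conform
   to the row layout of A and r to the column layout.

     ierr = MatDiagonalScale(A,l,r);CHKERRQ(ierr);
*/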
1733 
1734 #undef __FUNCT__
1735 #define __FUNCT__ "MatZeroRows_MPIBAIJ"
1736 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
1737 {
1738   Mat_MPIBAIJ       *l = (Mat_MPIBAIJ*)A->data;
1739   PetscErrorCode    ierr;
1740   PetscMPIInt       imdex,size = l->size,n,rank = l->rank;
1741   PetscInt          i,*owners = A->rmap->range;
1742   PetscInt          *nprocs,j,idx,nsends,row;
1743   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
1744   PetscInt          *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source,lastidx = -1;
1745   PetscInt          *lens,*lrows,*values,rstart_bs=A->rmap->rstart;
1746   MPI_Comm          comm = ((PetscObject)A)->comm;
1747   MPI_Request       *send_waits,*recv_waits;
1748   MPI_Status        recv_status,*send_status;
1749   const PetscScalar *xx;
1750   PetscScalar       *bb;
1751 #if defined(PETSC_DEBUG)
1752   PetscBool         found = PETSC_FALSE;
1753 #endif
1754 
1755   PetscFunctionBegin;
1756   /*  first count number of contributors to each processor */
1757   ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
1758   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
1759   ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
1760   j = 0;
1761   for (i=0; i<N; i++) {
1762     if (lastidx > (idx = rows[i])) j = 0;
1763     lastidx = idx;
1764     for (; j<size; j++) {
1765       if (idx >= owners[j] && idx < owners[j+1]) {
1766         nprocs[2*j]++;
1767         nprocs[2*j+1] = 1;
1768         owner[i] = j;
1769 #if defined(PETSC_DEBUG)
1770         found = PETSC_TRUE;
1771 #endif
1772         break;
1773       }
1774     }
1775 #if defined(PETSC_DEBUG)
1776     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
1777     found = PETSC_FALSE;
1778 #endif
1779   }
1780   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
1781 
1782   if (A->nooffproczerorows) {
1783     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but tried to zero an off-process row");
1784     nrecvs = nsends;
1785     nmax   = N;
1786   } else {
1787     /* inform other processors of number of messages and max length*/
1788     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
1789   }
1790 
1791   /* post receives:   */
1792   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
1793   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
1794   for (i=0; i<nrecvs; i++) {
1795     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
1796   }
1797 
1798   /* do sends:
1799      1) starts[i] gives the starting index in svalues for stuff going to
1800      the ith processor
1801   */
1802   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
1803   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
1804   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
1805   starts[0]  = 0;
1806   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1807   for (i=0; i<N; i++) {
1808     svalues[starts[owner[i]]++] = rows[i];
1809   }
1810 
1811   starts[0] = 0;
1812   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1813   count = 0;
1814   for (i=0; i<size; i++) {
1815     if (nprocs[2*i+1]) {
1816       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
1817     }
1818   }
1819   ierr = PetscFree(starts);CHKERRQ(ierr);
1820 
1821   base = owners[rank];
1822 
1823   /*  wait on receives */
1824   ierr   = PetscMalloc2(nrecvs+1,PetscInt,&lens,nrecvs+1,PetscInt,&source);CHKERRQ(ierr);
1825   count  = nrecvs;
1826   slen = 0;
1827   while (count) {
1828     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
1829     /* unpack receives into our local space */
1830     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
1831     source[imdex]  = recv_status.MPI_SOURCE;
1832     lens[imdex]    = n;
1833     slen          += n;
1834     count--;
1835   }
1836   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1837 
1838   /* move the data into the send scatter */
1839   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
1840   count = 0;
1841   for (i=0; i<nrecvs; i++) {
1842     values = rvalues + i*nmax;
1843     for (j=0; j<lens[i]; j++) {
1844       lrows[count++] = values[j] - base;
1845     }
1846   }
1847   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1848   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
1849   ierr = PetscFree(owner);CHKERRQ(ierr);
1850   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1851 
1852   /* fix right hand side if needed */
1853   if (x && b) {
1854     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
1855     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1856     for (i=0; i<slen; i++) {
1857       bb[lrows[i]] = diag*xx[lrows[i]];
1858     }
1859     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
1860     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1861   }
1862 
1863   /* actually zap the local rows */
1864   /*
1865         Zero the required rows. If the "diagonal block" of the matrix
1866      is square and the user wishes to set the diagonal, we use separate
1867      code so that MatSetValues() is not called for each diagonal entry,
1868      which would allocate new memory each time, triggering many mallocs
1869      and slowing things down.
1870   */
1871   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
1872   ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
1873   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
1874     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
1875   } else if (diag != 0.0) {
1876     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
1877     if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1878        MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1879     for (i=0; i<slen; i++) {
1880       row  = lrows[i] + rstart_bs;
1881       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
1882     }
1883     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1884     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1885   } else {
1886     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
1887   }
1888 
1889   ierr = PetscFree(lrows);CHKERRQ(ierr);
1890 
1891   /* wait on sends */
1892   if (nsends) {
1893     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1894     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1895     ierr = PetscFree(send_status);CHKERRQ(ierr);
1896   }
1897   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1898   ierr = PetscFree(svalues);CHKERRQ(ierr);
1899 
1900   PetscFunctionReturn(0);
1901 }
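
/*
   Example usage (a minimal sketch): imposing Dirichlet conditions on two
   global rows (the row numbers are hypothetical), putting 1.0 on the
   diagonal and fixing the right-hand side b from the solution vector x.

     PetscInt rows[2] = {0,5};
     ierr = MatZeroRows(A,2,rows,1.0,x,b);CHKERRQ(ierr);
*/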
1902 
1903 #undef __FUNCT__
1904 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
1905 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1906 {
1907   Mat_MPIBAIJ    *a   = (Mat_MPIBAIJ*)A->data;
1908   PetscErrorCode ierr;
1909 
1910   PetscFunctionBegin;
1911   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1912   PetscFunctionReturn(0);
1913 }
1914 
1915 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
1916 
1917 #undef __FUNCT__
1918 #define __FUNCT__ "MatEqual_MPIBAIJ"
1919 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool  *flag)
1920 {
1921   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1922   Mat            a,b,c,d;
1923   PetscBool      flg;
1924   PetscErrorCode ierr;
1925 
1926   PetscFunctionBegin;
1927   a = matA->A; b = matA->B;
1928   c = matB->A; d = matB->B;
1929 
1930   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1931   if (flg) {
1932     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1933   }
1934   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1935   PetscFunctionReturn(0);
1936 }
1937 
1938 #undef __FUNCT__
1939 #define __FUNCT__ "MatCopy_MPIBAIJ"
1940 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
1941 {
1942   PetscErrorCode ierr;
1943   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ *)A->data;
1944   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ *)B->data;
1945 
1946   PetscFunctionBegin;
1947   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1948   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1949     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1950   } else {
1951     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1952     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1953   }
1954   PetscFunctionReturn(0);
1955 }
1956 
1957 #undef __FUNCT__
1958 #define __FUNCT__ "MatSetUp_MPIBAIJ"
1959 PetscErrorCode MatSetUp_MPIBAIJ(Mat A)
1960 {
1961   PetscErrorCode ierr;
1962 
1963   PetscFunctionBegin;
1964   ierr =  MatMPIBAIJSetPreallocation(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1965   PetscFunctionReturn(0);
1966 }
1967 
1968 #undef __FUNCT__
1969 #define __FUNCT__ "MatAXPY_MPIBAIJ"
1970 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1971 {
1972   PetscErrorCode ierr;
1973   Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data;
1974   PetscBLASInt   bnz,one=1;
1975   Mat_SeqBAIJ    *x,*y;
1976 
1977   PetscFunctionBegin;
1978   if (str == SAME_NONZERO_PATTERN) {
1979     PetscScalar alpha = a;
1980     x = (Mat_SeqBAIJ *)xx->A->data;
1981     y = (Mat_SeqBAIJ *)yy->A->data;
1982     bnz = PetscBLASIntCast(x->nz);
1983     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1984     x = (Mat_SeqBAIJ *)xx->B->data;
1985     y = (Mat_SeqBAIJ *)yy->B->data;
1986     bnz = PetscBLASIntCast(x->nz);
1987     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1988   } else {
1989     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1990   }
1991   PetscFunctionReturn(0);
1992 }
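
/*
   Example usage (a minimal sketch): Y <- Y + 2*X, taking the fast BLAS path
   above only when the two matrices are known to share their nonzero pattern.

     ierr = MatAXPY(Y,2.0,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
*/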
1993 
1994 #undef __FUNCT__
1995 #define __FUNCT__ "MatSetBlockSize_MPIBAIJ"
1996 PetscErrorCode MatSetBlockSize_MPIBAIJ(Mat A,PetscInt bs)
1997 {
1998   Mat_MPIBAIJ    *a   = (Mat_MPIBAIJ*)A->data;
1999   PetscInt       rbs,cbs;
2000   PetscErrorCode ierr;
2001 
2002   PetscFunctionBegin;
2003   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
2004   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
2005   ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr);
2006   ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr);
2007   if (rbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %D with BAIJ row block size %D",bs,rbs);
2008   if (cbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %D with BAIJ column block size %D",bs,cbs);
2009   PetscFunctionReturn(0);
2010 }
2011 
2012 #undef __FUNCT__
2013 #define __FUNCT__ "MatRealPart_MPIBAIJ"
2014 PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
2015 {
2016   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
2017   PetscErrorCode ierr;
2018 
2019   PetscFunctionBegin;
2020   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2021   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2022   PetscFunctionReturn(0);
2023 }
2024 
2025 #undef __FUNCT__
2026 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ"
2027 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
2028 {
2029   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
2030   PetscErrorCode ierr;
2031 
2032   PetscFunctionBegin;
2033   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2034   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2035   PetscFunctionReturn(0);
2036 }
2037 
2038 #undef __FUNCT__
2039 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ"
2040 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
2041 {
2042   PetscErrorCode ierr;
2043   IS             iscol_local;
2044   PetscInt       csize;
2045 
2046   PetscFunctionBegin;
2047   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
2048   if (call == MAT_REUSE_MATRIX) {
2049     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
2050     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2051   } else {
2052     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
2053   }
2054   ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
2055   if (call == MAT_INITIAL_MATRIX) {
2056     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
2057     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
2058   }
2059   PetscFunctionReturn(0);
2060 }
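
/*
   Example usage (a minimal sketch): extracting a parallel submatrix from
   index sets of global rows and columns; for BAIJ the indices must select
   whole blocks. The local length nloc and offset first are hypothetical.

     IS  isrow,iscol;
     Mat S;
     ierr = ISCreateStride(((PetscObject)A)->comm,nloc,first,1,&isrow);CHKERRQ(ierr);
     ierr = ISCreateStride(((PetscObject)A)->comm,nloc,first,1,&iscol);CHKERRQ(ierr);
     ierr = MatGetSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&S);CHKERRQ(ierr);
     ierr = ISDestroy(&isrow);CHKERRQ(ierr);
     ierr = ISDestroy(&iscol);CHKERRQ(ierr);
*/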
2061 
2062 #undef __FUNCT__
2063 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ_Private"
2064 /*
2065     Not great since it makes two copies of the submatrix: first a SeqBAIJ
2066   in local, and then the final result by concatenating the local matrices.
2067   Writing it directly would be much like MatGetSubMatrices_MPIBAIJ().
2068 */
2069 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2070 {
2071   PetscErrorCode ierr;
2072   PetscMPIInt    rank,size;
2073   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs;
2074   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2075   Mat            *local,M,Mreuse;
2076   MatScalar      *vwork,*aa;
2077   MPI_Comm       comm = ((PetscObject)mat)->comm;
2078   Mat_SeqBAIJ    *aij;
2079 
2080 
2081   PetscFunctionBegin;
2082   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2083   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2084 
2085   if (call ==  MAT_REUSE_MATRIX) {
2086     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2087     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2088     local = &Mreuse;
2089     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2090   } else {
2091     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2092     Mreuse = *local;
2093     ierr   = PetscFree(local);CHKERRQ(ierr);
2094   }
2095 
2096   /*
2097       m - number of local rows
2098       n - number of columns (same on all processors)
2099       rstart - first row in new global matrix generated
2100   */
2101   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2102   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2103   m    = m/bs;
2104   n    = n/bs;
2105 
2106   if (call == MAT_INITIAL_MATRIX) {
2107     aij = (Mat_SeqBAIJ*)(Mreuse)->data;
2108     ii  = aij->i;
2109     jj  = aij->j;
2110 
2111     /*
2112         Determine the number of non-zeros in the diagonal and off-diagonal
2113         portions of the matrix in order to do correct preallocation
2114     */
2115 
2116     /* first get start and end of "diagonal" columns */
2117     if (csize == PETSC_DECIDE) {
2118       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2119       if (mglobal == n*bs) { /* square matrix */
2120         nlocal = m;
2121       } else {
2122         nlocal = n/size + ((n % size) > rank);
2123       }
2124     } else {
2125       nlocal = csize/bs;
2126     }
2127     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2128     rstart = rend - nlocal;
2129     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2130 
2131     /* next, compute all the lengths */
2132     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2133     olens = dlens + m;
2134     for (i=0; i<m; i++) {
2135       jend = ii[i+1] - ii[i];
2136       olen = 0;
2137       dlen = 0;
2138       for (j=0; j<jend; j++) {
2139         if (*jj < rstart || *jj >= rend) olen++;
2140         else dlen++;
2141         jj++;
2142       }
2143       olens[i] = olen;
2144       dlens[i] = dlen;
2145     }
2146     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
2147     ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr);
2148     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
2149     ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
2150     ierr = PetscFree(dlens);CHKERRQ(ierr);
2151   } else {
2152     PetscInt ml,nl;
2153 
2154     M = *newmat;
2155     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2156     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2157     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2158     /*
2159          The next two lines are needed so we may call MatSetValuesBlocked_MPIBAIJ() below directly,
2160        rather than the slower MatSetValues().
2161     */
2162     M->was_assembled = PETSC_TRUE;
2163     M->assembled     = PETSC_FALSE;
2164   }
2165   ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2166   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2167   aij = (Mat_SeqBAIJ*)(Mreuse)->data;
2168   ii  = aij->i;
2169   jj  = aij->j;
2170   aa  = aij->a;
2171   for (i=0; i<m; i++) {
2172     row   = rstart/bs + i;
2173     nz    = ii[i+1] - ii[i];
2174     cwork = jj;     jj += nz;
2175     vwork = aa;     aa += nz;
2176     ierr = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2177   }
2178 
2179   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2180   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2181   *newmat = M;
2182 
2183   /* save submatrix used in processor for next request */
2184   if (call ==  MAT_INITIAL_MATRIX) {
2185     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2186     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2187   }
2188 
2189   PetscFunctionReturn(0);
2190 }
2191 
2192 #undef __FUNCT__
2193 #define __FUNCT__ "MatPermute_MPIBAIJ"
2194 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B)
2195 {
2196   MPI_Comm       comm,pcomm;
2197   PetscInt       first,local_size,nrows;
2198   const PetscInt *rows;
2199   PetscMPIInt    size;
2200   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
2201   PetscErrorCode ierr;
2202 
2203   PetscFunctionBegin;
2204   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2205   /* make a collective version of 'rowp' */
2206   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
2207   if (pcomm==comm) {
2208     crowp = rowp;
2209   } else {
2210     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
2211     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
2212     ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr);
2213     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
2214   }
2215   /* collect the global row permutation and invert it */
2216   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
2217   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
2218   if (pcomm!=comm) {
2219     ierr = ISDestroy(&crowp);CHKERRQ(ierr);
2220   }
2221   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
2222   /* get the local target indices */
2223   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
2224   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
2225   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
2226   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr);
2227   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
2228   ierr = ISDestroy(&irowp);CHKERRQ(ierr);
2229   /* the column permutation is so much easier;
2230      make a local version of 'colp' and invert it */
2231   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
2232   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
2233   if (size==1) {
2234     lcolp = colp;
2235   } else {
2236     ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
2237     ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
2238     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,PETSC_COPY_VALUES,&lcolp);CHKERRQ(ierr);
2239   }
2240   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
2241   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
2242   ierr = ISSetPermutation(icolp);CHKERRQ(ierr);
2243   if (size>1) {
2244     ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
2245     ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
2246   }
2247   /* now we just get the submatrix */
2248   ierr = MatGetSubMatrix_MPIBAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
2249   /* clean up */
2250   ierr = ISDestroy(&lrowp);CHKERRQ(ierr);
2251   ierr = ISDestroy(&icolp);CHKERRQ(ierr);
2252   PetscFunctionReturn(0);
2253 }
2254 
2255 #undef __FUNCT__
2256 #define __FUNCT__ "MatGetGhosts_MPIBAIJ"
2257 PetscErrorCode  MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
2258 {
2259   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*) mat->data;
2260   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
2261 
2262   PetscFunctionBegin;
2263   if (nghosts) { *nghosts = B->nbs;}
2264   if (ghosts) {*ghosts = baij->garray;}
2265   PetscFunctionReturn(0);
2266 }
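
/*
   Example usage (a minimal sketch): retrieving the global block indices of
   the off-process block columns referenced by this process.

     PetscInt       nghosts;
     const PetscInt *ghosts;
     ierr = MatGetGhosts(A,&nghosts,&ghosts);CHKERRQ(ierr);
*/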
2267 
2268 extern PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat);
2269 
2270 #undef __FUNCT__
2271 #define __FUNCT__ "MatFDColoringCreate_MPIBAIJ"
2272 /*
2273     This routine is almost identical to MatFDColoringCreate_MPIAIJ()!
2274 */
2275 PetscErrorCode MatFDColoringCreate_MPIBAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c)
2276 {
2277   Mat_MPIBAIJ            *baij = (Mat_MPIBAIJ*)mat->data;
2278   PetscErrorCode        ierr;
2279   PetscMPIInt           size,*ncolsonproc,*disp,nn;
2280   PetscInt              bs,i,n,nrows,j,k,m,*rows = 0,*A_ci,*A_cj,ncols,col;
2281   const PetscInt        *is;
2282   PetscInt              nis = iscoloring->n,nctot,*cols,*B_ci,*B_cj;
2283   PetscInt              *rowhit,M,cstart,cend,colb;
2284   PetscInt              *columnsforrow,l;
2285   IS                    *isa;
2286   PetscBool              done,flg;
2287   ISLocalToGlobalMapping map = mat->cmap->bmapping;
2288   PetscInt               *ltog = (map ? map->indices : (PetscInt*)PETSC_NULL),ctype=c->ctype;
2289 
2290   PetscFunctionBegin;
2291   if (!mat->assembled) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled first; MatAssemblyBegin/End();");
2292   if (ctype == IS_COLORING_GHOSTED && !map) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_INCOMP,"When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMappingBlock");
2293 
2294   ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr);
2295   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2296   M                = mat->rmap->n/bs;
2297   cstart           = mat->cmap->rstart/bs;
2298   cend             = mat->cmap->rend/bs;
2299   c->M             = mat->rmap->N/bs;  /* set the global rows and columns and local rows */
2300   c->N             = mat->cmap->N/bs;
2301   c->m             = mat->rmap->n/bs;
2302   c->rstart        = mat->rmap->rstart/bs;
2303 
2304   c->ncolors       = nis;
2305   ierr             = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr);
2306   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr);
2307   ierr             = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr);
2308   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr);
2309   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr);
2310   ierr = PetscLogObjectMemory(c,5*nis*sizeof(PetscInt));CHKERRQ(ierr);
2311 
2312   /* Allow access to data structures of local part of matrix */
2313   if (!baij->colmap) {
2314     ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
2315   }
2316   ierr = MatGetColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr);
2317   ierr = MatGetColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr);
2318 
2319   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr);
2320   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr);
2321 
2322   for (i=0; i<nis; i++) {
2323     ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr);
2324     ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr);
2325     c->ncolumns[i] = n;
2326     if (n) {
2327       ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr);
2328       ierr = PetscLogObjectMemory(c,n*sizeof(PetscInt));CHKERRQ(ierr);
2329       ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr);
2330     } else {
2331       c->columns[i]  = 0;
2332     }
2333 
2334     if (ctype == IS_COLORING_GLOBAL){
2335       /* Determine the total (parallel) number of columns of this color */
2336       ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
2337       ierr = PetscMalloc2(size,PetscMPIInt,&ncolsonproc,size,PetscMPIInt,&disp);CHKERRQ(ierr);
2338 
2339       nn   = PetscMPIIntCast(n);
2340       ierr = MPI_Allgather(&nn,1,MPI_INT,ncolsonproc,1,MPI_INT,((PetscObject)mat)->comm);CHKERRQ(ierr);
2341       nctot = 0; for (j=0; j<size; j++) {nctot += ncolsonproc[j];}
2342       if (!nctot) {
2343         ierr = PetscInfo(mat,"Coloring of matrix has some unneeded colors with no corresponding rows\n");CHKERRQ(ierr);
2344       }
2345 
2346       disp[0] = 0;
2347       for (j=1; j<size; j++) {
2348         disp[j] = disp[j-1] + ncolsonproc[j-1];
2349       }
2350 
2351       /* Get complete list of columns for color on each processor */
2352       ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2353       ierr = MPI_Allgatherv((void*)is,n,MPIU_INT,cols,ncolsonproc,disp,MPIU_INT,((PetscObject)mat)->comm);CHKERRQ(ierr);
2354       ierr = PetscFree2(ncolsonproc,disp);CHKERRQ(ierr);
2355     } else if (ctype == IS_COLORING_GHOSTED){
2356       /* Determine local number of columns of this color on this process, including ghost points */
2357       nctot = n;
2358       ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2359       ierr = PetscMemcpy(cols,is,n*sizeof(PetscInt));CHKERRQ(ierr);
2360     } else {
2361       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not provided for this MatFDColoring type");
2362     }
2363 
2364     /*
2365        Mark all rows affected by these columns
2366     */
2367     /* Temporary option to allow for debugging/testing */
2368     flg  = PETSC_FALSE;
2369     ierr = PetscOptionsGetBool(PETSC_NULL,"-matfdcoloring_slow",&flg,PETSC_NULL);CHKERRQ(ierr);
2370     if (!flg) {/*-----------------------------------------------------------------------------*/
2371       /* crude, fast version */
2372       ierr = PetscMemzero(rowhit,M*sizeof(PetscInt));CHKERRQ(ierr);
2373       /* loop over columns*/
2374       for (j=0; j<nctot; j++) {
2375         if (ctype == IS_COLORING_GHOSTED) {
2376           col = ltog[cols[j]];
2377         } else {
2378           col  = cols[j];
2379         }
2380         if (col >= cstart && col < cend) {
2381           /* column is in diagonal block of matrix */
2382           rows = A_cj + A_ci[col-cstart];
2383           m    = A_ci[col-cstart+1] - A_ci[col-cstart];
2384         } else {
2385 #if defined (PETSC_USE_CTABLE)
2386           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2387           colb--;
2388 #else
2389           colb = baij->colmap[col] - 1;
2390 #endif
2391           if (colb == -1) {
2392             m = 0;
2393           } else {
2394             colb = colb/bs;
2395             rows = B_cj + B_ci[colb];
2396             m    = B_ci[colb+1] - B_ci[colb];
2397           }
2398         }
2399         /* loop over columns marking them in rowhit */
2400         for (k=0; k<m; k++) {
2401           rowhit[*rows++] = col + 1;
2402         }
2403       }
2404 
2405       /* count the number of hits */
2406       nrows = 0;
2407       for (j=0; j<M; j++) {
2408         if (rowhit[j]) nrows++;
2409       }
2410       c->nrows[i]         = nrows;
2411       ierr                = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
2412       ierr                = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
2413       ierr = PetscLogObjectMemory(c,2*(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr);
2414       nrows = 0;
2415       for (j=0; j<M; j++) {
2416         if (rowhit[j]) {
2417           c->rows[i][nrows]           = j;
2418           c->columnsforrow[i][nrows] = rowhit[j] - 1;
2419           nrows++;
2420         }
2421       }
2422     } else {/*-------------------------------------------------------------------------------*/
2423       /* slow version, using rowhit as a linked list */
2424       PetscInt currentcol,fm,mfm;
2425       rowhit[M] = M;
2426       nrows     = 0;
2427       /* loop over columns*/
2428       for (j=0; j<nctot; j++) {
2429         if (ctype == IS_COLORING_GHOSTED) {
2430           col = ltog[cols[j]];
2431         } else {
2432           col  = cols[j];
2433         }
2434         if (col >= cstart && col < cend) {
2435           /* column is in diagonal block of matrix */
2436           rows = A_cj + A_ci[col-cstart];
2437           m    = A_ci[col-cstart+1] - A_ci[col-cstart];
2438         } else {
2439 #if defined (PETSC_USE_CTABLE)
2440           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2441           colb--;
2442 #else
2443           colb = baij->colmap[col] - 1;
2444 #endif
2445           if (colb == -1) {
2446             m = 0;
2447           } else {
2448             colb = colb/bs;
2449             rows = B_cj + B_ci[colb];
2450             m    = B_ci[colb+1] - B_ci[colb];
2451           }
2452         }
2453 
2454         /* loop over columns marking them in rowhit */
2455         fm    = M; /* fm points to first entry in linked list */
2456         for (k=0; k<m; k++) {
2457           currentcol = *rows++;
2458           /* is it already in the list? */
2459           do {
2460             mfm  = fm;
2461             fm   = rowhit[fm];
2462           } while (fm < currentcol);
2463           /* not in list so add it */
2464           if (fm != currentcol) {
2465             nrows++;
2466             columnsforrow[currentcol] = col;
2467             /* next three lines insert new entry into linked list */
2468             rowhit[mfm]               = currentcol;
2469             rowhit[currentcol]        = fm;
2470             fm                        = currentcol;
2471             /* fm points to present position in list since we know the columns are sorted */
2472           } else {
2473             SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid coloring of matrix detected");
2474           }
2475         }
2476       }
2477       c->nrows[i]         = nrows;
2478       ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
2479       ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
2480       ierr = PetscLogObjectMemory(c,(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr);
2481       /* now store the linked list of rows into c->rows[i] */
2482       nrows = 0;
2483       fm    = rowhit[M];
2484       do {
2485         c->rows[i][nrows]            = fm;
2486         c->columnsforrow[i][nrows++] = columnsforrow[fm];
2487         fm                           = rowhit[fm];
2488       } while (fm < M);
2489     } /* ---------------------------------------------------------------------------------------*/
2490     ierr = PetscFree(cols);CHKERRQ(ierr);
2491   }
2492 
2493   /* Optimize by adding the vscale, and scaleforrow[][] fields */
2494   /*
2495        vscale will contain the "diagonal" on processor scalings followed by the off processor
2496   */
2497   if (ctype == IS_COLORING_GLOBAL) {
2498     PetscInt *garray;
2499     ierr = PetscMalloc(baij->B->cmap->n*sizeof(PetscInt),&garray);CHKERRQ(ierr);
2500     for (i=0; i<baij->B->cmap->n/bs; i++) {
2501       for (j=0; j<bs; j++) {
2502         garray[i*bs+j] = bs*baij->garray[i]+j;
2503       }
2504     }
2505     ierr = VecCreateGhost(((PetscObject)mat)->comm,baij->A->rmap->n,PETSC_DETERMINE,baij->B->cmap->n,garray,&c->vscale);CHKERRQ(ierr);
2506     ierr = PetscFree(garray);CHKERRQ(ierr);
2507     CHKMEMQ;
2508     ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
2509     for (k=0; k<c->ncolors; k++) {
2510       ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
2511       for (l=0; l<c->nrows[k]; l++) {
2512         col = c->columnsforrow[k][l];
2513         if (col >= cstart && col < cend) {
2514           /* column is in diagonal block of matrix */
2515           colb = col - cstart;
2516         } else {
2517           /* column  is in "off-processor" part */
2518 #if defined (PETSC_USE_CTABLE)
2519           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2520           colb--;
2521 #else
2522           colb = baij->colmap[col] - 1;
2523 #endif
2524           colb = colb/bs;
2525           colb += cend - cstart;
2526         }
2527         c->vscaleforrow[k][l] = colb;
2528       }
2529     }
2530   } else if (ctype == IS_COLORING_GHOSTED) {
2531     /* Get gtol mapping */
2532     PetscInt N = mat->cmap->N, *gtol;
2533     ierr = PetscMalloc((N+1)*sizeof(PetscInt),&gtol);CHKERRQ(ierr);
2534     for (i=0; i<N; i++) gtol[i] = -1;
2535     for (i=0; i<map->n; i++) gtol[ltog[i]] = i;
2536 
2537     c->vscale = 0; /* will be created in MatFDColoringApply() */
2538     ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
2539     for (k=0; k<c->ncolors; k++) {
2540       ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
2541       for (l=0; l<c->nrows[k]; l++) {
2542         col = c->columnsforrow[k][l];      /* global column index */
2543         c->vscaleforrow[k][l] = gtol[col]; /* local column index */
2544       }
2545     }
2546     ierr = PetscFree(gtol);CHKERRQ(ierr);
2547   }
2548   ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr);
2549 
2550   ierr = PetscFree(rowhit);CHKERRQ(ierr);
2551   ierr = PetscFree(columnsforrow);CHKERRQ(ierr);
2552   ierr = MatRestoreColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr);
2553   ierr = MatRestoreColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr);
2554   CHKMEMQ;
2555   PetscFunctionReturn(0);
2556 }
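
/*
   Example usage (a minimal sketch): building a finite-difference coloring
   for Jacobian assembly from a coloring of the matrix J; the coloring type
   name MATCOLORINGSL is assumed here.

     ISColoring    iscoloring;
     MatFDColoring fdcoloring;
     ierr = MatGetColoring(J,MATCOLORINGSL,&iscoloring);CHKERRQ(ierr);
     ierr = MatFDColoringCreate(J,iscoloring,&fdcoloring);CHKERRQ(ierr);
     ierr = ISColoringDestroy(&iscoloring);CHKERRQ(ierr);
*/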
2557 
2558 #undef __FUNCT__
2559 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIBAIJ"
2560 PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat)
2561 {
2562   Mat            B;
2563   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ *)A->data;
2564   Mat_SeqBAIJ    *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data;
2565   Mat_SeqAIJ     *b;
2566   PetscErrorCode ierr;
2567   PetscMPIInt    size,rank,*recvcounts = 0,*displs = 0;
2568   PetscInt       sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs;
2569   PetscInt       m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf;
2570 
2571   PetscFunctionBegin;
2572   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
2573   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
2574 
2575   /* ----------------------------------------------------------------
2576      Tell every processor the number of nonzeros per row
2577   */
2578   ierr = PetscMalloc((A->rmap->N/bs)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
2579   for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) {
2580     lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs];
2581   }
2582   sendcount = A->rmap->rend/bs - A->rmap->rstart/bs;
2583   ierr = PetscMalloc(2*size*sizeof(PetscMPIInt),&recvcounts);CHKERRQ(ierr);
2584   displs     = recvcounts + size;
2585   for (i=0; i<size; i++) {
2586     recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs;
2587     displs[i]     = A->rmap->range[i]/bs;
2588   }
2589 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2590   ierr  = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2591 #else
2592   ierr  = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2593 #endif
2594   /* ---------------------------------------------------------------
2595      Create a sequential AIJ matrix that will hold the block nonzero structure of the entire matrix
2596   */
2597   ierr  = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
2598   ierr  = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2599   ierr  = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2600   ierr  = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr);
2601   b = (Mat_SeqAIJ *)B->data;
2602 
2603   /*--------------------------------------------------------------------
2604     Copy my part of matrix column indices over
2605   */
2606   sendcount  = ad->nz + bd->nz;
2607   jsendbuf   = b->j + b->i[rstarts[rank]/bs];
2608   a_jsendbuf = ad->j;
2609   b_jsendbuf = bd->j;
2610   n          = A->rmap->rend/bs - A->rmap->rstart/bs;
2611   cnt        = 0;
2612   for (i=0; i<n; i++) {
2613 
2614     /* put in lower diagonal portion */
2615     m = bd->i[i+1] - bd->i[i];
2616     while (m > 0) {
2617       /* is it above diagonal (in bd (compressed) numbering) */
2618       if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break;
2619       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2620       m--;
2621     }
2622 
2623     /* put in diagonal portion */
2624     for (j=ad->i[i]; j<ad->i[i+1]; j++) {
2625       jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++;
2626     }
2627 
2628     /* put in upper diagonal portion */
2629     while (m-- > 0) {
2630       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2631     }
2632   }
2633   if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt);
2634 
2635   /*--------------------------------------------------------------------
2636     Gather all column indices to all processors
2637   */
2638   for (i=0; i<size; i++) {
2639     recvcounts[i] = 0;
2640     for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) {
2641       recvcounts[i] += lens[j];
2642     }
2643   }
2644   displs[0]  = 0;
2645   for (i=1; i<size; i++) {
2646     displs[i] = displs[i-1] + recvcounts[i-1];
2647   }
2648 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2649   ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2650 #else
2651   ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2652 #endif
2653   /*--------------------------------------------------------------------
2654     Assemble the matrix into usable form (note the numerical values are not yet set)
2655   */
2656   /* set the b->ilen (length of each row) values */
2657   ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr);
2658   /* set the b->i indices */
2659   b->i[0] = 0;
2660   for (i=1; i<=A->rmap->N/bs; i++) {
2661     b->i[i] = b->i[i-1] + lens[i-1];
2662   }
2663   ierr = PetscFree(lens);CHKERRQ(ierr);
2664   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2665   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2666   ierr = PetscFree(recvcounts);CHKERRQ(ierr);
2667 
2668   if (A->symmetric){
2669     ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2670   } else if (A->hermitian) {
2671     ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr);
2672   } else if (A->structurally_symmetric) {
2673     ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2674   }
2675   *newmat = B;
2676   PetscFunctionReturn(0);
2677 }
2678 
2679 #undef __FUNCT__
2680 #define __FUNCT__ "MatSOR_MPIBAIJ"
2681 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2682 {
2683   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
2684   PetscErrorCode ierr;
2685   Vec            bb1 = 0;
2686 
  PetscFunctionBegin;
  if (flag == SOR_APPLY_UPPER) {
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* duplicate bb only after the early return above so that bb1 cannot leak */
  if (its > 1 || !(flag & SOR_ZERO_INITIAL_GUESS)) {
    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
  }
2696 
2697   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2698     if (flag & SOR_ZERO_INITIAL_GUESS) {
2699       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2700       its--;
2701     }
2702 
2703     while (its--) {
2704       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2705       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2706 
2707       /* update rhs: bb1 = bb - B*x */
2708       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2709       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2710 
2711       /* local sweep */
2712       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2713     }
2714   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
2715     if (flag & SOR_ZERO_INITIAL_GUESS) {
2716       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2717       its--;
2718     }
2719     while (its--) {
2720       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2721       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2722 
2723       /* update rhs: bb1 = bb - B*x */
2724       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2725       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2726 
2727       /* local sweep */
2728       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2729     }
2730   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
2731     if (flag & SOR_ZERO_INITIAL_GUESS) {
2732       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2733       its--;
2734     }
2735     while (its--) {
2736       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2737       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2738 
2739       /* update rhs: bb1 = bb - B*x */
2740       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2741       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2742 
2743       /* local sweep */
2744       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2745     }
  } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Requested parallel SOR type is not supported");
2747 
2748   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2749   PetscFunctionReturn(0);
2750 }
2751 
2752 extern PetscErrorCode  MatFDColoringApply_BAIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2753 
2754 #undef __FUNCT__
2755 #define __FUNCT__ "MatInvertBlockDiagonal_MPIBAIJ"
2756 PetscErrorCode  MatInvertBlockDiagonal_MPIBAIJ(Mat A,PetscScalar **values)
2757 {
2758   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*) A->data;
2759   PetscErrorCode ierr;
2760 
2761   PetscFunctionBegin;
2762   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2763   PetscFunctionReturn(0);
2764 }
2765 
2766 
2767 /* -------------------------------------------------------------------*/
2768 static struct _MatOps MatOps_Values = {
2769        MatSetValues_MPIBAIJ,
2770        MatGetRow_MPIBAIJ,
2771        MatRestoreRow_MPIBAIJ,
2772        MatMult_MPIBAIJ,
2773 /* 4*/ MatMultAdd_MPIBAIJ,
2774        MatMultTranspose_MPIBAIJ,
2775        MatMultTransposeAdd_MPIBAIJ,
2776        0,
2777        0,
2778        0,
2779 /*10*/ 0,
2780        0,
2781        0,
2782        MatSOR_MPIBAIJ,
2783        MatTranspose_MPIBAIJ,
2784 /*15*/ MatGetInfo_MPIBAIJ,
2785        MatEqual_MPIBAIJ,
2786        MatGetDiagonal_MPIBAIJ,
2787        MatDiagonalScale_MPIBAIJ,
2788        MatNorm_MPIBAIJ,
2789 /*20*/ MatAssemblyBegin_MPIBAIJ,
2790        MatAssemblyEnd_MPIBAIJ,
2791        MatSetOption_MPIBAIJ,
2792        MatZeroEntries_MPIBAIJ,
2793 /*24*/ MatZeroRows_MPIBAIJ,
2794        0,
2795        0,
2796        0,
2797        0,
2798 /*29*/ MatSetUp_MPIBAIJ,
2799        0,
2800        0,
2801        0,
2802        0,
2803 /*34*/ MatDuplicate_MPIBAIJ,
2804        0,
2805        0,
2806        0,
2807        0,
2808 /*39*/ MatAXPY_MPIBAIJ,
2809        MatGetSubMatrices_MPIBAIJ,
2810        MatIncreaseOverlap_MPIBAIJ,
2811        MatGetValues_MPIBAIJ,
2812        MatCopy_MPIBAIJ,
2813 /*44*/ 0,
2814        MatScale_MPIBAIJ,
2815        0,
2816        0,
2817        0,
2818 /*49*/ MatSetBlockSize_MPIBAIJ,
2819        0,
2820        0,
2821        0,
2822        0,
2823 /*54*/ MatFDColoringCreate_MPIBAIJ,
2824        0,
2825        MatSetUnfactored_MPIBAIJ,
2826        MatPermute_MPIBAIJ,
2827        MatSetValuesBlocked_MPIBAIJ,
2828 /*59*/ MatGetSubMatrix_MPIBAIJ,
2829        MatDestroy_MPIBAIJ,
2830        MatView_MPIBAIJ,
2831        0,
2832        0,
2833 /*64*/ 0,
2834        0,
2835        0,
2836        0,
2837        0,
2838 /*69*/ MatGetRowMaxAbs_MPIBAIJ,
2839        0,
2840        0,
2841        0,
2842        0,
2843 /*74*/ 0,
2844        MatFDColoringApply_BAIJ,
2845        0,
2846        0,
2847        0,
2848 /*79*/ 0,
2849        0,
2850        0,
2851        0,
2852        MatLoad_MPIBAIJ,
2853 /*84*/ 0,
2854        0,
2855        0,
2856        0,
2857        0,
2858 /*89*/ 0,
2859        0,
2860        0,
2861        0,
2862        0,
2863 /*94*/ 0,
2864        0,
2865        0,
2866        0,
2867        0,
2868 /*99*/ 0,
2869        0,
2870        0,
2871        0,
2872        0,
2873 /*104*/0,
2874        MatRealPart_MPIBAIJ,
2875        MatImaginaryPart_MPIBAIJ,
2876        0,
2877        0,
2878 /*109*/0,
2879        0,
2880        0,
2881        0,
2882        0,
2883 /*114*/MatGetSeqNonzeroStructure_MPIBAIJ,
2884        0,
2885        MatGetGhosts_MPIBAIJ,
2886        0,
2887        0,
2888 /*119*/0,
2889        0,
2890        0,
2891        0,
2892        0,
2893 /*124*/0,
2894        0,
2895        MatInvertBlockDiagonal_MPIBAIJ
2896 };
2897 
2898 EXTERN_C_BEGIN
2899 #undef __FUNCT__
2900 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
2901 PetscErrorCode  MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a)
2902 {
2903   PetscFunctionBegin;
2904   *a = ((Mat_MPIBAIJ *)A->data)->A;
2905   PetscFunctionReturn(0);
2906 }
2907 EXTERN_C_END
2908 
2909 EXTERN_C_BEGIN
2910 extern PetscErrorCode  MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*);
2911 EXTERN_C_END
2912 
2913 EXTERN_C_BEGIN
2914 #undef __FUNCT__
2915 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ"
2916 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2917 {
2918   PetscInt       m,rstart,cstart,cend;
2919   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
2920   const PetscInt *JJ=0;
2921   PetscScalar    *values=0;
2922   PetscErrorCode ierr;
2923 
2924   PetscFunctionBegin;
2925 
2926   if (bs < 1) SETERRQ1(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2927   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2928   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2929   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2930   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2931   m      = B->rmap->n/bs;
2932   rstart = B->rmap->rstart/bs;
2933   cstart = B->cmap->rstart/bs;
2934   cend   = B->cmap->rend/bs;
2935 
2936   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
2937   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
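  /* for each local block row, count the block columns inside [cstart,cend) (the diagonal block)
     and outside it (the off-diagonal block); jj is assumed sorted within each row */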
2938   for (i=0; i<m; i++) {
2939     nz = ii[i+1] - ii[i];
2940     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
2941     nz_max = PetscMax(nz_max,nz);
2942     JJ  = jj + ii[i];
2943     for (j=0; j<nz; j++) {
2944       if (*JJ >= cstart) break;
2945       JJ++;
2946     }
2947     d = 0;
2948     for (; j<nz; j++) {
2949       if (*JJ++ >= cend) break;
2950       d++;
2951     }
2952     d_nnz[i] = d;
2953     o_nnz[i] = nz - d;
2954   }
2955   ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2956   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
2957 
2958   values = (PetscScalar*)V;
2959   if (!values) {
2960     ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2961     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2962   }
2963   for (i=0; i<m; i++) {
2964     PetscInt          row    = i + rstart;
2965     PetscInt          ncols  = ii[i+1] - ii[i];
2966     const PetscInt    *icols = jj + ii[i];
2967     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2968     ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2969   }
2970 
2971   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2972   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2973   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2974   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2975   PetscFunctionReturn(0);
2976 }
2977 EXTERN_C_END
2978 
2979 #undef __FUNCT__
2980 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR"
2981 /*@C
   MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format
   (block compressed row).

   Collective on MPI_Comm

   Input Parameters:
+  A - the matrix
.  bs - the block size
.  i - the indices into j for the start of each local block row (starts with zero)
.  j - the block column indices for each local block row (starts with zero); these must be sorted for each row
-  v - optional values in the matrix
2993 
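   Example:
   A hypothetical layout for illustration only: one local block row with bs = 2 and
   nonzero blocks in block columns 0 and 2 (v omitted, so no values are set):
.vb
   PetscInt    i[] = {0,2};   /* block row 0 owns entries j[0..1]    */
   PetscInt    j[] = {0,2};   /* sorted nonzero block column indices */
   MatMPIBAIJSetPreallocationCSR(A,2,i,j,PETSC_NULL);
.ve
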
2994    Level: developer
2995 
2996 .keywords: matrix, aij, compressed row, sparse, parallel
2997 
2998 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ
2999 @*/
3000 PetscErrorCode  MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3001 {
3002   PetscErrorCode ierr;
3003 
3004   PetscFunctionBegin;
3005   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3006   PetscValidType(B,1);
3007   PetscValidLogicalCollectiveInt(B,bs,2);
3008   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
3009   PetscFunctionReturn(0);
3010 }
3011 
3012 EXTERN_C_BEGIN
3013 #undef __FUNCT__
3014 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
3015 PetscErrorCode  MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
3016 {
3017   Mat_MPIBAIJ    *b;
3018   PetscErrorCode ierr;
3019   PetscInt       i, newbs = PetscAbs(bs);
3020   PetscBool      d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE;
3021 
3022   PetscFunctionBegin;
3023   if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE;
3024   if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE;
3025   if (bs < 0) {
3026     ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPIBAIJ matrix","Mat");CHKERRQ(ierr);
3027       ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr);
3028     ierr = PetscOptionsEnd();CHKERRQ(ierr);
3029     bs   = PetscAbs(bs);
3030   }
3031   if ((d_nnz || o_nnz) && newbs != bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting d_nnz or o_nnz");
3032   bs = newbs;
3033 
3034 
3035   if (bs < 1) SETERRQ(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
3036   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
3037   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
3038   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
3039   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
3040 
3041   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
3042   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
3043   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3044   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3045 
3046   if (d_nnz) {
3047     for (i=0; i<B->rmap->n/bs; i++) {
      if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
3049     }
3050   }
3051   if (o_nnz) {
3052     for (i=0; i<B->rmap->n/bs; i++) {
      if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
3054     }
3055   }
3056 
3057   b = (Mat_MPIBAIJ*)B->data;
3058   b->bs2 = bs*bs;
3059   b->mbs = B->rmap->n/bs;
3060   b->nbs = B->cmap->n/bs;
3061   b->Mbs = B->rmap->N/bs;
3062   b->Nbs = B->cmap->N/bs;
3063 
3064   for (i=0; i<=b->size; i++) {
3065     b->rangebs[i] = B->rmap->range[i]/bs;
3066   }
3067   b->rstartbs = B->rmap->rstart/bs;
3068   b->rendbs   = B->rmap->rend/bs;
3069   b->cstartbs = B->cmap->rstart/bs;
3070   b->cendbs   = B->cmap->rend/bs;
3071 
3072   if (!B->preallocated) {
3073     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3074     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3075     ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
3076     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3077     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3078     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3079     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
3080     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3081     ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr);
3082   }
3083 
3084   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3085   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
3086   /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). */
3087   if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3088   if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3089   B->preallocated = PETSC_TRUE;
3090   PetscFunctionReturn(0);
3091 }
3092 EXTERN_C_END
3093 
3094 EXTERN_C_BEGIN
3095 extern PetscErrorCode  MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
3096 extern PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
3097 EXTERN_C_END
3098 
3099 
3100 EXTERN_C_BEGIN
3101 #undef __FUNCT__
3102 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj"
3103 PetscErrorCode  MatConvert_MPIBAIJ_MPIAdj(Mat B, const MatType newtype,MatReuse reuse,Mat *adj)
3104 {
3105   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
3106   PetscErrorCode ierr;
3107   Mat_SeqBAIJ    *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data;
3108   PetscInt       M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs;
3109   const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;
3110 
3111   PetscFunctionBegin;
3112   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&ii);CHKERRQ(ierr);
3113   ii[0] = 0;
3114   CHKMEMQ;
3115   for (i=0; i<M; i++) {
3116     if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]);
3117     if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]);
3118     ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i];
    /* remove one from the count if this row contains its diagonal entry (MPIAdj stores no diagonal) */
    for (j=id[i]; j<id[i+1]; j++) {
      if (jd[j] == i) {ii[i+1]--; break;}
    }
    CHKMEMQ;
3124   }
3125   ierr = PetscMalloc(ii[M]*sizeof(PetscInt),&jj);CHKERRQ(ierr);
3126   cnt = 0;
  for (i=0; i<M; i++) {
    /* off-diagonal entries that come before the diagonal block */
    for (j=io[i]; j<io[i+1]; j++) {
      if (garray[jo[j]] > rstart) break;
      jj[cnt++] = garray[jo[j]];
      CHKMEMQ;
    }
    /* diagonal-block entries, skipping the diagonal entry itself */
    for (k=id[i]; k<id[i+1]; k++) {
      if (jd[k] != i) {
        jj[cnt++] = rstart + jd[k];
        CHKMEMQ;
      }
    }
    /* remaining off-diagonal entries */
    for (; j<io[i+1]; j++) {
      jj[cnt++] = garray[jo[j]];
      CHKMEMQ;
    }
  }
3144   ierr = MatCreateMPIAdj(((PetscObject)B)->comm,M,B->cmap->N/B->rmap->bs,ii,jj,PETSC_NULL,adj);CHKERRQ(ierr);
3145   PetscFunctionReturn(0);
3146 }
3147 EXTERN_C_END
3148 
3149 #include <../src/mat/impls/aij/mpi/mpiaij.h>
3150 EXTERN_C_BEGIN
3151 PetscErrorCode  MatConvert_SeqBAIJ_SeqAIJ(Mat,const MatType,MatReuse,Mat*);
3152 EXTERN_C_END
3153 
3154 EXTERN_C_BEGIN
3155 #undef __FUNCT__
3156 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAIJ"
3157 PetscErrorCode  MatConvert_MPIBAIJ_MPIAIJ(Mat A,const MatType newtype,MatReuse reuse,Mat *newmat)
3158 {
3159   PetscErrorCode ierr;
3160   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
3161   Mat            B;
3162   Mat_MPIAIJ     *b;
3163 
3164   PetscFunctionBegin;
3165   if (!A->assembled) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Matrix must be assembled");
3166 
3167   ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
3168   ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
3169   ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
3170   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
3171   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
3172   b = (Mat_MPIAIJ*) B->data;
3173 
3174   ierr = MatDestroy(&b->A);CHKERRQ(ierr);
3175   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
3176   ierr = DisAssemble_MPIBAIJ(A);CHKERRQ(ierr);
3177   ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr);
3178   ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr);
3179   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3180   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3181   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3182   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3183   if (reuse == MAT_REUSE_MATRIX) {
3184     ierr = MatHeaderReplace(A,B);CHKERRQ(ierr);
3185   } else {
3186    *newmat = B;
3187   }
3188   PetscFunctionReturn(0);
3189 }
3190 EXTERN_C_END
3191 
3192 EXTERN_C_BEGIN
3193 #if defined(PETSC_HAVE_MUMPS)
3194 extern PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*);
3195 #endif
3196 EXTERN_C_END
3197 
3198 /*MC
3199    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
3200 
3201    Options Database Keys:
3202 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
3203 . -mat_block_size <bs> - set the blocksize used to store the matrix
- -mat_use_hash_table <fact> - use a hash table during matrix assembly, with the given fill factor
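
  Example (a minimal sketch; comm and the sizes m, n, M, N are illustrative):
.vb
  MatCreate(comm,&A);
  MatSetSizes(A,m,n,M,N);
  MatSetFromOptions(A);    /* then run with -mat_type mpibaij -mat_block_size 3 */
.ve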
3205 
3206   Level: beginner
3207 
.seealso: MatCreateMPIBAIJ()
3209 M*/
3210 
3211 EXTERN_C_BEGIN
3212 extern PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,const MatType,MatReuse,Mat*);
3213 EXTERN_C_END
3214 
3215 EXTERN_C_BEGIN
3216 #undef __FUNCT__
3217 #define __FUNCT__ "MatCreate_MPIBAIJ"
3218 PetscErrorCode  MatCreate_MPIBAIJ(Mat B)
3219 {
3220   Mat_MPIBAIJ    *b;
3221   PetscErrorCode ierr;
3222   PetscBool      flg;
3223 
3224   PetscFunctionBegin;
3225   ierr = PetscNewLog(B,Mat_MPIBAIJ,&b);CHKERRQ(ierr);
3226   B->data = (void*)b;
3227 
3228   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3229   B->assembled  = PETSC_FALSE;
3230 
3231   B->insertmode = NOT_SET_VALUES;
3232   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
3233   ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr);
3234 
3235   /* build local table of row and column ownerships */
3236   ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);
3237 
3238   /* build cache for off array entries formed */
3239   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
3240   b->donotstash  = PETSC_FALSE;
3241   b->colmap      = PETSC_NULL;
3242   b->garray      = PETSC_NULL;
3243   b->roworiented = PETSC_TRUE;
3244 
3245   /* stuff used in block assembly */
3246   b->barray       = 0;
3247 
3248   /* stuff used for matrix vector multiply */
3249   b->lvec         = 0;
3250   b->Mvctx        = 0;
3251 
3252   /* stuff for MatGetRow() */
3253   b->rowindices   = 0;
3254   b->rowvalues    = 0;
3255   b->getrowactive = PETSC_FALSE;
3256 
3257   /* hash table stuff */
3258   b->ht           = 0;
3259   b->hd           = 0;
3260   b->ht_size      = 0;
3261   b->ht_flag      = PETSC_FALSE;
3262   b->ht_fact      = 0;
3263   b->ht_total_ct  = 0;
3264   b->ht_insert_ct = 0;
3265 
3266   /* stuff for MatGetSubMatrices_MPIBAIJ_local() */
3267   b->ijonly       = PETSC_FALSE;
3268 
  ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for MPIBAIJ matrix","Mat");CHKERRQ(ierr);
3270     ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
3271     if (flg) {
3272       PetscReal fact = 1.39;
3273       ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
3274       ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr);
3275       if (fact <= 1.0) fact = 1.39;
3276       ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
3277       ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
3278     }
3279   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3280 
3281 #if defined(PETSC_HAVE_MUMPS)
3282   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps",MatGetFactor_baij_mumps);CHKERRQ(ierr);
3283 #endif
3284   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",
3285                                      "MatConvert_MPIBAIJ_MPIAdj",
3286                                       MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr);
3287   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",
3288                                      "MatConvert_MPIBAIJ_MPIAIJ",
3289                                       MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr);
3290   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C",
3291                                      "MatConvert_MPIBAIJ_MPISBAIJ",
3292                                       MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr);
3293   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3294                                      "MatStoreValues_MPIBAIJ",
3295                                      MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
3296   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3297                                      "MatRetrieveValues_MPIBAIJ",
3298                                      MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
3299   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
3300                                      "MatGetDiagonalBlock_MPIBAIJ",
3301                                      MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
3302   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C",
3303                                      "MatMPIBAIJSetPreallocation_MPIBAIJ",
3304                                      MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
3305   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",
                                     "MatMPIBAIJSetPreallocationCSR_MPIBAIJ",
                                     MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
3308   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
3309                                      "MatDiagonalScaleLocal_MPIBAIJ",
3310                                      MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
3311   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C",
3312                                      "MatSetHashTableFactor_MPIBAIJ",
3313                                      MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
3314   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpibstrm_C",
3315                                      "MatConvert_MPIBAIJ_MPIBSTRM",
3316                                       MatConvert_MPIBAIJ_MPIBSTRM);CHKERRQ(ierr);
3317   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);
3318   PetscFunctionReturn(0);
3319 }
3320 EXTERN_C_END
3321 
3322 /*MC
3323    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
3324 
3325    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
3326    and MATMPIBAIJ otherwise.
3327 
3328    Options Database Keys:
3329 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
3330 
3331   Level: beginner
3332 
.seealso: MatCreateMPIBAIJ(), MATSEQBAIJ, MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3334 M*/
3335 
3336 #undef __FUNCT__
3337 #define __FUNCT__ "MatMPIBAIJSetPreallocation"
3338 /*@C
3339    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
3340    (block compressed row).  For good matrix assembly performance
3341    the user should preallocate the matrix storage by setting the parameters
3342    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3343    performance can be increased by more than a factor of 50.
3344 
3345    Collective on Mat
3346 
3347    Input Parameters:
3348 +  A - the matrix
.  bs   - size of block
.  d_nz  - number of block nonzeros per block row in the diagonal portion of the local
           submatrix (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           in the diagonal portion of the local submatrix (possibly different for each block
           row) or PETSC_NULL.  If you plan to factor the matrix you must leave room for the diagonal entry and
           set it even if it is zero.
.  o_nz  - number of block nonzeros per block row in the off-diagonal portion of the local
           submatrix (same for all local rows).
-  o_nnz - array containing the number of block nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or PETSC_NULL.
3361 
3362    If the *_nnz parameter is given then the *_nz parameter is ignored
3363 
3364    Options Database Keys:
3365 +   -mat_block_size - size of the blocks to use
-   -mat_use_hash_table <fact> - use a hash table during matrix assembly, with the given fill factor
3367 
3368    Notes:
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
3371 
3372    Storage Information:
3373    For a square global matrix we define each processor's diagonal portion
3374    to be its local rows and the corresponding columns (a square submatrix);
3375    each processor's off-diagonal portion encompasses the remainder of the
3376    local matrix (a rectangular submatrix).
3377 
3378    The user can specify preallocated storage for the diagonal part of
3379    the local submatrix with either d_nz or d_nnz (not both).  Set
3380    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
3381    memory allocation.  Likewise, specify preallocated storage for the
3382    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3383 
3384    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3385    the figure below we depict these three local rows and all columns (0-11).
3386 
3387 .vb
3388            0 1 2 3 4 5 6 7 8 9 10 11
3389           -------------------
3390    row 3  |  o o o d d d o o o o o o
3391    row 4  |  o o o d d d o o o o o o
3392    row 5  |  o o o d d d o o o o o o
3393           -------------------
3394 .ve
3395 
3396    Thus, any entries in the d locations are stored in the d (diagonal)
3397    submatrix, and any entries in the o locations are stored in the
3398    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3399    stored simply in the MATSEQBAIJ format for compressed row storage.
3400 
3401    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3402    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3403    In general, for PDE problems in which most nonzeros are near the diagonal,
3404    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3405    or you will get TERRIBLE performance; see the users' manual chapter on
3406    matrices.
3407 
   You can call MatGetInfo() to get information on how effective the preallocation was,
   for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
   You can also run with the option -info and look for messages with the string
   malloc in them to see if additional memory allocation was needed.
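
   Example (a minimal sketch; comm and the sizes are illustrative, error checking omitted):
.vb
   Mat A;
   MatCreate(comm,&A);
   MatSetSizes(A,m,n,M,N);
   MatSetType(A,MATMPIBAIJ);
   MatMPIBAIJSetPreallocation(A,2,5,PETSC_NULL,2,PETSC_NULL);
.ve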
3412 
3413    Level: intermediate
3414 
3415 .keywords: matrix, block, aij, compressed row, sparse, parallel
3416 
3417 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR()
3418 @*/
3419 PetscErrorCode  MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3420 {
3421   PetscErrorCode ierr;
3422 
3423   PetscFunctionBegin;
3424   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3425   PetscValidType(B,1);
3426   PetscValidLogicalCollectiveInt(B,bs,2);
3427   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3428   PetscFunctionReturn(0);
3429 }
3430 
3431 #undef __FUNCT__
3432 #define __FUNCT__ "MatCreateMPIBAIJ"
3433 /*@C
3434    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
3435    (block compressed row).  For good matrix assembly performance
3436    the user should preallocate the matrix storage by setting the parameters
3437    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3438    performance can be increased by more than a factor of 50.
3439 
3440    Collective on MPI_Comm
3441 
3442    Input Parameters:
3443 +  comm - MPI communicator
.  bs   - size of block
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
           This value should be the same as the local size used in creating the
           y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
           This value should be the same as the local size used in creating the
           x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.  d_nz  - number of nonzero blocks per block row in the diagonal portion of the local
           submatrix (same for all local rows)
.  d_nnz - array containing the number of nonzero blocks in the various block rows
           in the diagonal portion of the local submatrix (possibly different for each block
           row) or PETSC_NULL.  If you plan to factor the matrix you must leave room for the diagonal entry
           and set it even if it is zero.
.  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of the local
           submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzero blocks in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or PETSC_NULL.
3464 
3465    Output Parameter:
3466 .  A - the matrix
3467 
3468    Options Database Keys:
3469 +   -mat_block_size - size of the blocks to use
-   -mat_use_hash_table <fact> - use a hash table during matrix assembly, with the given fill factor
3471 
3472    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
3474    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3475 
3476    Notes:
3477    If the *_nnz parameter is given then the *_nz parameter is ignored
3478 
   A nonzero block is any block that has 1 or more nonzeros in it
3480 
3481    The user MUST specify either the local or global matrix dimensions
3482    (possibly both).
3483 
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
3486 
3487    Storage Information:
3488    For a square global matrix we define each processor's diagonal portion
3489    to be its local rows and the corresponding columns (a square submatrix);
3490    each processor's off-diagonal portion encompasses the remainder of the
3491    local matrix (a rectangular submatrix).
3492 
3493    The user can specify preallocated storage for the diagonal part of
3494    the local submatrix with either d_nz or d_nnz (not both).  Set
3495    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
3496    memory allocation.  Likewise, specify preallocated storage for the
3497    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3498 
3499    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3500    the figure below we depict these three local rows and all columns (0-11).
3501 
3502 .vb
3503            0 1 2 3 4 5 6 7 8 9 10 11
3504           -------------------
3505    row 3  |  o o o d d d o o o o o o
3506    row 4  |  o o o d d d o o o o o o
3507    row 5  |  o o o d d d o o o o o o
3508           -------------------
3509 .ve
3510 
3511    Thus, any entries in the d locations are stored in the d (diagonal)
3512    submatrix, and any entries in the o locations are stored in the
3513    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3514    stored simply in the MATSEQBAIJ format for compressed row storage.
3515 
3516    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3517    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3518    In general, for PDE problems in which most nonzeros are near the diagonal,
3519    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3520    or you will get TERRIBLE performance; see the users' manual chapter on
3521    matrices.
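
   Example (a minimal sketch; the global sizes M and N are illustrative):
.vb
   Mat A;
   MatCreateMPIBAIJ(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DECIDE,M,N,5,PETSC_NULL,2,PETSC_NULL,&A);
.ve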
3522 
3523    Level: intermediate
3524 
3525 .keywords: matrix, block, aij, compressed row, sparse, parallel
3526 
3527 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3528 @*/
3529 PetscErrorCode  MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3530 {
3531   PetscErrorCode ierr;
3532   PetscMPIInt    size;
3533 
3534   PetscFunctionBegin;
3535   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3536   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3537   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3538   if (size > 1) {
3539     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
3540     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3541   } else {
3542     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3543     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3544   }
3545   PetscFunctionReturn(0);
3546 }
3547 
3548 #undef __FUNCT__
3549 #define __FUNCT__ "MatDuplicate_MPIBAIJ"
3550 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3551 {
3552   Mat            mat;
3553   Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
3554   PetscErrorCode ierr;
3555   PetscInt       len=0;
3556 
3557   PetscFunctionBegin;
3558   *newmat       = 0;
3559   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
3560   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3561   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3562   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3563 
3564   mat->factortype   = matin->factortype;
3565   mat->preallocated = PETSC_TRUE;
3566   mat->assembled    = PETSC_TRUE;
3567   mat->insertmode   = NOT_SET_VALUES;
3568 
3569   a      = (Mat_MPIBAIJ*)mat->data;
3570   mat->rmap->bs  = matin->rmap->bs;
3571   a->bs2   = oldmat->bs2;
3572   a->mbs   = oldmat->mbs;
3573   a->nbs   = oldmat->nbs;
3574   a->Mbs   = oldmat->Mbs;
3575   a->Nbs   = oldmat->Nbs;
3576 
3577   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3578   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3579 
3580   a->size         = oldmat->size;
3581   a->rank         = oldmat->rank;
3582   a->donotstash   = oldmat->donotstash;
3583   a->roworiented  = oldmat->roworiented;
3584   a->rowindices   = 0;
3585   a->rowvalues    = 0;
3586   a->getrowactive = PETSC_FALSE;
3587   a->barray       = 0;
3588   a->rstartbs     = oldmat->rstartbs;
3589   a->rendbs       = oldmat->rendbs;
3590   a->cstartbs     = oldmat->cstartbs;
3591   a->cendbs       = oldmat->cendbs;
3592 
3593   /* hash table stuff */
3594   a->ht           = 0;
3595   a->hd           = 0;
3596   a->ht_size      = 0;
3597   a->ht_flag      = oldmat->ht_flag;
3598   a->ht_fact      = oldmat->ht_fact;
3599   a->ht_total_ct  = 0;
3600   a->ht_insert_ct = 0;
3601 
3602   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
3603   if (oldmat->colmap) {
3604 #if defined (PETSC_USE_CTABLE)
3605   ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3606 #else
3607   ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3608   ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3609   ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3610 #endif
3611   } else a->colmap = 0;
3612 
3613   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
3614     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3615     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3616     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
3617   } else a->garray = 0;
3618 
3619   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
3620   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3621   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3622   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3623   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3624 
3625   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3626   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3627   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3628   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3629   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3630   *newmat = mat;
3631 
3632   PetscFunctionReturn(0);
3633 }
3634 
3635 #undef __FUNCT__
3636 #define __FUNCT__ "MatLoad_MPIBAIJ"
3637 PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer)
3638 {
3639   PetscErrorCode ierr;
3640   int            fd;
3641   PetscInt       i,nz,j,rstart,rend;
3642   PetscScalar    *vals,*buf;
3643   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3644   MPI_Status     status;
3645   PetscMPIInt    rank,size,maxnz;
3646   PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
3647   PetscInt       *locrowlens = PETSC_NULL,*procsnz = PETSC_NULL,*browners = PETSC_NULL;
3648   PetscInt       jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax;
3649   PetscMPIInt    tag = ((PetscObject)viewer)->tag;
3650   PetscInt       *dlens = PETSC_NULL,*odlens = PETSC_NULL,*mask = PETSC_NULL,*masked1 = PETSC_NULL,*masked2 = PETSC_NULL,rowcount,odcount;
3651   PetscInt       dcount,kmax,k,nzcount,tmp,mend,sizesset=1,grows,gcols;
3652 
3653   PetscFunctionBegin;
  ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIBAIJ matrix","Mat");CHKERRQ(ierr);
3655     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
3656   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3657 
3658   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3659   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3660   if (!rank) {
3661     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3662     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
3663     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3664   }
3665 
3666   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
3667 
3668   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3669   M = header[1]; N = header[2];
3670 
3671   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3672   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
3673   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
3674 
3675   /* If global sizes are set, check if they are consistent with that given in the file */
3676   if (sizesset) {
3677     ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
3678   }
  if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",M,grows);
  if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",N,gcols);
3681 
3682   if (M != N) SETERRQ(((PetscObject)viewer)->comm,PETSC_ERR_SUP,"Can only do square matrices");
3683 
3684   /*
3685      This code adds extra rows to make sure the number of rows is
3686      divisible by the blocksize
3687   */
3688   Mbs        = M/bs;
3689   extra_rows = bs - M + bs*Mbs;
3690   if (extra_rows == bs) extra_rows = 0;
3691   else                  Mbs++;
3692   if (extra_rows && !rank) {
3693     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3694   }
3695 
3696   /* determine ownership of all rows */
3697   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
3698     mbs        = Mbs/size + ((Mbs % size) > rank);
3699     m          = mbs*bs;
3700   } else { /* User set */
3701     m          = newmat->rmap->n;
3702     mbs        = m/bs;
3703   }
3704   ierr       = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr);
3705   ierr       = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3706 
3707   /* process 0 needs enough room for process with most rows */
3708   if (!rank) {
3709     mmax = rowners[1];
3710     for (i=2; i<size; i++) {
3711       mmax = PetscMax(mmax,rowners[i]);
3712     }
3713     mmax*=bs;
3714   } else mmax = m;
3715 
3716   rowners[0] = 0;
3717   for (i=2; i<=size; i++)  rowners[i] += rowners[i-1];
3718   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
3719   rstart = rowners[rank];
3720   rend   = rowners[rank+1];
3721 
3722   /* distribute row lengths to all processors */
3723   ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr);
3724   if (!rank) {
3725     mend = m;
3726     if (size == 1) mend = mend - extra_rows;
3727     ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr);
3728     for (j=mend; j<m; j++) locrowlens[j] = 1;
3729     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3730     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3731     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3732     for (j=0; j<m; j++) {
3733       procsnz[0] += locrowlens[j];
3734     }
3735     for (i=1; i<size; i++) {
3736       mend = browners[i+1] - browners[i];
3737       if (i == size-1) mend = mend - extra_rows;
3738       ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr);
3739       for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
3740       /* calculate the number of nonzeros on each processor */
3741       for (j=0; j<browners[i+1]-browners[i]; j++) {
3742         procsnz[i] += rowlengths[j];
3743       }
3744       ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3745     }
3746     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3747   } else {
3748     ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3749   }
3750 
3751   if (!rank) {
3752     /* determine max buffer needed and allocate it */
3753     maxnz = procsnz[0];
3754     for (i=1; i<size; i++) {
3755       maxnz = PetscMax(maxnz,procsnz[i]);
3756     }
3757     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3758 
3759     /* read in my part of the matrix column indices  */
3760     nz     = procsnz[0];
3761     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
3762     mycols = ibuf;
3763     if (size == 1)  nz -= extra_rows;
3764     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3765     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
3766 
    /* read in every other processor's column indices (except the last one's) and ship them off */
3768     for (i=1; i<size-1; i++) {
3769       nz   = procsnz[i];
3770       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3771       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3772     }
3773     /* read in the stuff for the last proc */
3774     if (size != 1) {
3775       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
3776       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3777       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
3778       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
3779     }
3780     ierr = PetscFree(cols);CHKERRQ(ierr);
3781   } else {
3782     /* determine buffer space needed for message */
3783     nz = 0;
3784     for (i=0; i<m; i++) {
3785       nz += locrowlens[i];
3786     }
3787     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
3788     mycols = ibuf;
    /* receive message of column indices */
3790     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3791     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
3792     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
3793   }
3794 
3795   /* loop over local rows, determining number of off diagonal entries */
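  /* mask[] marks block columns already counted for the current block row; masked1[]/masked2[]
     record which mask entries were set so that only those are cleared again afterwards */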
3796   ierr     = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
3797   ierr     = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
3798   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3799   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3800   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3801   rowcount = 0; nzcount = 0;
3802   for (i=0; i<mbs; i++) {
3803     dcount  = 0;
3804     odcount = 0;
3805     for (j=0; j<bs; j++) {
3806       kmax = locrowlens[rowcount];
3807       for (k=0; k<kmax; k++) {
3808         tmp = mycols[nzcount++]/bs;
3809         if (!mask[tmp]) {
3810           mask[tmp] = 1;
3811           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
3812           else masked1[dcount++] = tmp;
3813         }
3814       }
3815       rowcount++;
3816     }
3817 
3818     dlens[i]  = dcount;
3819     odlens[i] = odcount;
3820 
3821     /* zero out the mask elements we set */
3822     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
3823     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
3824   }
3825 
3826 
3827   if (!sizesset) {
3828     ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3829   }
3830   ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
3831 
3832   if (!rank) {
3833     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
3834     /* read in my part of the matrix numerical values  */
3835     nz = procsnz[0];
3836     vals = buf;
3837     mycols = ibuf;
3838     if (size == 1)  nz -= extra_rows;
3839     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3840     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
3841 
3842     /* insert into matrix */
3843     jj      = rstart*bs;
3844     for (i=0; i<m; i++) {
3845       ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3846       mycols += locrowlens[i];
3847       vals   += locrowlens[i];
3848       jj++;
3849     }
3850     /* read in other processors (except the last one) and ship out */
3851     for (i=1; i<size-1; i++) {
3852       nz   = procsnz[i];
3853       vals = buf;
3854       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3855       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3856     }
3857     /* the last proc */
    if (size != 1) {
      nz   = procsnz[size-1] - extra_rows;   /* the extra rows are not on the disk */
3860       vals = buf;
3861       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3862       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
3863       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3864     }
3865     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3866   } else {
3867     /* receive numeric values */
3868     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
3869 
    /* receive message of values */
3871     vals   = buf;
3872     mycols = ibuf;
3873     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
3874     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
3875     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
3876 
3877     /* insert into matrix */
3878     jj      = rstart*bs;
3879     for (i=0; i<m; i++) {
3880       ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3881       mycols += locrowlens[i];
3882       vals   += locrowlens[i];
3883       jj++;
3884     }
3885   }
3886   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
3887   ierr = PetscFree(buf);CHKERRQ(ierr);
3888   ierr = PetscFree(ibuf);CHKERRQ(ierr);
3889   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
3890   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
3891   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
3892   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3893   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3894 
3895   PetscFunctionReturn(0);
3896 }
3897 
3898 #undef __FUNCT__
3899 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
3900 /*@
   MatMPIBAIJSetHashTableFactor - Sets the factor used to compute the size of the hash table.

   Not Collective; each process can use a different factor

   Input Parameters:
+  mat  - the matrix
-  fact - the hash table factor
3908 
3909    Level: advanced
3910 
3911   Notes:
3912    This can also be set by the command line option: -mat_use_hash_table <fact>
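
   Example (a minimal sketch; 1.39 matches the default factor applied when the hash table is enabled):
.vb
   MatSetOption(mat,MAT_USE_HASH_TABLE,PETSC_TRUE);
   MatMPIBAIJSetHashTableFactor(mat,1.39);
.ve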
3913 
3914 .keywords: matrix, hashtable, factor, HT
3915 
3916 .seealso: MatSetOption()
3917 @*/
3918 PetscErrorCode  MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
3919 {
3920   PetscErrorCode ierr;
3921 
3922   PetscFunctionBegin;
3923   ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr);
3924   PetscFunctionReturn(0);
3925 }
3926 
3927 EXTERN_C_BEGIN
3928 #undef __FUNCT__
3929 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
3930 PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
3931 {
3932   Mat_MPIBAIJ *baij;
3933 
3934   PetscFunctionBegin;
3935   baij = (Mat_MPIBAIJ*)mat->data;
3936   baij->ht_fact = fact;
3937   PetscFunctionReturn(0);
3938 }
3939 EXTERN_C_END
3940 
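/*
    MatMPIBAIJGetSeqBAIJ - returns the sequential diagonal block Ad, the off-diagonal block Ao,
    and garray, the map from Ao's block columns to global block columns. Not collective.
*/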
3941 #undef __FUNCT__
3942 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
3943 PetscErrorCode  MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3944 {
3945   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
3946   PetscFunctionBegin;
3947   *Ad     = a->A;
3948   *Ao     = a->B;
3949   *colmap = a->garray;
3950   PetscFunctionReturn(0);
3951 }
3952 
3953 /*
3954     Special version for direct calls from Fortran (to eliminate two function call overheads
3955 */
3956 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3957 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3958 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3959 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3960 #endif
3961 
3962 #undef __FUNCT__
#define __FUNCT__ "matmpibaijsetvaluesblocked"
3964 /*@C
3965   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked()
3966 
3967   Collective on Mat
3968 
3969   Input Parameters:
3970 + mat - the matrix
3971 . min - number of input rows
3972 . im - input rows
3973 . nin - number of input columns
3974 . in - input columns
3975 . v - numerical values input
3976 - addvin - INSERT_VALUES or ADD_VALUES
3977 
  Notes: This is a complete copy of MatSetValuesBlocked_MPIBAIJ(), which is terrible code duplication.
3979 
3980   Level: advanced
3981 
3982 .seealso:   MatSetValuesBlocked()
3983 @*/
PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
{
  /* convert input arguments to C version */
  Mat             mat = *matin;
  PetscInt        m = *min, n = *nin;
  InsertMode      addv = *addvin;

  Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
  const MatScalar *value;
  MatScalar       *barray=baij->barray;
  PetscBool       roworiented = baij->roworiented;
  PetscErrorCode  ierr;
  PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
  PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  /* tasks normally handled by MatSetValuesBlocked() */
  if (mat->insertmode == NOT_SET_VALUES) {
    mat->insertmode = addv;
  }
#if defined(PETSC_USE_DEBUG)
  else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
  if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
#endif
  if (mat->assembled) {
    mat->was_assembled = PETSC_TRUE;
    mat->assembled     = PETSC_FALSE;
  }
  ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);

  /* allocate the bs2 scratch block used when values must be copied */
  if (!barray) {
    ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
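  /* in the copy loop below, value advances by stepval after each row of a block:
     a full logical row of v holds stepval+bs (= n*bs or m*bs) scalars, of which
     bs belong to the current block */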
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If the input holds a single block column (row oriented) or a single
           block row (column oriented), the block is already contiguous in v
           and no copy is required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++ = *value++;
            }
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend) {
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
        else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined (PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
            }
          }
          else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }

  /* task normally handled by MatSetValuesBlocked() */
  ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
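
/*
   Example (a minimal sketch of the value layout the routine above unpacks):
   inserting one 2 x 2 block through the usual C interface; with the default
   row-oriented storage the bs*bs values are the block in row-major order.

     PetscInt    ib = 0,jb = 0;            /* block row and block column */
     PetscScalar vb[4] = {1.0,2.0,         /* | 1 2 | */
                          3.0,4.0};        /* | 3 4 | */
     ierr = MatSetValuesBlocked(mat,1,&ib,1,&jb,vb,INSERT_VALUES);CHKERRQ(ierr);
*/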

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPIBAIJWithArrays"
/*@
     MatCreateMPIBAIJWithArrays - creates an MPI BAIJ matrix using arrays that contain
         the local rows in standard CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs - the block size, only a block size of 1 is supported
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       it calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.   i - row indices
.   j - column indices
-   a - matrix values

   Output Parameter:
.   mat - the matrix

   Level: intermediate

   Notes:
       The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of a[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

       The i and j indices are 0 based, and the i array holds the offsets into the local j (and a)
     arrays at which each local row starts.

.keywords: matrix, baij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR(),
          MPIBAIJ, MatCreateMPIBAIJ(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode  MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (i[0]) {
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  }
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIBAIJ);CHKERRQ(ierr);
  ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
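
/*
   Example usage (a minimal sketch, run on one process for simplicity): two
   local rows of a 2 x 2 diagonal matrix with bs = 1, in CSR form.

     PetscInt    i[3] = {0,1,2};   /* i[r] is the offset of row r in j[] and a[] */
     PetscInt    j[2] = {0,1};     /* global column indices */
     PetscScalar a[2] = {4.0,5.0};
     Mat         mat;
     ierr = MatCreateMPIBAIJWithArrays(PETSC_COMM_SELF,1,2,2,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,&mat);CHKERRQ(ierr);
*/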