/* petsc/src/mat/impls/baij/mpi/mpibaij.c (revision f73d5cc4fab9985badaa1a29b152f456bf59ad34) */

#include <../src/mat/impls/baij/mpi/mpibaij.h>   /*I  "petscmat.h"  I*/
#include <petscblaslapack.h>

extern PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
extern PetscErrorCode MatDisAssemble_MPIBAIJ(Mat);
extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []);
extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
extern PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
extern PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar,Vec,Vec);

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ"
PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;}
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);}
  ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++) {
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {va[i] = vb[i]; if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);}
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);
  ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
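
/*
   Illustrative usage sketch (assumes a MATMPIBAIJ matrix A with m local rows;
   not part of this file): MatGetRowMaxAbs() dispatches to the routine above.

     Vec      v;
     PetscInt *idx;

     ierr = MatGetVecs(A,PETSC_NULL,&v);CHKERRQ(ierr);
     ierr = PetscMalloc(m*sizeof(PetscInt),&idx);CHKERRQ(ierr);
     ierr = MatGetRowMaxAbs(A,v,idx);CHKERRQ(ierr);

   On return v[i] holds the entry of largest absolute value in local row i and
   idx[i] the global column at which it occurs.
*/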

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPIBAIJ"
PetscErrorCode  MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
PetscErrorCode  MatRetrieveValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
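
/*
   MatStoreValues()/MatRetrieveValues() simply forward to the sequential
   routines on the diagonal (A) and off-diagonal (B) blocks.  They support
   the common pattern of saving and restoring the numerical values of a
   matrix whose nonzero structure is frozen; a sketch:

     ierr = MatSetOption(mat,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);CHKERRQ(ierr);
     ierr = MatStoreValues(mat);CHKERRQ(ierr);
       ... modify the matrix values ...
     ierr = MatRetrieveValues(mat);CHKERRQ(ierr);
*/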

/*
     Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  This is done in a non-scalable way, since the
   length of colmap is proportional to the global number of columns.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateColmap_MPIBAIJ_Private"
PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
  for (i=0; i<nbs; i++){
    ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}
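
/*
   Lookup idiom for the (non-CTABLE) colmap built above: for a global point
   column gcol the matching local column of B is

     lcol = baij->colmap[gcol/bs] - 1 + gcol%bs;

   The stored entries are offset by one so that a zero means "this block
   column is not present in the off-diagonal part"; MatSetValues_MPIBAIJ()
   and MatGetValues_MPIBAIJ() below use exactly this pattern.
*/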

#define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto a_noinsert; \
        } \
      } \
      if (a->nonew == 1) goto a_noinsert; \
      if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
      MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      a_noinsert:; \
    ailen[brow] = nrow; \
}

#define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto b_noinsert; \
        } \
      } \
      if (b->nonew == 1) goto b_noinsert; \
      if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
      MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
      CHKMEMQ;\
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      b_noinsert:; \
    bilen[brow] = nrow; \
}
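
/*
   Both macros above locate block column col/bs within block row row/bs of the
   local storage: a short bisection (while high-low > 3) narrows the search
   range, a linear scan finds the exact position, and the scalar at offset
   bs2*_i + bs*cidx + ridx within the bs x bs block is updated in place.  If
   the block column is absent and nonew permits insertion, the later entries
   of the row are shifted up one block to make room.
*/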

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ"
PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  MatScalar      value;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig=mat->cmap->rend,bs=mat->rmap->bs;

  /* Some variables required by the MatSetValues_SeqBAIJ_{A,B}_Private() macros above */
  Mat            A = baij->A;
  Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)(A)->data;
  PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar      *aa=a->a;

  Mat            B = baij->B;
  Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(B)->data;
  PetscInt       *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar      *ba=b->a;

  PetscInt       *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt       low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar      *ap,*bap;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      row = im[i] - rstart_orig;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig){
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B = baij->B;
              b = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba=b->a;
            } else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
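
/*
   Usage sketch (illustrative only): inserting point values into a parallel
   BAIJ matrix.  Values destined for other processes are stashed above and
   communicated during MatAssemblyBegin()/MatAssemblyEnd().  M, N, bs, d_nz
   and o_nz below stand for the global sizes, block size and per-block-row
   preallocation.

     Mat         mat;
     PetscInt    row = 0,col = 0;
     PetscScalar val = 1.0;

     ierr = MatCreate(PETSC_COMM_WORLD,&mat);CHKERRQ(ierr);
     ierr = MatSetSizes(mat,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(mat,MATMPIBAIJ);CHKERRQ(ierr);
     ierr = MatMPIBAIJSetPreallocation(mat,bs,d_nz,PETSC_NULL,o_nz,PETSC_NULL);CHKERRQ(ierr);
     ierr = MatSetValues(mat,1,&row,1,&col,&val,INSERT_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/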

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  const PetscScalar *value;
  MatScalar         *barray=baij->barray;
  PetscBool         roworiented = baij->roworiented;
  PetscErrorCode    ierr;
  PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
  PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If only one block column (row-oriented) or one block row (column-oriented) is being set, the input is already contiguous and no copy is required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + (i*(stepval+bs) + j)*bs;
          } else {
            value = v + (j*(stepval+bs) + i)*bs;
          }
          for (ii=0; ii<bs; ii++,value+=bs+stepval) {
            for (jj=0; jj<bs; jj++) {
              barray[jj]  = value[jj];
            }
            barray += bs;
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend){
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
        else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined (PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
            }
          }
          else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
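
/*
   Usage sketch for the blocked interface (illustrative only): im[]/in[] are
   BLOCK row/column indices and v supplies bs*bs values per block, e.g. with
   bs = 2:

     PetscInt    brow = 0,bcol = 0;
     PetscScalar blk[4] = {1.0,2.0,3.0,4.0};

     ierr = MatSetValuesBlocked(mat,1,&brow,1,&bcol,blk,INSERT_VALUES);CHKERRQ(ierr);

   With the default row-oriented ordering blk is read row by row; see
   MatSetOption(mat,MAT_ROW_ORIENTED,PETSC_FALSE) for column orientation.
*/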

#define HASH_KEY 0.6180339887
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
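
/*
   HASH() is multiplicative hashing: it takes the fractional part of
   key*HASH_KEY (HASH_KEY being the fractional part of the golden ratio) and
   scales it by the table size.  Worked example with size = 11, key = 100:

     tmp  = 100*0.6180339887          = 61.80339887
     frac = tmp - (PetscInt)tmp       = 0.80339887
     h1   = (PetscInt)(11*0.80339887) = 8
*/
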
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig=mat->rmap->rend,Nbs=baij->Nbs;
  PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
  PetscReal      tmp;
  MatScalar      **HD = baij->hd,value;
#if defined(PETSC_USE_DEBUG)
  PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j=0; j<n; j++) {
        col = in[j];
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        /* Look up in the hash table */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#endif
        /* A hash table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscBool         roworiented = baij->roworiented;
  PetscErrorCode    ierr;
  PetscInt          i,j,ii,jj,row,col;
  PetscInt          rstart=baij->rstartbs;
  PetscInt          rend=baij->rendbs,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
  PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  PetscReal         tmp;
  MatScalar         **HD = baij->hd,*baij_a;
  const PetscScalar *v_t,*value;
#if defined(PETSC_USE_DEBUG)
  PetscInt          total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
#endif
    row   = im[i];
    v_t   = v + i*nbs2;
    if (row >= rstart && row < rend) {
      for (j=0; j<n; j++) {
        col = in[j];

        /* Look up in the hash table */
        key = row*Nbs+col+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        total_ct++;
        insert_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#endif
        baij_a = HD[idx];
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t  += bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj]  += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj]  = *value++;
              }
            }
          }
        } else {
          value = v + j*(stepval+bs)*bs + i*bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj]  += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj]  = *value++;
              }
            }
          }
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIBAIJ"
PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
  PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend){
          col = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIBAIJ"
PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v  = amat->a;
      nz = amat->nz*bs2;
      for (i=0; i<nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v  = bmat->a;
      nz = bmat->nz*bs2;
      for (i=0; i<nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      *nrm = PetscSqrtReal(*nrm);
    } else if (type == NORM_1) { /* max column sum */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
      ierr = PetscMalloc2(mat->cmap->N,PetscReal,&tmp,mat->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      v = amat->a; jj = amat->j;
      for (i=0; i<amat->nz; i++) {
        for (j=0; j<bs; j++){
          col = bs*(cstart + *jj) + j; /* column index */
          for (row=0; row<bs; row++){
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      v = bmat->a; jj = bmat->j;
      for (i=0; i<bmat->nz; i++) {
        for (j=0; j<bs; j++){
          col = bs*garray[*jj] + j;
          for (row=0; row<bs; row++){
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      *nrm = 0.0;
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row sum */
      PetscReal *sums;
      ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr);
      sum  = 0.0;
      for (j=0; j<amat->mbs; j++) {
        for (row=0; row<bs; row++) sums[row] = 0.0;
        v  = amat->a + bs2*amat->i[j];
        nz = amat->i[j+1]-amat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++){
            for (row=0; row<bs; row++){
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        v  = bmat->a + bs2*bmat->i[j];
        nz = bmat->i[j+1]-bmat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++){
            for (row=0; row<bs; row++){
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        for (row=0; row<bs; row++){
          if (sums[row] > sum) sum = sums[row];
        }
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
      ierr = PetscFree(sums);CHKERRQ(ierr);
    } else SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for this norm yet");
  }
  PetscFunctionReturn(0);
}
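
/*
   For reference, the norms computed above are

     NORM_FROBENIUS: ||A||_F   = sqrt(sum_ij |a_ij|^2)   local sums of squares, MPI_Allreduce with MPIU_SUM
     NORM_1:         ||A||_1   = max_j sum_i |a_ij|      per-column sums reduced with MPIU_SUM, then a max
     NORM_INFINITY:  ||A||_inf = max_i sum_j |a_ij|      local row sums, MPI_Allreduce with MPIU_MAX

   e.g.  ierr = MatNorm(mat,NORM_FROBENIUS,&nrm);CHKERRQ(ierr);
*/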

/*
  Creates and populates the hash table.
  This table is created only once.
  If new entries need to be added to the matrix,
  then the hash table has to be destroyed and
  recreated.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            A = baij->A,B=baij->B;
  Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
  PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
  PetscErrorCode ierr;
  PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
  PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
  PetscInt       *HT,key;
  MatScalar      **HD;
  PetscReal      tmp;
#if defined(PETSC_USE_INFO)
  PetscInt       ct=0,max=0;
#endif

  PetscFunctionBegin;
  if (baij->ht) PetscFunctionReturn(0);

  baij->ht_size = (PetscInt)(factor*nz);
  ht_size       = baij->ht_size;

  /* Allocate Memory for the Hash Table */
  ierr = PetscMalloc2(ht_size,MatScalar*,&baij->hd,ht_size,PetscInt,&baij->ht);CHKERRQ(ierr);
  ierr = PetscMemzero(baij->hd,ht_size*sizeof(MatScalar*));CHKERRQ(ierr);
  ierr = PetscMemzero(baij->ht,ht_size*sizeof(PetscInt));CHKERRQ(ierr);
  HD   = baij->hd;
  HT   = baij->ht;

  /* Loop over A */
  for (i=0; i<a->mbs; i++) {
    for (j=ai[i]; j<ai[i+1]; j++) {
      row = i+rstart;
      col = aj[j]+cstart;

      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++){
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = a->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }
  /* Loop over B */
  for (i=0; i<b->mbs; i++) {
    for (j=bi[i]; j<bi[i+1]; j++) {
      row = i+rstart;
      col = garray[bj[j]];
      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++){
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = b->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }

  /* Print a summary */
#if defined(PETSC_USE_INFO)
  for (i=0,j=0; i<ht_size; i++) {
    if (HT[i]) {j++;}
  }
  ierr = PetscInfo2(mat,"Average Search = %5.2f, max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}
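
/*
   The hash-table insertion path is optional: it is requested with
   MatSetOption(mat,MAT_USE_HASH_TABLE,PETSC_TRUE) (or the -mat_use_hash_table
   option), which sets baij->ht_flag.  The table itself is then built at the
   end of the first final assembly; see MatAssemblyEnd_MPIBAIJ() below, which
   also switches the setvalues function pointers to the *_HT variants.
*/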

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (baij->donotstash || mat->nooffprocentries) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are in either INSERT_VALUES or ADD_VALUES mode */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscBool      r1,r2,r3,other_disassembled;
  MatScalar      *val;
  InsertMode     addv = mat->insertmode;
  PetscMPIInt    n;

  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!baij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;
    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so, we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatSetOption(baij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  baij->rowvalues = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = baij->size,rank = baij->rank;
  PetscInt          bs = mat->rmap->bs;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
             rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto the first processor. */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar   *a;

    /* Here we are creating a temporary matrix, so we will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ*)baij->B->data;
    ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      /* Set the type name to MATMPIBAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqBAIJ_ASCII() */
      ierr = PetscStrcpy(((PetscObject)((Mat_MPIBAIJ*)(A->data))->A)->type_name,MATMPIBAIJ);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_Binary"
static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *A = (Mat_SeqBAIJ*)a->A->data;
  Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)a->B->data;
  PetscErrorCode ierr;
  PetscInt       i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen;
  PetscInt       *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll;
  int            fd;
  PetscScalar    *column_values;
  FILE           *file;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  PetscInt       message_count,flowcontrolcount;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
  nz   = bs2*(A->nz + B->nz);
  rlen = mat->rmap->n;
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;
    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get the largest number of rows any processor has */
    range = mat->rmap->range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
  }

  ierr = PetscMalloc((rlen/bs)*sizeof(PetscInt),&crow_lens);CHKERRQ(ierr);
  /* compute the lengths of each row */
  for (i=0; i<a->mbs; i++) {
    crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }
  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscMalloc(rlen*sizeof(PetscInt),&row_lens);CHKERRQ(ierr);
    rlen = (range[1] - range[0])/bs;
    for (i=0; i<rlen; i++) {
      for (j=0; j<bs; j++) {
        row_lens[i*bs+j] = bs*crow_lens[i];
      }
    }
    ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = (range[i+1] - range[i])/bs;
      ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      for (k=0; k<rlen; k++) {
        for (j=0; j<bs; j++) {
          row_lens[k*bs+j] = bs*crow_lens[k];
        }
      }
      ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
    ierr = PetscFree(row_lens);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
    ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(crow_lens);CHKERRQ(ierr);

  /* load up the local column indices. Include them for all rows, not just one per block row, since process 0 does not have the
     information needed to expand a block row into its individual rows. This does require more communication, but still no more than
     the communication needed for the nonzero values */
  nzmax = nz; /* space the largest processor needs */
  ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
  ierr = PetscMalloc(nzmax*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<a->mbs; i++) {
    pcnt = cnt;
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      for (l=0; l<bs; l++) {
        column_indices[cnt++] = bs*col+l;
      }
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      for (l=0; l<bs; l++) {
        column_indices[cnt++] = bs*(A->j[k] + cstart)+l;
      }
    }
    for (; j<B->i[i+1]; j++) {
      for (l=0; l<bs; l++) {
        column_indices[cnt++] = bs*garray[B->j[j]]+l;
      }
    }
    len = cnt - pcnt;
    for (k=1; k<bs; k++) {
      ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr);
      cnt += len;
    }
  }
  if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);

  /* store the columns to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the numerical values */
  ierr = PetscMalloc(nzmax*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<a->mbs; i++) {
    rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]);
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      for (l=0; l<bs; l++) {
        for (ll=0; ll<bs; ll++) {
          column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
        }
      }
      cnt += bs;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      for (l=0; l<bs; l++) {
        for (ll=0; ll<bs; ll++) {
          column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll];
        }
      }
      cnt += bs;
    }
    for (; j<B->i[i+1]; j++) {
      for (l=0; l<bs; l++) {
        for (ll=0; ll<bs; ll++) {
          column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
        }
      }
      cnt += bs;
    }
    cnt += (bs-1)*rlen;
  }
  if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);

  /* store the column values to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);

  ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
  if (file) {
    fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
  }
  PetscFunctionReturn(0);
}
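
/*
   Layout of the binary file written above (PETSc's standard Mat format, in
   point rather than block terms):

     header[4]            MAT_FILE_CLASSID, M, N, total number of nonzeros
     row lengths    [M]   number of nonzeros in each row
     column indices [nz]  global column of every nonzero, row by row
     values         [nz]  numerical values in the same order

   The "-matload_block_size" line written to the companion .info file lets
   MatLoad() recover the block size.
*/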

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ"
PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii || isdraw || issocket) {
    ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(((PetscObject)mat)->comm,PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIBAIJ"
PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1322 
1323 #undef __FUNCT__
1324 #define __FUNCT__ "MatMult_MPIBAIJ"
1325 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1326 {
1327   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1328   PetscErrorCode ierr;
1329   PetscInt       nt;
1330 
1331   PetscFunctionBegin;
1332   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1333   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1334   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1335   if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
1336   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1337   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1338   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1339   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1340   PetscFunctionReturn(0);
1341 }
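
/*
   Editor's note (not in the original source): a sketch of what MatMult_MPIBAIJ
   computes.  With the local rows of A split into the diagonal block A_d and the
   off-diagonal block B_o, and the needed ghost entries of x gathered into lvec
   by the Mvctx scatter, each process forms

       y_local = A_d * x_local + B_o * x_ghost

   The scatter is started before the local product so communication overlaps
   computation.  A minimal usage sketch, assuming assembled A and conforming x,y:

       ierr = MatMult(A,x,y);CHKERRQ(ierr);
*/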
1342 
1343 #undef __FUNCT__
1344 #define __FUNCT__ "MatMultAdd_MPIBAIJ"
1345 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1346 {
1347   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1348   PetscErrorCode ierr;
1349 
1350   PetscFunctionBegin;
1351   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1352   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1353   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1354   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1355   PetscFunctionReturn(0);
1356 }
1357 
1358 #undef __FUNCT__
1359 #define __FUNCT__ "MatMultTranspose_MPIBAIJ"
1360 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1361 {
1362   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1363   PetscErrorCode ierr;
1364   PetscBool      merged;
1365 
1366   PetscFunctionBegin;
1367   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1368   /* do nondiagonal part */
1369   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1370   if (!merged) {
1371     /* send it on its way */
1372     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1373     /* do local part */
1374     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1375     /* receive remote parts: note this assumes the values are not actually */
1376     /* inserted in yy until the next line */
1377     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1378   } else {
1379     /* do local part */
1380     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1381     /* send it on its way */
1382     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1383     /* values actually were received in the Begin() but we need to call this nop */
1384     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1385   }
1386   PetscFunctionReturn(0);
1387 }
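
/*
   Editor's note (not in the original source): for the transpose product
   y = A^T x the roles of the blocks reverse: B_o^T x is computed into lvec and
   scatter-added back to the owning processes, while A_d^T x is accumulated
   locally.  The merged-scatter branch must finish the local part before
   VecScatterBegin(), because a merged scatter may deliver the remote values
   into yy during Begin() itself, with End() reduced to a no-op.
*/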
1388 
1389 #undef __FUNCT__
1390 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
1391 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1392 {
1393   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1394   PetscErrorCode ierr;
1395 
1396   PetscFunctionBegin;
1397   /* do nondiagonal part */
1398   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1399   /* send it on its way */
1400   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1401   /* do local part */
1402   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1403   /* receive remote parts: note this assumes the values are not actually */
1404   /* inserted in zz until the next line, which is true for this implementation */
1405   /* but is perhaps not always true. */
1406   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1407   PetscFunctionReturn(0);
1408 }
1409 
1410 /*
1411   This only works correctly for square matrices where the subblock A->A is the
1412    diagonal block
1413 */
1414 #undef __FUNCT__
1415 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
1416 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1417 {
1418   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1419   PetscErrorCode ierr;
1420 
1421   PetscFunctionBegin;
1422   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1423   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1424   PetscFunctionReturn(0);
1425 }
1426 
1427 #undef __FUNCT__
1428 #define __FUNCT__ "MatScale_MPIBAIJ"
1429 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
1430 {
1431   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1432   PetscErrorCode ierr;
1433 
1434   PetscFunctionBegin;
1435   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1436   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1437   PetscFunctionReturn(0);
1438 }
1439 
1440 #undef __FUNCT__
1441 #define __FUNCT__ "MatGetRow_MPIBAIJ"
1442 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1443 {
1444   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
1445   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1446   PetscErrorCode ierr;
1447   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1448   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
1449   PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;
1450 
1451   PetscFunctionBegin;
1452   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1453   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1454   mat->getrowactive = PETSC_TRUE;
1455 
1456   if (!mat->rowvalues && (idx || v)) {
1457     /*
1458         allocate enough space to hold information from the longest row.
1459     */
1460     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1461     PetscInt     max = 1,mbs = mat->mbs,tmp;
1462     for (i=0; i<mbs; i++) {
1463       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1464       if (max < tmp) { max = tmp; }
1465     }
1466     ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1467   }
1468   lrow = row - brstart;
1469 
1470   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1471   if (!v)   {pvA = 0; pvB = 0;}
1472   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1473   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1474   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1475   nztot = nzA + nzB;
1476 
1477   cmap  = mat->garray;
1478   if (v  || idx) {
1479     if (nztot) {
1480       /* Sort by increasing column numbers, assuming A and B already sorted */
1481       PetscInt imark = -1;
1482       if (v) {
1483         *v = v_p = mat->rowvalues;
1484         for (i=0; i<nzB; i++) {
1485           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1486           else break;
1487         }
1488         imark = i;
1489         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1490         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1491       }
1492       if (idx) {
1493         *idx = idx_p = mat->rowindices;
1494         if (imark > -1) {
1495           for (i=0; i<imark; i++) {
1496             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1497           }
1498         } else {
1499           for (i=0; i<nzB; i++) {
1500             if (cmap[cworkB[i]/bs] < cstart)
1501               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1502             else break;
1503           }
1504           imark = i;
1505         }
1506         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1507     for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1508       }
1509     } else {
1510       if (idx) *idx = 0;
1511       if (v)   *v   = 0;
1512     }
1513   }
1514   *nz = nztot;
1515   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1516   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1517   PetscFunctionReturn(0);
1518 }
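
/*
   Editor's note (not in the original source): MatGetRow_MPIBAIJ merges the
   already-sorted diagonal (A) and off-diagonal (B) pieces of a row into one
   globally sorted row.  Hypothetical example with bs=1 and cstart=4: if B holds
   global columns {1,9} and A holds {4,5}, the merged row is {1,4,5,9}; imark=1
   records how many B entries precede the diagonal block.
*/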
1519 
1520 #undef __FUNCT__
1521 #define __FUNCT__ "MatRestoreRow_MPIBAIJ"
1522 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1523 {
1524   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1525 
1526   PetscFunctionBegin;
1527   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1528   baij->getrowactive = PETSC_FALSE;
1529   PetscFunctionReturn(0);
1530 }
1531 
1532 #undef __FUNCT__
1533 #define __FUNCT__ "MatZeroEntries_MPIBAIJ"
1534 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1535 {
1536   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1537   PetscErrorCode ierr;
1538 
1539   PetscFunctionBegin;
1540   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1541   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1542   PetscFunctionReturn(0);
1543 }
1544 
1545 #undef __FUNCT__
1546 #define __FUNCT__ "MatGetInfo_MPIBAIJ"
1547 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1548 {
1549   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
1550   Mat            A = a->A,B = a->B;
1551   PetscErrorCode ierr;
1552   PetscReal      isend[5],irecv[5];
1553 
1554   PetscFunctionBegin;
1555   info->block_size     = (PetscReal)matin->rmap->bs;
1556   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1557   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1558   isend[3] = info->memory;  isend[4] = info->mallocs;
1559   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1560   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1561   isend[3] += info->memory;  isend[4] += info->mallocs;
1562   if (flag == MAT_LOCAL) {
1563     info->nz_used      = isend[0];
1564     info->nz_allocated = isend[1];
1565     info->nz_unneeded  = isend[2];
1566     info->memory       = isend[3];
1567     info->mallocs      = isend[4];
1568   } else if (flag == MAT_GLOBAL_MAX) {
1569     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1570     info->nz_used      = irecv[0];
1571     info->nz_allocated = irecv[1];
1572     info->nz_unneeded  = irecv[2];
1573     info->memory       = irecv[3];
1574     info->mallocs      = irecv[4];
1575   } else if (flag == MAT_GLOBAL_SUM) {
1576     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1577     info->nz_used      = irecv[0];
1578     info->nz_allocated = irecv[1];
1579     info->nz_unneeded  = irecv[2];
1580     info->memory       = irecv[3];
1581     info->mallocs      = irecv[4];
1582   } else {
1583     SETERRQ1(((PetscObject)matin)->comm,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1584   }
1585   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1586   info->fill_ratio_needed = 0;
1587   info->factor_mallocs    = 0;
1588   PetscFunctionReturn(0);
1589 }
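
/*
   Editor's note (not in the original source): a minimal usage sketch.  The five
   statistics (nz_used, nz_allocated, nz_unneeded, memory, mallocs) are summed
   over the A and B blocks, then optionally reduced across the communicator
   with MPIU_MAX or MPIU_SUM:

       MatInfo info;
       ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
       ierr = PetscPrintf(PETSC_COMM_WORLD,"nonzeros used: %g\n",info.nz_used);CHKERRQ(ierr);
*/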
1590 
1591 #undef __FUNCT__
1592 #define __FUNCT__ "MatSetOption_MPIBAIJ"
1593 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool  flg)
1594 {
1595   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1596   PetscErrorCode ierr;
1597 
1598   PetscFunctionBegin;
1599   switch (op) {
1600   case MAT_NEW_NONZERO_LOCATIONS:
1601   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1602   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1603   case MAT_KEEP_NONZERO_PATTERN:
1604   case MAT_NEW_NONZERO_LOCATION_ERR:
1605     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1606     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1607     break;
1608   case MAT_ROW_ORIENTED:
1609     a->roworiented = flg;
1610     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1611     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1612     break;
1613   case MAT_NEW_DIAGONALS:
1614     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1615     break;
1616   case MAT_IGNORE_OFF_PROC_ENTRIES:
1617     a->donotstash = flg;
1618     break;
1619   case MAT_USE_HASH_TABLE:
1620     a->ht_flag = flg;
1621     break;
1622   case MAT_SYMMETRIC:
1623   case MAT_STRUCTURALLY_SYMMETRIC:
1624   case MAT_HERMITIAN:
1625   case MAT_SYMMETRY_ETERNAL:
1626     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1627     break;
1628   default:
1629     SETERRQ1(((PetscObject)A)->comm,PETSC_ERR_SUP,"unknown option %d",op);
1630   }
1631   PetscFunctionReturn(0);
1632 }
1633 
1634 #undef __FUNCT__
1635 #define __FUNCT__ "MatTranspose_MPIBAIJ"
1636 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
1637 {
1638   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
1639   Mat_SeqBAIJ    *Aloc;
1640   Mat            B;
1641   PetscErrorCode ierr;
1642   PetscInt       M=A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
1643   PetscInt       bs=A->rmap->bs,mbs=baij->mbs;
1644   MatScalar      *a;
1645 
1646   PetscFunctionBegin;
1647   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1648   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1649     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
1650     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1651     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1652     /* Do not know preallocation information, but must set block size */
1653     ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,PETSC_NULL,PETSC_DECIDE,PETSC_NULL);CHKERRQ(ierr);
1654   } else {
1655     B = *matout;
1656   }
1657 
1658   /* copy over the A part */
1659   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1660   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1661   ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
1662 
1663   for (i=0; i<mbs; i++) {
1664     rvals[0] = bs*(baij->rstartbs + i);
1665     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1666     for (j=ai[i]; j<ai[i+1]; j++) {
1667       col = (baij->cstartbs+aj[j])*bs;
1668       for (k=0; k<bs; k++) {
1669         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1670         col++; a += bs;
1671       }
1672     }
1673   }
1674   /* copy over the B part */
1675   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1676   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1677   for (i=0; i<mbs; i++) {
1678     rvals[0] = bs*(baij->rstartbs + i);
1679     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1680     for (j=ai[i]; j<ai[i+1]; j++) {
1681       col = baij->garray[aj[j]]*bs;
1682       for (k=0; k<bs; k++) {
1683         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1684         col++; a += bs;
1685       }
1686     }
1687   }
1688   ierr = PetscFree(rvals);CHKERRQ(ierr);
1689   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1690   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1691 
1692   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1693     *matout = B;
1694   } else {
1695     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
1696   }
1697   PetscFunctionReturn(0);
1698 }
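
/*
   Editor's note (not in the original source): the transpose is built by walking
   each local block row and inserting its bs columns as rows of the result via
   MatSetValues_MPIBAIJ().  A minimal usage sketch:

       Mat At;
       ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);

   Since no preallocation information is known for the result, insertion may be
   slow for matrices with many off-diagonal blocks.
*/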
1699 
1700 #undef __FUNCT__
1701 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
1702 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1703 {
1704   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1705   Mat            a = baij->A,b = baij->B;
1706   PetscErrorCode ierr;
1707   PetscInt       s1,s2,s3;
1708 
1709   PetscFunctionBegin;
1710   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1711   if (rr) {
1712     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1713     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1714     /* Overlap communication with computation. */
1715     ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1716   }
1717   if (ll) {
1718     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1719     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1720     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1721   }
1722   /* scale the diagonal block */
1723   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1724 
1725   if (rr) {
1726     /* Do a scatter end and then right scale the off-diagonal block */
1727     ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1728     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1729   }
1730 
1731   PetscFunctionReturn(0);
1732 }
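
/*
   Editor's note (not in the original source): MatDiagonalScale computes
   A <- diag(ll) * A * diag(rr).  The forward scatter of rr into lvec is started
   first, so scaling the diagonal block overlaps the communication needed to
   right-scale the off-diagonal block.  A minimal usage sketch, assuming
   conforming vectors l and r:

       ierr = MatDiagonalScale(A,l,r);CHKERRQ(ierr);
*/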
1733 
1734 #undef __FUNCT__
1735 #define __FUNCT__ "MatZeroRows_MPIBAIJ"
1736 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
1737 {
1738   Mat_MPIBAIJ       *l = (Mat_MPIBAIJ*)A->data;
1739   PetscErrorCode    ierr;
1740   PetscMPIInt       imdex,size = l->size,n,rank = l->rank;
1741   PetscInt          i,*owners = A->rmap->range;
1742   PetscInt          *nprocs,j,idx,nsends,row;
1743   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
1744   PetscInt          *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source,lastidx = -1;
1745   PetscInt          *lens,*lrows,*values,rstart_bs=A->rmap->rstart;
1746   MPI_Comm          comm = ((PetscObject)A)->comm;
1747   MPI_Request       *send_waits,*recv_waits;
1748   MPI_Status        recv_status,*send_status;
1749   const PetscScalar *xx;
1750   PetscScalar       *bb;
1751 #if defined(PETSC_DEBUG)
1752   PetscBool         found = PETSC_FALSE;
1753 #endif
1754 
1755   PetscFunctionBegin;
1756   /*  first count number of contributors to each processor */
1757   ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
1758   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
1759   ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
1760   j = 0;
1761   for (i=0; i<N; i++) {
1762     if (lastidx > (idx = rows[i])) j = 0;
1763     lastidx = idx;
1764     for (; j<size; j++) {
1765       if (idx >= owners[j] && idx < owners[j+1]) {
1766         nprocs[2*j]++;
1767         nprocs[2*j+1] = 1;
1768         owner[i] = j;
1769 #if defined(PETSC_DEBUG)
1770         found = PETSC_TRUE;
1771 #endif
1772         break;
1773       }
1774     }
1775 #if defined(PETSC_DEBUG)
1776     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
1777     found = PETSC_FALSE;
1778 #endif
1779   }
1780   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
1781 
1782   if (A->nooffproczerorows) {
1783     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
1784     nrecvs = nsends;
1785     nmax   = N;
1786   } else {
1787     /* inform other processors of number of messages and max length*/
1788     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
1789   }
1790 
1791   /* post receives:   */
1792   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
1793   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
1794   for (i=0; i<nrecvs; i++) {
1795     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
1796   }
1797 
1798   /* do sends:
1799      1) starts[i] gives the starting index in svalues for stuff going to
1800      the ith processor
1801   */
1802   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
1803   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
1804   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
1805   starts[0]  = 0;
1806   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1807   for (i=0; i<N; i++) {
1808     svalues[starts[owner[i]]++] = rows[i];
1809   }
1810 
1811   starts[0] = 0;
1812   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1813   count = 0;
1814   for (i=0; i<size; i++) {
1815     if (nprocs[2*i+1]) {
1816       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
1817     }
1818   }
1819   ierr = PetscFree(starts);CHKERRQ(ierr);
1820 
1821   base = owners[rank];
1822 
1823   /*  wait on receives */
1824   ierr   = PetscMalloc2(nrecvs+1,PetscInt,&lens,nrecvs+1,PetscInt,&source);CHKERRQ(ierr);
1825   count  = nrecvs;
1826   slen = 0;
1827   while (count) {
1828     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
1829     /* unpack receives into our local space */
1830     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
1831     source[imdex]  = recv_status.MPI_SOURCE;
1832     lens[imdex]    = n;
1833     slen          += n;
1834     count--;
1835   }
1836   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1837 
1838   /* move the data into the send scatter */
1839   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
1840   count = 0;
1841   for (i=0; i<nrecvs; i++) {
1842     values = rvalues + i*nmax;
1843     for (j=0; j<lens[i]; j++) {
1844       lrows[count++] = values[j] - base;
1845     }
1846   }
1847   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1848   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
1849   ierr = PetscFree(owner);CHKERRQ(ierr);
1850   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1851 
1852   /* fix right hand side if needed */
1853   if (x && b) {
1854     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
1855     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1856     for (i=0; i<slen; i++) {
1857       bb[lrows[i]] = diag*xx[lrows[i]];
1858     }
1859     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
1860     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1861   }
1862 
1863   /* actually zap the local rows */
1864   /*
1865         Zero the required rows. If the "diagonal block" of the matrix
1866      is square and the user wishes to set the diagonal, we use separate
1867      code so that MatSetValues() is not called for each diagonal entry;
1868      that would allocate new memory on every insertion, triggering many
1869      mallocs and slowing things down.
1870   */
1871   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
1872   ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
1873   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
1874     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
1875   } else if (diag != 0.0) {
1876     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
1877     if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1878        MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1879     for (i=0; i<slen; i++) {
1880       row  = lrows[i] + rstart_bs;
1881       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
1882     }
1883     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1884     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1885   } else {
1886     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
1887   }
1888 
1889   ierr = PetscFree(lrows);CHKERRQ(ierr);
1890 
1891   /* wait on sends */
1892   if (nsends) {
1893     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1894     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1895     ierr = PetscFree(send_status);CHKERRQ(ierr);
1896   }
1897   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1898   ierr = PetscFree(svalues);CHKERRQ(ierr);
1899 
1900   PetscFunctionReturn(0);
1901 }
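
/*
   Editor's note (not in the original source): rows may be specified on any
   process; the routine routes each global row index to its owner with a round
   of point-to-point messages before zeroing locally.  A minimal usage sketch
   that zeros two rows, puts 1.0 on their diagonal, and fixes the right-hand
   side b so the solution keeps the values of x in those rows:

       PetscInt rows[] = {0,5};   /* hypothetical global row indices */
       ierr = MatZeroRows(A,2,rows,1.0,x,b);CHKERRQ(ierr);
*/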
1902 
1903 #undef __FUNCT__
1904 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
1905 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1906 {
1907   Mat_MPIBAIJ    *a   = (Mat_MPIBAIJ*)A->data;
1908   PetscErrorCode ierr;
1909 
1910   PetscFunctionBegin;
1911   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1912   PetscFunctionReturn(0);
1913 }
1914 
1915 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
1916 
1917 #undef __FUNCT__
1918 #define __FUNCT__ "MatEqual_MPIBAIJ"
1919 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool  *flag)
1920 {
1921   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1922   Mat            a,b,c,d;
1923   PetscBool      flg;
1924   PetscErrorCode ierr;
1925 
1926   PetscFunctionBegin;
1927   a = matA->A; b = matA->B;
1928   c = matB->A; d = matB->B;
1929 
1930   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1931   if (flg) {
1932     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1933   }
1934   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1935   PetscFunctionReturn(0);
1936 }
1937 
1938 #undef __FUNCT__
1939 #define __FUNCT__ "MatCopy_MPIBAIJ"
1940 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
1941 {
1942   PetscErrorCode ierr;
1943   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ *)A->data;
1944   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ *)B->data;
1945 
1946   PetscFunctionBegin;
1947   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1948   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1949     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1950   } else {
1951     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1952     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1953   }
1954   PetscFunctionReturn(0);
1955 }
1956 
1957 #undef __FUNCT__
1958 #define __FUNCT__ "MatSetUp_MPIBAIJ"
1959 PetscErrorCode MatSetUp_MPIBAIJ(Mat A)
1960 {
1961   PetscErrorCode ierr;
1962 
1963   PetscFunctionBegin;
1964   ierr = MatMPIBAIJSetPreallocation(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1965   PetscFunctionReturn(0);
1966 }
1967 
1968 #undef __FUNCT__
1969 #define __FUNCT__ "MatAXPY_MPIBAIJ"
1970 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1971 {
1972   PetscErrorCode ierr;
1973   Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data;
1974   PetscBLASInt   bnz,one=1;
1975   Mat_SeqBAIJ    *x,*y;
1976 
1977   PetscFunctionBegin;
1978   if (str == SAME_NONZERO_PATTERN) {
1979     PetscScalar alpha = a;
1980     x = (Mat_SeqBAIJ *)xx->A->data;
1981     y = (Mat_SeqBAIJ *)yy->A->data;
1982     bnz = PetscBLASIntCast(x->nz*x->bs2);  /* nz counts blocks; each block holds bs2 scalars */
1983     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1984     x = (Mat_SeqBAIJ *)xx->B->data;
1985     y = (Mat_SeqBAIJ *)yy->B->data;
1986     bnz = PetscBLASIntCast(x->nz*x->bs2);  /* nz counts blocks; each block holds bs2 scalars */
1987     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1988   } else {
1989     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1990   }
1991   PetscFunctionReturn(0);
1992 }
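
/*
   Editor's note (not in the original source): when X and Y share the same block
   nonzero pattern, Y <- a*X + Y reduces to one BLAS axpy over the flat value
   array of each part (x->nz blocks of bs2 scalars apiece, for both the diagonal
   and off-diagonal pieces); otherwise the generic entrywise MatAXPY_Basic()
   path is taken.
*/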
1993 
1994 #undef __FUNCT__
1995 #define __FUNCT__ "MatRealPart_MPIBAIJ"
1996 PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
1997 {
1998   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ*)A->data;
1999   PetscErrorCode ierr;
2000 
2001   PetscFunctionBegin;
2002   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2003   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2004   PetscFunctionReturn(0);
2005 }
2006 
2007 #undef __FUNCT__
2008 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ"
2009 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
2010 {
2011   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ*)A->data;
2012   PetscErrorCode ierr;
2013 
2014   PetscFunctionBegin;
2015   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2016   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2017   PetscFunctionReturn(0);
2018 }
2019 
2020 #undef __FUNCT__
2021 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ"
2022 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
2023 {
2024   PetscErrorCode ierr;
2025   IS             iscol_local;
2026   PetscInt       csize;
2027 
2028   PetscFunctionBegin;
2029   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
2030   if (call == MAT_REUSE_MATRIX) {
2031     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
2032     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2033   } else {
2034     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
2035   }
2036   ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
2037   if (call == MAT_INITIAL_MATRIX) {
2038     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
2039     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
2040   }
2041   PetscFunctionReturn(0);
2042 }
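
/*
   Editor's note (not in the original source): the gathered column IS is cached
   on the new matrix under the key "ISAllGather" so that a later call with
   MAT_REUSE_MATRIX can skip the allgather.  A minimal usage sketch:

       Mat sub;
       ierr = MatGetSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);
       /* ... change values of A, same nonzero pattern ... */
       ierr = MatGetSubMatrix(A,isrow,iscol,MAT_REUSE_MATRIX,&sub);CHKERRQ(ierr);
*/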
2043 
2044 #undef __FUNCT__
2045 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ_Private"
2046 /*
2047     Not great since it makes two copies of the submatrix: first a local SeqBAIJ,
2048   then the end result obtained by concatenating the local matrices.
2049   Writing it directly would be much like MatGetSubMatrices_MPIBAIJ()
2050 */
2051 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2052 {
2053   PetscErrorCode ierr;
2054   PetscMPIInt    rank,size;
2055   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs;
2056   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2057   Mat            *local,M,Mreuse;
2058   MatScalar      *vwork,*aa;
2059   MPI_Comm       comm = ((PetscObject)mat)->comm;
2060   Mat_SeqBAIJ    *aij;
2061 
2062 
2063   PetscFunctionBegin;
2064   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2065   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2066 
2067   if (call ==  MAT_REUSE_MATRIX) {
2068     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2069     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2070     local = &Mreuse;
2071     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2072   } else {
2073     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2074     Mreuse = *local;
2075     ierr   = PetscFree(local);CHKERRQ(ierr);
2076   }
2077 
2078   /*
2079       m - number of local rows
2080       n - number of columns (same on all processors)
2081       rstart - first row in new global matrix generated
2082   */
2083   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2084   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2085   m    = m/bs;
2086   n    = n/bs;
2087 
2088   if (call == MAT_INITIAL_MATRIX) {
2089     aij = (Mat_SeqBAIJ*)(Mreuse)->data;
2090     ii  = aij->i;
2091     jj  = aij->j;
2092 
2093     /*
2094         Determine the number of non-zeros in the diagonal and off-diagonal
2095         portions of the matrix in order to do correct preallocation
2096     */
2097 
2098     /* first get start and end of "diagonal" columns */
2099     if (csize == PETSC_DECIDE) {
2100       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2101       if (mglobal == n*bs) { /* square matrix */
2102         nlocal = m;
2103       } else {
2104         nlocal = n/size + ((n % size) > rank);
2105       }
2106     } else {
2107       nlocal = csize/bs;
2108     }
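    /* inclusive prefix sum of nlocal: rend is one past this process's last block column, rstart its first */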
2109     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2110     rstart = rend - nlocal;
2111     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2112 
2113     /* next, compute all the lengths */
2114     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2115     olens = dlens + m;
2116     for (i=0; i<m; i++) {
2117       jend = ii[i+1] - ii[i];
2118       olen = 0;
2119       dlen = 0;
2120       for (j=0; j<jend; j++) {
2121         if (*jj < rstart || *jj >= rend) olen++;
2122         else dlen++;
2123         jj++;
2124       }
2125       olens[i] = olen;
2126       dlens[i] = dlen;
2127     }
2128     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
2129     ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr);
2130     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
2131     ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
2132     ierr = PetscFree(dlens);CHKERRQ(ierr);
2133   } else {
2134     PetscInt ml,nl;
2135 
2136     M = *newmat;
2137     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2138     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2139     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2140     /*
2141          The next two lines are needed so we may call MatSetValuesBlocked_MPIBAIJ() below directly,
2142        rather than the slower MatSetValues().
2143     */
2144     M->was_assembled = PETSC_TRUE;
2145     M->assembled     = PETSC_FALSE;
2146   }
2147   ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2148   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2149   aij = (Mat_SeqBAIJ*)(Mreuse)->data;
2150   ii  = aij->i;
2151   jj  = aij->j;
2152   aa  = aij->a;
2153   for (i=0; i<m; i++) {
2154     row   = rstart/bs + i;
2155     nz    = ii[i+1] - ii[i];
2156     cwork = jj;     jj += nz;
2157     vwork = aa;     aa += nz;
2158     ierr = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2159   }
2160 
2161   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2162   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2163   *newmat = M;
2164 
2165   /* save submatrix used in processor for next request */
2166   if (call ==  MAT_INITIAL_MATRIX) {
2167     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2168     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2169   }
2170 
2171   PetscFunctionReturn(0);
2172 }
2173 
2174 #undef __FUNCT__
2175 #define __FUNCT__ "MatPermute_MPIBAIJ"
2176 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B)
2177 {
2178   MPI_Comm       comm,pcomm;
2179   PetscInt       first,local_size,nrows;
2180   const PetscInt *rows;
2181   PetscMPIInt    size;
2182   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
2183   PetscErrorCode ierr;
2184 
2185   PetscFunctionBegin;
2186   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2187   /* make a collective version of 'rowp' */
2188   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
2189   if (pcomm==comm) {
2190     crowp = rowp;
2191   } else {
2192     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
2193     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
2194     ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr);
2195     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
2196   }
2197   /* collect the global row permutation and invert it */
2198   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
2199   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
2200   if (pcomm!=comm) {
2201     ierr = ISDestroy(&crowp);CHKERRQ(ierr);
2202   }
2203   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
2204   /* get the local target indices */
2205   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
2206   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
2207   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
2208   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr);
2209   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
2210   ierr = ISDestroy(&irowp);CHKERRQ(ierr);
2211   /* the column permutation is so much easier;
2212      make a local version of 'colp' and invert it */
2213   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
2214   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
2215   if (size==1) {
2216     lcolp = colp;
2217   } else {
2218     ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
2219     ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
2220     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,PETSC_COPY_VALUES,&lcolp);CHKERRQ(ierr);
2221   }
2222   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
2223   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
2224   ierr = ISSetPermutation(icolp);CHKERRQ(ierr);
2225   if (size>1) {
2226     ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
2227     ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
2228   }
2229   /* now we just get the submatrix */
2230   ierr = MatGetSubMatrix_MPIBAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
2231   /* clean up */
2232   ierr = ISDestroy(&lrowp);CHKERRQ(ierr);
2233   ierr = ISDestroy(&icolp);CHKERRQ(ierr);
2234   PetscFunctionReturn(0);
2235 }
2236 
2237 #undef __FUNCT__
2238 #define __FUNCT__ "MatGetGhosts_MPIBAIJ"
2239 PetscErrorCode  MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
2240 {
2241   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*) mat->data;
2242   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
2243 
2244   PetscFunctionBegin;
2245   if (nghosts) { *nghosts = B->nbs;}
2246   if (ghosts) {*ghosts = baij->garray;}
2247   PetscFunctionReturn(0);
2248 }
2249 
2250 extern PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat);
2251 
2252 #undef __FUNCT__
2253 #define __FUNCT__ "MatFDColoringCreate_MPIBAIJ"
2254 /*
2255     This routine is almost identical to MatFDColoringCreate_MPIAIJ()!
2256 */
2257 PetscErrorCode MatFDColoringCreate_MPIBAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c)
2258 {
2259   Mat_MPIBAIJ            *baij = (Mat_MPIBAIJ*)mat->data;
2260   PetscErrorCode         ierr;
2261   PetscMPIInt            size,*ncolsonproc,*disp,nn;
2262   PetscInt               bs,i,n,nrows,j,k,m,*rows = 0,*A_ci,*A_cj,ncols,col;
2263   const PetscInt         *is;
2264   PetscInt               nis = iscoloring->n,nctot,*cols,*B_ci,*B_cj;
2265   PetscInt               *rowhit,M,cstart,cend,colb;
2266   PetscInt               *columnsforrow,l;
2267   IS                     *isa;
2268   PetscBool              done,flg;
2269   ISLocalToGlobalMapping map = mat->cmap->bmapping;
2270   PetscInt               *ltog = (map ? map->indices : (PetscInt*)PETSC_NULL),ctype=c->ctype;
2271 
2272   PetscFunctionBegin;
2273   if (!mat->assembled) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled first; MatAssemblyBegin/End();");
2274   if (ctype == IS_COLORING_GHOSTED && !map) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_INCOMP,"When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMappingBlock");
2275 
2276   ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr);
2277   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2278   M                = mat->rmap->n/bs;
2279   cstart           = mat->cmap->rstart/bs;
2280   cend             = mat->cmap->rend/bs;
2281   c->M             = mat->rmap->N/bs;  /* set the global rows and columns and local rows */
2282   c->N             = mat->cmap->N/bs;
2283   c->m             = mat->rmap->n/bs;
2284   c->rstart        = mat->rmap->rstart/bs;
2285 
2286   c->ncolors       = nis;
2287   ierr             = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr);
2288   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr);
2289   ierr             = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr);
2290   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr);
2291   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr);
2292   ierr = PetscLogObjectMemory(c,5*nis*sizeof(PetscInt));CHKERRQ(ierr);
2293 
2294   /* Allow access to data structures of local part of matrix */
2295   if (!baij->colmap) {
2296     ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
2297   }
2298   ierr = MatGetColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr);
2299   ierr = MatGetColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr);
2300 
2301   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr);
2302   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr);
2303 
2304   for (i=0; i<nis; i++) {
2305     ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr);
2306     ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr);
2307     c->ncolumns[i] = n;
2308     if (n) {
2309       ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr);
2310       ierr = PetscLogObjectMemory(c,n*sizeof(PetscInt));CHKERRQ(ierr);
2311       ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr);
2312     } else {
2313       c->columns[i]  = 0;
2314     }
2315 
2316     if (ctype == IS_COLORING_GLOBAL){
2317       /* Determine the total (parallel) number of columns of this color */
2318       ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
2319       ierr = PetscMalloc2(size,PetscMPIInt,&ncolsonproc,size,PetscMPIInt,&disp);CHKERRQ(ierr);
2320 
2321       nn   = PetscMPIIntCast(n);
2322       ierr = MPI_Allgather(&nn,1,MPI_INT,ncolsonproc,1,MPI_INT,((PetscObject)mat)->comm);CHKERRQ(ierr);
2323       nctot = 0; for (j=0; j<size; j++) {nctot += ncolsonproc[j];}
2324       if (!nctot) {
2325         ierr = PetscInfo(mat,"Coloring of matrix has some unneeded colors with no corresponding rows\n");CHKERRQ(ierr);
2326       }
2327 
2328       disp[0] = 0;
2329       for (j=1; j<size; j++) {
2330         disp[j] = disp[j-1] + ncolsonproc[j-1];
2331       }
2332 
2333       /* Get complete list of columns for color on each processor */
2334       ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2335       ierr = MPI_Allgatherv((void*)is,n,MPIU_INT,cols,ncolsonproc,disp,MPIU_INT,((PetscObject)mat)->comm);CHKERRQ(ierr);
2336       ierr = PetscFree2(ncolsonproc,disp);CHKERRQ(ierr);
2337     } else if (ctype == IS_COLORING_GHOSTED){
2338       /* Determine local number of columns of this color on this process, including ghost points */
2339       nctot = n;
2340       ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2341       ierr = PetscMemcpy(cols,is,n*sizeof(PetscInt));CHKERRQ(ierr);
2342     } else {
2343       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not provided for this MatFDColoring type");
2344     }
2345 
2346     /*
2347        Mark all rows affected by these columns
2348     */
2349     /* Temporary option to allow for debugging/testing */
2350     flg  = PETSC_FALSE;
2351     ierr = PetscOptionsGetBool(PETSC_NULL,"-matfdcoloring_slow",&flg,PETSC_NULL);CHKERRQ(ierr);
2352     if (!flg) {/*-----------------------------------------------------------------------------*/
2353       /* crude, fast version */
2354       ierr = PetscMemzero(rowhit,M*sizeof(PetscInt));CHKERRQ(ierr);
2355       /* loop over columns */
2356       for (j=0; j<nctot; j++) {
2357         if (ctype == IS_COLORING_GHOSTED) {
2358           col = ltog[cols[j]];
2359         } else {
2360           col  = cols[j];
2361         }
2362         if (col >= cstart && col < cend) {
2363           /* column is in diagonal block of matrix */
2364           rows = A_cj + A_ci[col-cstart];
2365           m    = A_ci[col-cstart+1] - A_ci[col-cstart];
2366         } else {
2367 #if defined (PETSC_USE_CTABLE)
2368           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2369           colb--;
2370 #else
2371           colb = baij->colmap[col] - 1;
2372 #endif
2373           if (colb == -1) {
2374             m = 0;
2375           } else {
2376             colb = colb/bs;
2377             rows = B_cj + B_ci[colb];
2378             m    = B_ci[colb+1] - B_ci[colb];
2379           }
2380         }
2381         /* mark the rows of this column in rowhit */
2382         for (k=0; k<m; k++) {
2383           rowhit[*rows++] = col + 1;
2384         }
2385       }
2386 
2387       /* count the number of hits */
2388       nrows = 0;
2389       for (j=0; j<M; j++) {
2390         if (rowhit[j]) nrows++;
2391       }
2392       c->nrows[i]         = nrows;
2393       ierr                = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
2394       ierr                = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
2395       ierr = PetscLogObjectMemory(c,2*(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr);
2396       nrows = 0;
2397       for (j=0; j<M; j++) {
2398         if (rowhit[j]) {
2399           c->rows[i][nrows]           = j;
2400           c->columnsforrow[i][nrows] = rowhit[j] - 1;
2401           nrows++;
2402         }
2403       }
2404     } else {/*-------------------------------------------------------------------------------*/
2405       /* slow version, using rowhit as a linked list */
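      /* rowhit[M] is the sentinel; entries thread the hit rows in increasing order, so each insertion point is found by walking the list */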
2406       PetscInt currentcol,fm,mfm;
2407       rowhit[M] = M;
2408       nrows     = 0;
2409       /* loop over columns */
2410       for (j=0; j<nctot; j++) {
2411         if (ctype == IS_COLORING_GHOSTED) {
2412           col = ltog[cols[j]];
2413         } else {
2414           col  = cols[j];
2415         }
2416         if (col >= cstart && col < cend) {
2417           /* column is in diagonal block of matrix */
2418           rows = A_cj + A_ci[col-cstart];
2419           m    = A_ci[col-cstart+1] - A_ci[col-cstart];
2420         } else {
2421 #if defined (PETSC_USE_CTABLE)
2422           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2423           colb--;
2424 #else
2425           colb = baij->colmap[col] - 1;
2426 #endif
2427           if (colb == -1) {
2428             m = 0;
2429           } else {
2430             colb = colb/bs;
2431             rows = B_cj + B_ci[colb];
2432             m    = B_ci[colb+1] - B_ci[colb];
2433           }
2434         }
2435 
2436         /* mark the rows of this column in the rowhit linked list */
2437         fm    = M; /* fm points to first entry in linked list */
2438         for (k=0; k<m; k++) {
2439           currentcol = *rows++;
2440           /* is it already in the list? */
2441           do {
2442             mfm  = fm;
2443             fm   = rowhit[fm];
2444           } while (fm < currentcol);
2445           /* not in list so add it */
2446           if (fm != currentcol) {
2447             nrows++;
2448             columnsforrow[currentcol] = col;
2449             /* next three lines insert new entry into linked list */
2450             rowhit[mfm]               = currentcol;
2451             rowhit[currentcol]        = fm;
2452             fm                        = currentcol;
2453             /* fm points to present position in list since we know the columns are sorted */
2454           } else {
2455             SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid coloring of matrix detected");
2456           }
2457         }
2458       }
2459       c->nrows[i]         = nrows;
2460       ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
2461       ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
2462       ierr = PetscLogObjectMemory(c,(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr);
2463       /* now store the linked list of rows into c->rows[i] */
2464       nrows = 0;
2465       fm    = rowhit[M];
2466       do {
2467         c->rows[i][nrows]            = fm;
2468         c->columnsforrow[i][nrows++] = columnsforrow[fm];
2469         fm                           = rowhit[fm];
2470       } while (fm < M);
2471     } /* ---------------------------------------------------------------------------------------*/
2472     ierr = PetscFree(cols);CHKERRQ(ierr);
2473   }
2474 
2475   /* Optimize by adding the vscale, and scaleforrow[][] fields */
2476   /*
2477        vscale will contain the "diagonal" on processor scalings followed by the off processor
2478   */
2479   if (ctype == IS_COLORING_GLOBAL) {
2480     PetscInt *garray;
2481     ierr = PetscMalloc(baij->B->cmap->n*sizeof(PetscInt),&garray);CHKERRQ(ierr);
2482     for (i=0; i<baij->B->cmap->n/bs; i++) {
2483       for (j=0; j<bs; j++) {
2484         garray[i*bs+j] = bs*baij->garray[i]+j;
2485       }
2486     }
2487     ierr = VecCreateGhost(((PetscObject)mat)->comm,baij->A->rmap->n,PETSC_DETERMINE,baij->B->cmap->n,garray,&c->vscale);CHKERRQ(ierr);
2488     ierr = PetscFree(garray);CHKERRQ(ierr);
2489     CHKMEMQ;
2490     ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
2491     for (k=0; k<c->ncolors; k++) {
2492       ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
2493       for (l=0; l<c->nrows[k]; l++) {
2494         col = c->columnsforrow[k][l];
2495         if (col >= cstart && col < cend) {
2496           /* column is in diagonal block of matrix */
2497           colb = col - cstart;
2498         } else {
2499           /* column  is in "off-processor" part */
2500 #if defined (PETSC_USE_CTABLE)
2501           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2502           colb--;
2503 #else
2504           colb = baij->colmap[col] - 1;
2505 #endif
2506           colb = colb/bs;
2507           colb += cend - cstart;
2508         }
2509         c->vscaleforrow[k][l] = colb;
2510       }
2511     }
2512   } else if (ctype == IS_COLORING_GHOSTED) {
2513     /* Get gtol mapping */
2514     PetscInt N = mat->cmap->N, *gtol;
2515     ierr = PetscMalloc((N+1)*sizeof(PetscInt),&gtol);CHKERRQ(ierr);
2516     for (i=0; i<N; i++) gtol[i] = -1;
2517     for (i=0; i<map->n; i++) gtol[ltog[i]] = i;
2518 
2519     c->vscale = 0; /* will be created in MatFDColoringApply() */
2520     ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
2521     for (k=0; k<c->ncolors; k++) {
2522       ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
2523       for (l=0; l<c->nrows[k]; l++) {
2524         col = c->columnsforrow[k][l];      /* global column index */
2525         c->vscaleforrow[k][l] = gtol[col]; /* local column index */
2526       }
2527     }
2528     ierr = PetscFree(gtol);CHKERRQ(ierr);
2529   }
2530   ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr);
2531 
2532   ierr = PetscFree(rowhit);CHKERRQ(ierr);
2533   ierr = PetscFree(columnsforrow);CHKERRQ(ierr);
2534   ierr = MatRestoreColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr);
2535   ierr = MatRestoreColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr);
2536   CHKMEMQ;
2537   PetscFunctionReturn(0);
2538 }
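
/*
   Editor's note (not in the original source): a minimal usage sketch of the
   finite-difference coloring machinery this routine sets up, assuming the
   3.x-era MatGetColoring() interface and a Jacobian matrix J:

       ISColoring    iscoloring;
       MatFDColoring fdcoloring;
       ierr = MatGetColoring(J,MATCOLORINGSL,&iscoloring);CHKERRQ(ierr);
       ierr = MatFDColoringCreate(J,iscoloring,&fdcoloring);CHKERRQ(ierr);
       ierr = ISColoringDestroy(&iscoloring);CHKERRQ(ierr);
*/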
2539 
2540 #undef __FUNCT__
2541 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIBAIJ"
2542 PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat)
2543 {
2544   Mat            B;
2545   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ *)A->data;
2546   Mat_SeqBAIJ    *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data;
2547   Mat_SeqAIJ     *b;
2548   PetscErrorCode ierr;
2549   PetscMPIInt    size,rank,*recvcounts = 0,*displs = 0;
2550   PetscInt       sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs;
2551   PetscInt       m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf;
2552 
2553   PetscFunctionBegin;
2554   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
2555   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
2556 
2557   /* ----------------------------------------------------------------
2558      Tell every processor the number of nonzeros per row
2559   */
2560   ierr = PetscMalloc((A->rmap->N/bs)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
2561   for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) {
2562     lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs];
2563   }
2564   sendcount = A->rmap->rend/bs - A->rmap->rstart/bs;
2565   ierr = PetscMalloc(2*size*sizeof(PetscMPIInt),&recvcounts);CHKERRQ(ierr);
2566   displs     = recvcounts + size;
2567   for (i=0; i<size; i++) {
2568     recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs;
2569     displs[i]     = A->rmap->range[i]/bs;
2570   }
2571 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2572   ierr  = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2573 #else
2574   ierr  = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2575 #endif
2576   /* ---------------------------------------------------------------
2577      Create the sequential matrix of the same type as the local block diagonal
2578   */
2579   ierr  = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
2580   ierr  = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2581   ierr  = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2582   ierr  = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr);
2583   b = (Mat_SeqAIJ *)B->data;
2584 
2585   /*--------------------------------------------------------------------
2586     Copy my part of matrix column indices over
2587   */
2588   sendcount  = ad->nz + bd->nz;
2589   jsendbuf   = b->j + b->i[rstarts[rank]/bs];
2590   a_jsendbuf = ad->j;
2591   b_jsendbuf = bd->j;
2592   n          = A->rmap->rend/bs - A->rmap->rstart/bs;
2593   cnt        = 0;
2594   for (i=0; i<n; i++) {
2595 
2596     /* put in lower diagonal portion */
2597     m = bd->i[i+1] - bd->i[i];
2598     while (m > 0) {
2599       /* is it above diagonal (in bd (compressed) numbering) */
2600       if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break;
2601       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2602       m--;
2603     }
2604 
2605     /* put in diagonal portion */
2606     for (j=ad->i[i]; j<ad->i[i+1]; j++) {
2607       jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++;
2608     }
2609 
2610     /* put in upper diagonal portion */
2611     while (m-- > 0) {
2612       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2613     }
2614   }
2615   if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt);
2616 
2617   /*--------------------------------------------------------------------
2618     Gather all column indices to all processors
2619   */
2620   for (i=0; i<size; i++) {
2621     recvcounts[i] = 0;
2622     for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) {
2623       recvcounts[i] += lens[j];
2624     }
2625   }
2626   displs[0]  = 0;
2627   for (i=1; i<size; i++) {
2628     displs[i] = displs[i-1] + recvcounts[i-1];
2629   }
2630 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2631   ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2632 #else
2633   ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2634 #endif
2635   /*--------------------------------------------------------------------
2636     Assemble the matrix into usable form (note numerical values not yet set)
2637   */
2638   /* set the b->ilen (length of each row) values */
2639   ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr);
2640   /* set the b->i indices */
2641   b->i[0] = 0;
2642   for (i=1; i<=A->rmap->N/bs; i++) {
2643     b->i[i] = b->i[i-1] + lens[i-1];
2644   }
2645   ierr = PetscFree(lens);CHKERRQ(ierr);
2646   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2647   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2648   ierr = PetscFree(recvcounts);CHKERRQ(ierr);
2649 
2650   if (A->symmetric){
2651     ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2652   } else if (A->hermitian) {
2653     ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr);
2654   } else if (A->structurally_symmetric) {
2655     ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2656   }
2657   *newmat = B;
2658   PetscFunctionReturn(0);
2659 }
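
/*
   Editor's note (not in the original source): every process ends up with the
   complete nonzero structure of A, stored as a sequential AIJ matrix over block
   rows and columns.  Two MPI_Allgatherv calls do the work: one gathers the
   per-block-row lengths, the other the column indices; only the structure is
   assembled, the numerical values are never communicated.
*/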
2660 
2661 #undef __FUNCT__
2662 #define __FUNCT__ "MatSOR_MPIBAIJ"
2663 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2664 {
2665   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
2666   PetscErrorCode ierr;
2667   Vec            bb1 = 0;
2668 
2669   PetscFunctionBegin;
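  /* bb1 is needed unless a single sweep starts from a zero initial guess; ~flag & SOR_ZERO_INITIAL_GUESS is nonzero exactly when that bit is unset */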
2670   if (flag == SOR_APPLY_UPPER) {
2671     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2672     PetscFunctionReturn(0);
2673   }
2674 
2675   if (its > 1 || !(flag & SOR_ZERO_INITIAL_GUESS)) { /* bb1 is needed only when an updated right-hand side must be formed; duplicating after the early return above avoids leaking it */
2676     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2677   }
2678 
2679   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2680     if (flag & SOR_ZERO_INITIAL_GUESS) {
2681       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2682       its--;
2683     }
2684 
2685     while (its--) {
2686       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2687       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2688 
2689       /* update rhs: bb1 = bb - B*x */
2690       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2691       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2692 
2693       /* local sweep */
2694       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2695     }
2696   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
2697     if (flag & SOR_ZERO_INITIAL_GUESS) {
2698       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2699       its--;
2700     }
2701     while (its--) {
2702       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2703       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2704 
2705       /* update rhs: bb1 = bb - B*x */
2706       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2707       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2708 
2709       /* local sweep */
2710       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2711     }
2712   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
2713     if (flag & SOR_ZERO_INITIAL_GUESS) {
2714       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2715       its--;
2716     }
2717     while (its--) {
2718       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2719       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2720 
2721       /* update rhs: bb1 = bb - B*x */
2722       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2723       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2724 
2725       /* local sweep */
2726       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2727     }
2728   } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Requested parallel SOR variant not supported");
2729 
2730   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2731   PetscFunctionReturn(0);
2732 }
2733 
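/*
   Usage sketch (hypothetical, not from this file): MatSOR_MPIBAIJ() is normally
   reached through the SOR preconditioner rather than called directly:

     PC pc;
     ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
     ierr = PCSetType(pc,PCSOR);CHKERRQ(ierr);
     ierr = PCSORSetSymmetric(pc,SOR_LOCAL_SYMMETRIC_SWEEP);CHKERRQ(ierr);

   Each outer iteration above scatters the ghost values of xx into mat->lvec,
   forms bb1 = bb - B*lvec, and runs the sequential SOR kernel on the diagonal
   block A.
*/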
2734 extern PetscErrorCode  MatFDColoringApply_BAIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2735 
2736 #undef __FUNCT__
2737 #define __FUNCT__ "MatInvertBlockDiagonal_MPIBAIJ"
2738 PetscErrorCode  MatInvertBlockDiagonal_MPIBAIJ(Mat A,PetscScalar **values)
2739 {
2740   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*) A->data;
2741   PetscErrorCode ierr;
2742 
2743   PetscFunctionBegin;
2744   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2745   PetscFunctionReturn(0);
2746 }
2747 
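/*
   Usage sketch (assumptions noted): the inverted point-block diagonals can be
   obtained as a flat array of bs*bs blocks; only the diagonal part a->A
   contributes because all diagonal blocks are stored there.  The array is
   owned by the matrix and should not be freed by the caller:

     PetscScalar *idiag;
     ierr = MatInvertBlockDiagonal(A,&idiag);CHKERRQ(ierr);
*/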
2748 
2749 /* -------------------------------------------------------------------*/
2750 static struct _MatOps MatOps_Values = {
2751        MatSetValues_MPIBAIJ,
2752        MatGetRow_MPIBAIJ,
2753        MatRestoreRow_MPIBAIJ,
2754        MatMult_MPIBAIJ,
2755 /* 4*/ MatMultAdd_MPIBAIJ,
2756        MatMultTranspose_MPIBAIJ,
2757        MatMultTransposeAdd_MPIBAIJ,
2758        0,
2759        0,
2760        0,
2761 /*10*/ 0,
2762        0,
2763        0,
2764        MatSOR_MPIBAIJ,
2765        MatTranspose_MPIBAIJ,
2766 /*15*/ MatGetInfo_MPIBAIJ,
2767        MatEqual_MPIBAIJ,
2768        MatGetDiagonal_MPIBAIJ,
2769        MatDiagonalScale_MPIBAIJ,
2770        MatNorm_MPIBAIJ,
2771 /*20*/ MatAssemblyBegin_MPIBAIJ,
2772        MatAssemblyEnd_MPIBAIJ,
2773        MatSetOption_MPIBAIJ,
2774        MatZeroEntries_MPIBAIJ,
2775 /*24*/ MatZeroRows_MPIBAIJ,
2776        0,
2777        0,
2778        0,
2779        0,
2780 /*29*/ MatSetUp_MPIBAIJ,
2781        0,
2782        0,
2783        0,
2784        0,
2785 /*34*/ MatDuplicate_MPIBAIJ,
2786        0,
2787        0,
2788        0,
2789        0,
2790 /*39*/ MatAXPY_MPIBAIJ,
2791        MatGetSubMatrices_MPIBAIJ,
2792        MatIncreaseOverlap_MPIBAIJ,
2793        MatGetValues_MPIBAIJ,
2794        MatCopy_MPIBAIJ,
2795 /*44*/ 0,
2796        MatScale_MPIBAIJ,
2797        0,
2798        0,
2799        0,
2800 /*49*/ 0,
2801        0,
2802        0,
2803        0,
2804        0,
2805 /*54*/ MatFDColoringCreate_MPIBAIJ,
2806        0,
2807        MatSetUnfactored_MPIBAIJ,
2808        MatPermute_MPIBAIJ,
2809        MatSetValuesBlocked_MPIBAIJ,
2810 /*59*/ MatGetSubMatrix_MPIBAIJ,
2811        MatDestroy_MPIBAIJ,
2812        MatView_MPIBAIJ,
2813        0,
2814        0,
2815 /*64*/ 0,
2816        0,
2817        0,
2818        0,
2819        0,
2820 /*69*/ MatGetRowMaxAbs_MPIBAIJ,
2821        0,
2822        0,
2823        0,
2824        0,
2825 /*74*/ 0,
2826        MatFDColoringApply_BAIJ,
2827        0,
2828        0,
2829        0,
2830 /*79*/ 0,
2831        0,
2832        0,
2833        0,
2834        MatLoad_MPIBAIJ,
2835 /*84*/ 0,
2836        0,
2837        0,
2838        0,
2839        0,
2840 /*89*/ 0,
2841        0,
2842        0,
2843        0,
2844        0,
2845 /*94*/ 0,
2846        0,
2847        0,
2848        0,
2849        0,
2850 /*99*/ 0,
2851        0,
2852        0,
2853        0,
2854        0,
2855 /*104*/0,
2856        MatRealPart_MPIBAIJ,
2857        MatImaginaryPart_MPIBAIJ,
2858        0,
2859        0,
2860 /*109*/0,
2861        0,
2862        0,
2863        0,
2864        0,
2865 /*114*/MatGetSeqNonzeroStructure_MPIBAIJ,
2866        0,
2867        MatGetGhosts_MPIBAIJ,
2868        0,
2869        0,
2870 /*119*/0,
2871        0,
2872        0,
2873        0,
2874        0,
2875 /*124*/0,
2876        0,
2877        MatInvertBlockDiagonal_MPIBAIJ
2878 };
2879 
2880 EXTERN_C_BEGIN
2881 #undef __FUNCT__
2882 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
2883 PetscErrorCode  MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a)
2884 {
2885   PetscFunctionBegin;
2886   *a = ((Mat_MPIBAIJ *)A->data)->A;
2887   PetscFunctionReturn(0);
2888 }
2889 EXTERN_C_END
2890 
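/*
   Usage sketch (hypothetical): obtain the local diagonal block, e.g. to build
   a process-local preconditioner.  The returned Mat references the internal
   MATSEQBAIJ matrix; it is not a copy and should not be destroyed:

     Mat Ad;
     ierr = MatGetDiagonalBlock(A,&Ad);CHKERRQ(ierr);
*/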
2891 EXTERN_C_BEGIN
2892 extern PetscErrorCode  MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*);
2893 EXTERN_C_END
2894 
2895 EXTERN_C_BEGIN
2896 #undef __FUNCT__
2897 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ"
2898 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2899 {
2900   PetscInt       m,rstart,cstart,cend;
2901   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
2902   const PetscInt *JJ=0;
2903   PetscScalar    *values=0;
2904   PetscErrorCode ierr;
2905 
2906   PetscFunctionBegin;
2907 
2908   if (bs < 1) SETERRQ1(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2909   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2910   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2911   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2912   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2913   m      = B->rmap->n/bs;
2914   rstart = B->rmap->rstart/bs;
2915   cstart = B->cmap->rstart/bs;
2916   cend   = B->cmap->rend/bs;
2917 
2918   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
2919   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
2920   for (i=0; i<m; i++) {
2921     nz = ii[i+1] - ii[i];
2922     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
2923     nz_max = PetscMax(nz_max,nz);
2924     JJ  = jj + ii[i];
2925     for (j=0; j<nz; j++) {
2926       if (*JJ >= cstart) break;
2927       JJ++;
2928     }
2929     d = 0;
2930     for (; j<nz; j++) {
2931       if (*JJ++ >= cend) break;
2932       d++;
2933     }
2934     d_nnz[i] = d;
2935     o_nnz[i] = nz - d;
2936   }
2937   ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2938   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
2939 
2940   values = (PetscScalar*)V;
2941   if (!values) {
2942     ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2943     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2944   }
2945   for (i=0; i<m; i++) {
2946     PetscInt          row    = i + rstart;
2947     PetscInt          ncols  = ii[i+1] - ii[i];
2948     const PetscInt    *icols = jj + ii[i];
2949     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2950     ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2951   }
2952 
2953   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2954   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2955   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2956   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2957   PetscFunctionReturn(0);
2958 }
2959 EXTERN_C_END
2960 
2961 #undef __FUNCT__
2962 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR"
2963 /*@C
2964    MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format
2965    (block compressed row storage).
2966 
2967    Collective on MPI_Comm
2968 
2969    Input Parameters:
2970 +  A - the matrix
2971 .  bs - the block size
2972 .  i - the indices into j for the start of each local block row (starts with zero)
2973 .  j - the block column indices for each local block row (starts with zero); these must be sorted within each row
2974 -  v - optional values in the matrix
2975 
2976    Level: developer
2977 
2978 .keywords: matrix, block, aij, compressed row, sparse, parallel
2979 
2980 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateBAIJ(), MATMPIBAIJ
2981 @*/
2982 PetscErrorCode  MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2983 {
2984   PetscErrorCode ierr;
2985 
2986   PetscFunctionBegin;
2987   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2988   PetscValidType(B,1);
2989   PetscValidLogicalCollectiveInt(B,bs,2);
2990   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
2991   PetscFunctionReturn(0);
2992 }
2993 
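/*
   Usage sketch (hypothetical data): build a BAIJ matrix from block-CSR arrays
   with bs = 2 and a single local block row holding block columns 0 and 1.  The
   value layout assumes the default row-oriented blocked ordering, bs*bs values
   per block within the row slab:

     PetscInt    ii[2] = {0,2};                  block-row pointers
     PetscInt    jj[2] = {0,1};                  sorted block-column indices
     PetscScalar vv[8] = {1,2,3,4,5,6,7,8};      values of the 2 x 4 slab
     ierr = MatMPIBAIJSetPreallocationCSR(B,2,ii,jj,vv);CHKERRQ(ierr);
*/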
2994 EXTERN_C_BEGIN
2995 #undef __FUNCT__
2996 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
2997 PetscErrorCode  MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
2998 {
2999   Mat_MPIBAIJ    *b;
3000   PetscErrorCode ierr;
3001   PetscInt       i, newbs = PetscAbs(bs);
3002   PetscBool      d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE;
3003 
3004   PetscFunctionBegin;
3005   if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE;
3006   if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE;
3007   if (bs < 0) {
3008     ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPIBAIJ matrix","Mat");CHKERRQ(ierr);
3009       ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr);
3010     ierr = PetscOptionsEnd();CHKERRQ(ierr);
3011     bs   = PetscAbs(bs);
3012   }
3013   if ((d_nnz || o_nnz) && newbs != bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting d_nnz or o_nnz");
3014   bs = newbs;
3015 
3016 
3017   if (bs < 1) SETERRQ(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
3018   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
3019   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
3020   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
3021   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
3022 
3023   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
3024   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
3025   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3026   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3027 
3028   if (d_nnz) {
3029     for (i=0; i<B->rmap->n/bs; i++) {
3030       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
3031     }
3032   }
3033   if (o_nnz) {
3034     for (i=0; i<B->rmap->n/bs; i++) {
3035       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
3036     }
3037   }
3038 
3039   b = (Mat_MPIBAIJ*)B->data;
3040   b->bs2 = bs*bs;
3041   b->mbs = B->rmap->n/bs;
3042   b->nbs = B->cmap->n/bs;
3043   b->Mbs = B->rmap->N/bs;
3044   b->Nbs = B->cmap->N/bs;
3045 
3046   for (i=0; i<=b->size; i++) {
3047     b->rangebs[i] = B->rmap->range[i]/bs;
3048   }
3049   b->rstartbs = B->rmap->rstart/bs;
3050   b->rendbs   = B->rmap->rend/bs;
3051   b->cstartbs = B->cmap->rstart/bs;
3052   b->cendbs   = B->cmap->rend/bs;
3053 
3054   if (!B->preallocated) {
3055     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3056     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3057     ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
3058     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3059     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3060     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3061     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
3062     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3063     ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr);
3064   }
3065 
3066   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3067   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
3068   /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). */
3069   if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3070   if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3071   B->preallocated = PETSC_TRUE;
3072   PetscFunctionReturn(0);
3073 }
3074 EXTERN_C_END
3075 
3076 EXTERN_C_BEGIN
3077 extern PetscErrorCode  MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
3078 extern PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
3079 EXTERN_C_END
3080 
3081 
3082 EXTERN_C_BEGIN
3083 #undef __FUNCT__
3084 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj"
3085 PetscErrorCode  MatConvert_MPIBAIJ_MPIAdj(Mat B, const MatType newtype,MatReuse reuse,Mat *adj)
3086 {
3087   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
3088   PetscErrorCode ierr;
3089   Mat_SeqBAIJ    *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data;
3090   PetscInt       M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs;
3091   const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;
3092 
3093   PetscFunctionBegin;
3094   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&ii);CHKERRQ(ierr);
3095   ii[0] = 0;
3096   CHKMEMQ;
3097   for (i=0; i<M; i++) {
3098     if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]);
3099     if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]);
3100     ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i];
3101     /* remove one from the count if this row of the matrix has a diagonal entry */
3102     for (j=id[i]; j<id[i+1]; j++) {
3103       if (jd[j] == i) {ii[i+1]--; break;}
3104     }
3105     CHKMEMQ;
3106   }
3107   ierr = PetscMalloc(ii[M]*sizeof(PetscInt),&jj);CHKERRQ(ierr);
3108   cnt = 0;
3109   for (i=0; i<M; i++) {
3110     for (j=io[i]; j<io[i+1]; j++) {
3111       if (garray[jo[j]] > rstart) break;
3112       jj[cnt++] = garray[jo[j]];
3113       CHKMEMQ;
3114     }
3115     for (k=id[i]; k<id[i+1]; k++) {
3116       if (jd[k] != i) {
3117         jj[cnt++] = rstart + jd[k];
3118         CHKMEMQ;
3119       }
3120     }
3121     for (; j<io[i+1]; j++) {
3122       jj[cnt++] = garray[jo[j]];
3123       CHKMEMQ;
3124     }
3125   }
3126   ierr = MatCreateMPIAdj(((PetscObject)B)->comm,M,B->cmap->N/B->rmap->bs,ii,jj,PETSC_NULL,adj);CHKERRQ(ierr);
3127   PetscFunctionReturn(0);
3128 }
3129 EXTERN_C_END
3130 
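/*
   Usage sketch (hypothetical): the MPIAdj matrix produced above (block rows,
   diagonal entries removed) is the structure expected by the partitioning
   routines:

     Mat             adj;
     MatPartitioning part;
     IS              is;
     ierr = MatConvert(B,MATMPIADJ,MAT_INITIAL_MATRIX,&adj);CHKERRQ(ierr);
     ierr = MatPartitioningCreate(((PetscObject)B)->comm,&part);CHKERRQ(ierr);
     ierr = MatPartitioningSetAdjacency(part,adj);CHKERRQ(ierr);
     ierr = MatPartitioningApply(part,&is);CHKERRQ(ierr);
*/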
3131 #include <../src/mat/impls/aij/mpi/mpiaij.h>
3132 EXTERN_C_BEGIN
3133 PetscErrorCode  MatConvert_SeqBAIJ_SeqAIJ(Mat,const MatType,MatReuse,Mat*);
3134 EXTERN_C_END
3135 
3136 EXTERN_C_BEGIN
3137 #undef __FUNCT__
3138 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAIJ"
3139 PetscErrorCode  MatConvert_MPIBAIJ_MPIAIJ(Mat A,const MatType newtype,MatReuse reuse,Mat *newmat)
3140 {
3141   PetscErrorCode ierr;
3142   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
3143   Mat            B;
3144   Mat_MPIAIJ     *b;
3145 
3146   PetscFunctionBegin;
3147   if (!A->assembled) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Matrix must be assembled");
3148 
3149   ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
3150   ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
3151   ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
3152   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
3153   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
3154   b = (Mat_MPIAIJ*) B->data;
3155 
3156   ierr = MatDestroy(&b->A);CHKERRQ(ierr);
3157   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
3158   ierr = MatDisAssemble_MPIBAIJ(A);CHKERRQ(ierr);
3159   ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr);
3160   ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr);
3161   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3162   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3163   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3164   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3165   if (reuse == MAT_REUSE_MATRIX) {
3166     ierr = MatHeaderReplace(A,B);CHKERRQ(ierr);
3167   } else {
3168     *newmat = B;
3169   }
3170   PetscFunctionReturn(0);
3171 }
3172 EXTERN_C_END
3173 
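/*
   Usage sketch (hypothetical): this routine is dispatched through MatConvert();
   MAT_INITIAL_MATRIX returns a new matrix while MAT_REUSE_MATRIX replaces A in
   place via MatHeaderReplace():

     Mat Aij;
     ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&Aij);CHKERRQ(ierr);
*/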
3174 EXTERN_C_BEGIN
3175 #if defined(PETSC_HAVE_MUMPS)
3176 extern PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*);
3177 #endif
3178 EXTERN_C_END
3179 
3180 /*MC
3181    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
3182 
3183    Options Database Keys:
3184 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
3185 . -mat_block_size <bs> - set the blocksize used to store the matrix
3186 - -mat_use_hash_table <fact> - use a hash table when constructing the matrix; <fact> scales the hash table size
3187 
3188   Level: beginner
3189 
3190 .seealso: MatCreateBAIJ(), MATSEQBAIJ, MATBAIJ, MatMPIBAIJSetPreallocation()
3191 M*/
3192 
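/*
   Usage sketch (hypothetical sizes): select this type at run time through the
   options database, then run with -mat_type mpibaij:

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);
     ierr = MatMPIBAIJSetPreallocation(A,2,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
*/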
3193 EXTERN_C_BEGIN
3194 extern PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,const MatType,MatReuse,Mat*);
3195 EXTERN_C_END
3196 
3197 EXTERN_C_BEGIN
3198 #undef __FUNCT__
3199 #define __FUNCT__ "MatCreate_MPIBAIJ"
3200 PetscErrorCode  MatCreate_MPIBAIJ(Mat B)
3201 {
3202   Mat_MPIBAIJ    *b;
3203   PetscErrorCode ierr;
3204   PetscBool      flg;
3205 
3206   PetscFunctionBegin;
3207   ierr = PetscNewLog(B,Mat_MPIBAIJ,&b);CHKERRQ(ierr);
3208   B->data = (void*)b;
3209 
3210   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3211   B->assembled  = PETSC_FALSE;
3212 
3213   B->insertmode = NOT_SET_VALUES;
3214   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
3215   ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr);
3216 
3217   /* build local table of row and column ownerships */
3218   ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);
3219 
3220   /* build cache for off array entries formed */
3221   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
3222   b->donotstash  = PETSC_FALSE;
3223   b->colmap      = PETSC_NULL;
3224   b->garray      = PETSC_NULL;
3225   b->roworiented = PETSC_TRUE;
3226 
3227   /* stuff used in block assembly */
3228   b->barray       = 0;
3229 
3230   /* stuff used for matrix vector multiply */
3231   b->lvec         = 0;
3232   b->Mvctx        = 0;
3233 
3234   /* stuff for MatGetRow() */
3235   b->rowindices   = 0;
3236   b->rowvalues    = 0;
3237   b->getrowactive = PETSC_FALSE;
3238 
3239   /* hash table stuff */
3240   b->ht           = 0;
3241   b->hd           = 0;
3242   b->ht_size      = 0;
3243   b->ht_flag      = PETSC_FALSE;
3244   b->ht_fact      = 0;
3245   b->ht_total_ct  = 0;
3246   b->ht_insert_ct = 0;
3247 
3248   /* stuff for MatGetSubMatrices_MPIBAIJ_local() */
3249   b->ijonly       = PETSC_FALSE;
3250 
3251   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr);
3252     ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
3253     if (flg) {
3254       PetscReal fact = 1.39;
3255       ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
3256       ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr);
3257       if (fact <= 1.0) fact = 1.39;
3258       ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
3259       ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
3260     }
3261   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3262 
3263 #if defined(PETSC_HAVE_MUMPS)
3264   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps",MatGetFactor_baij_mumps);CHKERRQ(ierr);
3265 #endif
3266   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",
3267                                      "MatConvert_MPIBAIJ_MPIAdj",
3268                                       MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr);
3269   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",
3270                                      "MatConvert_MPIBAIJ_MPIAIJ",
3271                                       MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr);
3272   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C",
3273                                      "MatConvert_MPIBAIJ_MPISBAIJ",
3274                                       MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr);
3275   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3276                                      "MatStoreValues_MPIBAIJ",
3277                                      MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
3278   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3279                                      "MatRetrieveValues_MPIBAIJ",
3280                                      MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
3281   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
3282                                      "MatGetDiagonalBlock_MPIBAIJ",
3283                                      MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
3284   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C",
3285                                      "MatMPIBAIJSetPreallocation_MPIBAIJ",
3286                                      MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
3287   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",
3288                                      "MatMPIBAIJSetPreallocationCSR_MPIBAIJ",
3289                                      MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
3290   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
3291                                      "MatDiagonalScaleLocal_MPIBAIJ",
3292                                      MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
3293   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C",
3294                                      "MatSetHashTableFactor_MPIBAIJ",
3295                                      MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
3296   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpibstrm_C",
3297                                      "MatConvert_MPIBAIJ_MPIBSTRM",
3298                                       MatConvert_MPIBAIJ_MPIBSTRM);CHKERRQ(ierr);
3299   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);
3300   PetscFunctionReturn(0);
3301 }
3302 EXTERN_C_END
3303 
3304 /*MC
3305    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
3306 
3307    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
3308    and MATMPIBAIJ otherwise.
3309 
3310    Options Database Keys:
3311 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
3312 
3313   Level: beginner
3314 
3315 .seealso: MatCreateBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3316 M*/
3317 
3318 #undef __FUNCT__
3319 #define __FUNCT__ "MatMPIBAIJSetPreallocation"
3320 /*@C
3321    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
3322    (block compressed row).  For good matrix assembly performance
3323    the user should preallocate the matrix storage by setting the parameters
3324    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3325    performance can be increased by more than a factor of 50.
3326 
3327    Collective on Mat
3328 
3329    Input Parameters:
3330 +  A - the matrix
3331 .  bs   - size of block
3332 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
3333            submatrix  (same for all local rows)
3334 .  d_nnz - array containing the number of block nonzeros in the various block rows
3335            in the diagonal portion of the local submatrix (possibly different for each block
3336            row) or PETSC_NULL.  If you plan to factor the matrix you must leave room for the diagonal entry and
3337            set it even if it is zero.
3338 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
3339            submatrix (same for all local rows).
3340 -  o_nnz - array containing the number of nonzeros in the various block rows of the
3341            off-diagonal portion of the local submatrix (possibly different for
3342            each block row) or PETSC_NULL.
3343 
3344    If the *_nnz parameter is given then the *_nz parameter is ignored
3345 
3346    Options Database Keys:
3347 +   -mat_block_size - size of the blocks to use
3348 -   -mat_use_hash_table <fact> - use a hash table when constructing the matrix; <fact> scales the hash table size
3349 
3350    Notes:
3351    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
3352    then it must be used on all processors that share the object for that argument.
3353 
3354    Storage Information:
3355    For a square global matrix we define each processor's diagonal portion
3356    to be its local rows and the corresponding columns (a square submatrix);
3357    each processor's off-diagonal portion encompasses the remainder of the
3358    local matrix (a rectangular submatrix).
3359 
3360    The user can specify preallocated storage for the diagonal part of
3361    the local submatrix with either d_nz or d_nnz (not both).  Set
3362    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
3363    memory allocation.  Likewise, specify preallocated storage for the
3364    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3365 
3366    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3367    the figure below we depict these three local rows and all columns (0-11).
3368 
3369 .vb
3370            0 1 2 3 4 5 6 7 8 9 10 11
3371           -------------------
3372    row 3  |  o o o d d d o o o o o o
3373    row 4  |  o o o d d d o o o o o o
3374    row 5  |  o o o d d d o o o o o o
3375           -------------------
3376 .ve
3377 
3378    Thus, any entries in the d locations are stored in the d (diagonal)
3379    submatrix, and any entries in the o locations are stored in the
3380    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3381    stored simply in the MATSEQBAIJ format for compressed row storage.
3382 
3383    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3384    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3385    In general, for PDE problems in which most nonzeros are near the diagonal,
3386    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3387    or you will get TERRIBLE performance; see the users' manual chapter on
3388    matrices.
3389 
3390    You can call MatGetInfo() to get information on how effective the preallocation was;
3391    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3392    You can also run with the option -info and look for messages with the string
3393    malloc in them to see if additional memory allocation was needed.
3394 
3395    Level: intermediate
3396 
3397 .keywords: matrix, block, aij, compressed row, sparse, parallel
3398 
3399 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocationCSR()
3400 @*/
3401 PetscErrorCode  MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3402 {
3403   PetscErrorCode ierr;
3404 
3405   PetscFunctionBegin;
3406   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3407   PetscValidType(B,1);
3408   PetscValidLogicalCollectiveInt(B,bs,2);
3409   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3410   PetscFunctionReturn(0);
3411 }
3412 
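/*
   Preallocation sketch for the layout pictured above (hypothetical, with
   bs = 3): the local block row has one diagonal block (columns 3-5) and three
   off-diagonal blocks (columns 0-2, 6-8 and 9-11), so an exact preallocation is

     ierr = MatMPIBAIJSetPreallocation(A,3,1,PETSC_NULL,3,PETSC_NULL);CHKERRQ(ierr);

   or, with per-block-row counts,

     PetscInt d_nnz[1] = {1}, o_nnz[1] = {3};
     ierr = MatMPIBAIJSetPreallocation(A,3,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
*/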
3413 #undef __FUNCT__
3414 #define __FUNCT__ "MatCreateBAIJ"
3415 /*@C
3416    MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format
3417    (block compressed row).  For good matrix assembly performance
3418    the user should preallocate the matrix storage by setting the parameters
3419    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3420    performance can be increased by more than a factor of 50.
3421 
3422    Collective on MPI_Comm
3423 
3424    Input Parameters:
3425 +  comm - MPI communicator
3426 .  bs   - size of block
3427 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3428            This value should be the same as the local size used in creating the
3429            y vector for the matrix-vector product y = Ax.
3430 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
3431            This value should be the same as the local size used in creating the
3432            x vector for the matrix-vector product y = Ax.
3433 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3434 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3435 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
3436            submatrix  (same for all local rows)
3437 .  d_nnz - array containing the number of nonzero blocks in the various block rows
3438            in the diagonal portion of the local submatrix (possibly different for each block
3439            row) or PETSC_NULL.  If you plan to factor the matrix you must leave room for the diagonal entry
3440            and set it even if it is zero.
3441 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3442            submatrix (same for all local rows).
3443 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
3444            off-diagonal portion of the local submatrix (possibly different for
3445            each block row) or PETSC_NULL.
3446 
3447    Output Parameter:
3448 .  A - the matrix
3449 
3450    Options Database Keys:
3451 +   -mat_block_size - size of the blocks to use
3452 -   -mat_use_hash_table <fact> - use a hash table when constructing the matrix; <fact> scales the hash table size
3453 
3454    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3455    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3456    [MatXXXXSetPreallocation() is, for example, MatSeqBAIJSetPreallocation()]
3457 
3458    Notes:
3459    If the *_nnz parameter is given then the *_nz parameter is ignored
3460 
3461    A nonzero block is any block that has 1 or more nonzeros in it
3462 
3463    The user MUST specify either the local or global matrix dimensions
3464    (possibly both).
3465 
3466    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
3467    then it must be used on all processors that share the object for that argument.
3468 
3469    Storage Information:
3470    For a square global matrix we define each processor's diagonal portion
3471    to be its local rows and the corresponding columns (a square submatrix);
3472    each processor's off-diagonal portion encompasses the remainder of the
3473    local matrix (a rectangular submatrix).
3474 
3475    The user can specify preallocated storage for the diagonal part of
3476    the local submatrix with either d_nz or d_nnz (not both).  Set
3477    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
3478    memory allocation.  Likewise, specify preallocated storage for the
3479    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3480 
3481    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3482    the figure below we depict these three local rows and all columns (0-11).
3483 
3484 .vb
3485            0 1 2 3 4 5 6 7 8 9 10 11
3486           -------------------
3487    row 3  |  o o o d d d o o o o o o
3488    row 4  |  o o o d d d o o o o o o
3489    row 5  |  o o o d d d o o o o o o
3490           -------------------
3491 .ve
3492 
3493    Thus, any entries in the d locations are stored in the d (diagonal)
3494    submatrix, and any entries in the o locations are stored in the
3495    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3496    stored simply in the MATSEQBAIJ format for compressed row storage.
3497 
3498    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3499    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3500    In general, for PDE problems in which most nonzeros are near the diagonal,
3501    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3502    or you will get TERRIBLE performance; see the users' manual chapter on
3503    matrices.
3504 
3505    Level: intermediate
3506 
3507 .keywords: matrix, block, aij, compressed row, sparse, parallel
3508 
3509 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3510 @*/
3511 PetscErrorCode  MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3512 {
3513   PetscErrorCode ierr;
3514   PetscMPIInt    size;
3515 
3516   PetscFunctionBegin;
3517   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3518   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3519   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3520   if (size > 1) {
3521     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
3522     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3523   } else {
3524     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3525     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3526   }
3527   PetscFunctionReturn(0);
3528 }
3529 
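/*
   Usage sketch (hypothetical sizes): create a square matrix with 2x2 blocks,
   letting PETSc choose the row distribution:

     Mat A;
     ierr = MatCreateBAIJ(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DECIDE,100,100,
                          5,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
*/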
3530 #undef __FUNCT__
3531 #define __FUNCT__ "MatDuplicate_MPIBAIJ"
3532 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3533 {
3534   Mat            mat;
3535   Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
3536   PetscErrorCode ierr;
3537   PetscInt       len=0;
3538 
3539   PetscFunctionBegin;
3540   *newmat       = 0;
3541   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
3542   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3543   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3544   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3545 
3546   mat->factortype   = matin->factortype;
3547   mat->preallocated = PETSC_TRUE;
3548   mat->assembled    = PETSC_TRUE;
3549   mat->insertmode   = NOT_SET_VALUES;
3550 
3551   a      = (Mat_MPIBAIJ*)mat->data;
3552   mat->rmap->bs  = matin->rmap->bs;
3553   a->bs2   = oldmat->bs2;
3554   a->mbs   = oldmat->mbs;
3555   a->nbs   = oldmat->nbs;
3556   a->Mbs   = oldmat->Mbs;
3557   a->Nbs   = oldmat->Nbs;
3558 
3559   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3560   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3561 
3562   a->size         = oldmat->size;
3563   a->rank         = oldmat->rank;
3564   a->donotstash   = oldmat->donotstash;
3565   a->roworiented  = oldmat->roworiented;
3566   a->rowindices   = 0;
3567   a->rowvalues    = 0;
3568   a->getrowactive = PETSC_FALSE;
3569   a->barray       = 0;
3570   a->rstartbs     = oldmat->rstartbs;
3571   a->rendbs       = oldmat->rendbs;
3572   a->cstartbs     = oldmat->cstartbs;
3573   a->cendbs       = oldmat->cendbs;
3574 
3575   /* hash table stuff */
3576   a->ht           = 0;
3577   a->hd           = 0;
3578   a->ht_size      = 0;
3579   a->ht_flag      = oldmat->ht_flag;
3580   a->ht_fact      = oldmat->ht_fact;
3581   a->ht_total_ct  = 0;
3582   a->ht_insert_ct = 0;
3583 
3584   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
3585   if (oldmat->colmap) {
3586 #if defined (PETSC_USE_CTABLE)
3587   ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3588 #else
3589   ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3590   ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3591   ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3592 #endif
3593   } else a->colmap = 0;
3594 
3595   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
3596     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3597     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3598     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
3599   } else a->garray = 0;
3600 
3601   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
3602   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3603   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3604   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3605   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3606 
3607   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3608   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3609   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3610   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3611   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3612   *newmat = mat;
3613 
3614   PetscFunctionReturn(0);
3615 }
3616 
3617 #undef __FUNCT__
3618 #define __FUNCT__ "MatLoad_MPIBAIJ"
3619 PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer)
3620 {
3621   PetscErrorCode ierr;
3622   int            fd;
3623   PetscInt       i,nz,j,rstart,rend;
3624   PetscScalar    *vals,*buf;
3625   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3626   MPI_Status     status;
3627   PetscMPIInt    rank,size,maxnz;
3628   PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
3629   PetscInt       *locrowlens = PETSC_NULL,*procsnz = PETSC_NULL,*browners = PETSC_NULL;
3630   PetscInt       jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax;
3631   PetscMPIInt    tag = ((PetscObject)viewer)->tag;
3632   PetscInt       *dlens = PETSC_NULL,*odlens = PETSC_NULL,*mask = PETSC_NULL,*masked1 = PETSC_NULL,*masked2 = PETSC_NULL,rowcount,odcount;
3633   PetscInt       dcount,kmax,k,nzcount,tmp,mend,sizesset=1,grows,gcols;
3634 
3635   PetscFunctionBegin;
3636   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr);
3637     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
3638   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3639 
3640   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3641   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3642   if (!rank) {
3643     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3644     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
3645     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3646   }
3647 
3648   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
3649 
3650   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3651   M = header[1]; N = header[2];
3652 
3653   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3654   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
3655   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
3656 
3657   /* If global sizes are set, check if they are consistent with that given in the file */
3658   if (sizesset) {
3659     ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
3660   }
3661   if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",M,grows);
3662   if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",N,gcols);
3663 
3664   if (M != N) SETERRQ(((PetscObject)viewer)->comm,PETSC_ERR_SUP,"Can only do square matrices");
3665 
3666   /*
3667      This code adds extra rows to make sure the number of rows is
3668      divisible by the blocksize
3669   */
3670   Mbs        = M/bs;
3671   extra_rows = bs - M + bs*Mbs;
3672   if (extra_rows == bs) extra_rows = 0;
3673   else                  Mbs++;
3674   if (extra_rows && !rank) {
3675     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3676   }
3677 
3678   /* determine ownership of all rows */
3679   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
3680     mbs        = Mbs/size + ((Mbs % size) > rank);
3681     m          = mbs*bs;
3682   } else { /* User set */
3683     m          = newmat->rmap->n;
3684     mbs        = m/bs;
3685   }
3686   ierr       = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr);
3687   ierr       = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3688 
3689   /* process 0 needs enough room for process with most rows */
3690   if (!rank) {
3691     mmax = rowners[1];
3692     for (i=2; i<=size; i++) {   /* rowners[1..size] hold the per-process block-row counts; include the last process */
3693       mmax = PetscMax(mmax,rowners[i]);
3694     }
3695     mmax *= bs;
3696   } else mmax = m;
3697 
3698   rowners[0] = 0;
3699   for (i=2; i<=size; i++)  rowners[i] += rowners[i-1];
3700   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
3701   rstart = rowners[rank];
3702   rend   = rowners[rank+1];
3703 
3704   /* distribute row lengths to all processors */
3705   ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr);
3706   if (!rank) {
3707     mend = m;
3708     if (size == 1) mend = mend - extra_rows;
3709     ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr);
3710     for (j=mend; j<m; j++) locrowlens[j] = 1;
3711     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3712     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3713     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3714     for (j=0; j<m; j++) {
3715       procsnz[0] += locrowlens[j];
3716     }
3717     for (i=1; i<size; i++) {
3718       mend = browners[i+1] - browners[i];
3719       if (i == size-1) mend = mend - extra_rows;
3720       ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr);
3721       for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
3722       /* calculate the number of nonzeros on each processor */
3723       for (j=0; j<browners[i+1]-browners[i]; j++) {
3724         procsnz[i] += rowlengths[j];
3725       }
3726       ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3727     }
3728     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3729   } else {
3730     ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3731   }
3732 
3733   if (!rank) {
3734     /* determine max buffer needed and allocate it */
3735     maxnz = procsnz[0];
3736     for (i=1; i<size; i++) {
3737       maxnz = PetscMax(maxnz,procsnz[i]);
3738     }
3739     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3740 
3741     /* read in my part of the matrix column indices  */
3742     nz     = procsnz[0];
3743     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
3744     mycols = ibuf;
3745     if (size == 1)  nz -= extra_rows;
3746     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3747     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
3748 
3749     /* read in everyone else's parts (except the last proc's) and ship them off */
3750     for (i=1; i<size-1; i++) {
3751       nz   = procsnz[i];
3752       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3753       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3754     }
3755     /* read in the stuff for the last proc */
3756     if (size != 1) {
3757       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
3758       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3759       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
3760       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
3761     }
3762     ierr = PetscFree(cols);CHKERRQ(ierr);
3763   } else {
3764     /* determine buffer space needed for message */
3765     nz = 0;
3766     for (i=0; i<m; i++) {
3767       nz += locrowlens[i];
3768     }
3769     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
3770     mycols = ibuf;
3771     /* receive message of column indices*/
3772     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3773     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
3774     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
3775   }
3776 
3777   /* loop over local rows, determining number of off diagonal entries */
3778   ierr     = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
3779   ierr     = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
3780   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3781   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3782   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3783   rowcount = 0; nzcount = 0;
3784   for (i=0; i<mbs; i++) {
3785     dcount  = 0;
3786     odcount = 0;
3787     for (j=0; j<bs; j++) {
3788       kmax = locrowlens[rowcount];
3789       for (k=0; k<kmax; k++) {
3790         tmp = mycols[nzcount++]/bs;
3791         if (!mask[tmp]) {
3792           mask[tmp] = 1;
3793           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
3794           else masked1[dcount++] = tmp;
3795         }
3796       }
3797       rowcount++;
3798     }
3799 
3800     dlens[i]  = dcount;
3801     odlens[i] = odcount;
3802 
3803     /* zero out the mask elements we set */
3804     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
3805     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
3806   }
3807 
3808 
3809   if (!sizesset) {
3810     ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3811   }
3812   ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
3813 
3814   if (!rank) {
3815     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
3816     /* read in my part of the matrix numerical values  */
3817     nz = procsnz[0];
3818     vals = buf;
3819     mycols = ibuf;
3820     if (size == 1)  nz -= extra_rows;
3821     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3822     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
3823 
3824     /* insert into matrix */
3825     jj      = rstart*bs;
3826     for (i=0; i<m; i++) {
3827       ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3828       mycols += locrowlens[i];
3829       vals   += locrowlens[i];
3830       jj++;
3831     }
3832     /* read in other processors (except the last one) and ship out */
3833     for (i=1; i<size-1; i++) {
3834       nz   = procsnz[i];
3835       vals = buf;
3836       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3837       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3838     }
3839     /* the last proc */
3840     if (size != 1) {
3841       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
3842       vals = buf;
3843       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3844       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
3845       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3846     }
3847     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3848   } else {
3849     /* receive numeric values */
3850     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
3851 
3852     /* receive message of values*/
3853     vals   = buf;
3854     mycols = ibuf;
3855     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
3856     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
3857     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
3858 
3859     /* insert into matrix */
3860     jj      = rstart*bs;
3861     for (i=0; i<m; i++) {
3862       ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3863       mycols += locrowlens[i];
3864       vals   += locrowlens[i];
3865       jj++;
3866     }
3867   }
3868   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
3869   ierr = PetscFree(buf);CHKERRQ(ierr);
3870   ierr = PetscFree(ibuf);CHKERRQ(ierr);
3871   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
3872   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
3873   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
3874   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3875   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3876 
3877   PetscFunctionReturn(0);
3878 }
3879 
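/*
   Usage sketch (hypothetical file name): load a matrix previously written with
   MatView() into a binary viewer; the block size is chosen with
   -matload_block_size <bs>:

     Mat         A;
     PetscViewer fd;
     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&fd);CHKERRQ(ierr);
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
     ierr = MatLoad(A,fd);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&fd);CHKERRQ(ierr);
*/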
3880 #undef __FUNCT__
3881 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
3882 /*@
3883    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
3884 
3885    Input Parameters:
3886 +  mat  - the matrix
3887 -  fact - factor
3888 
3889    Not Collective, each process can use a different factor
3890 
3891    Level: advanced
3892 
3893   Notes:
3894    This can also be set by the command line option: -mat_use_hash_table <fact>
3895 
3896 .keywords: matrix, hashtable, factor, HT
3897 
3898 .seealso: MatSetOption()
3899 @*/
3900 PetscErrorCode  MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
3901 {
3902   PetscErrorCode ierr;
3903 
3904   PetscFunctionBegin;
3905   ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr);
3906   PetscFunctionReturn(0);
3907 }
3908 
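/*
   Usage sketch: equivalent to running with -mat_use_hash_table 1.6 on a
   MATMPIBAIJ matrix (mirrors the option handling in MatCreate_MPIBAIJ() above):

     ierr = MatSetOption(A,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
     ierr = MatMPIBAIJSetHashTableFactor(A,1.6);CHKERRQ(ierr);
*/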
3909 EXTERN_C_BEGIN
3910 #undef __FUNCT__
3911 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
3912 PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
3913 {
3914   Mat_MPIBAIJ *baij;
3915 
3916   PetscFunctionBegin;
3917   baij = (Mat_MPIBAIJ*)mat->data;
3918   baij->ht_fact = fact;
3919   PetscFunctionReturn(0);
3920 }
3921 EXTERN_C_END
3922 
3923 #undef __FUNCT__
3924 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
3925 PetscErrorCode  MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3926 {
3927   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
3928   PetscFunctionBegin;
3929   *Ad     = a->A;
3930   *Ao     = a->B;
3931   *colmap = a->garray;
3932   PetscFunctionReturn(0);
3933 }
3934 
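/*
   Usage sketch (hypothetical): access the two sequential pieces and the map
   from local off-diagonal block columns to global block columns; garray[j] is
   the global block column of local block column j of Ao:

     Mat      Ad,Ao;
     PetscInt *garray;
     ierr = MatMPIBAIJGetSeqBAIJ(A,&Ad,&Ao,&garray);CHKERRQ(ierr);
*/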
3935 /*
3936     Special version for direct calls from Fortran (to eliminate two function call overheads)
3937 */
3938 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3939 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3940 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3941 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3942 #endif
3943 
3944 #undef __FUNCT__
3945 #define __FUNCT__ "matmpibaijsetvaluesblocked"
3946 /*@C
3947   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked()
3948 
3949   Collective on Mat
3950 
3951   Input Parameters:
3952 + mat - the matrix
3953 . min - number of input rows
3954 . im - input rows
3955 . nin - number of input columns
3956 . in - input columns
3957 . v - numerical values input
3958 - addvin - INSERT_VALUES or ADD_VALUES
3959 
3960   Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse.
3961 
3962   Level: advanced
3963 
3964 .seealso:   MatSetValuesBlocked()
3965 @*/
3966 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
3967 {
3968   /* convert input arguments to C version */
3969   Mat             mat = *matin;
3970   PetscInt        m = *min, n = *nin;
3971   InsertMode      addv = *addvin;
3972 
3973   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
3974   const MatScalar *value;
3975   MatScalar       *barray=baij->barray;
3976   PetscBool       roworiented = baij->roworiented;
3977   PetscErrorCode  ierr;
3978   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
3979   PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
3980   PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
3981 
3982   PetscFunctionBegin;
3983   /* tasks normally handled by MatSetValuesBlocked() */
3984   if (mat->insertmode == NOT_SET_VALUES) {
3985     mat->insertmode = addv;
3986   }
3987 #if defined(PETSC_USE_DEBUG)
3988   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
3989   if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
3990 #endif
3991   if (mat->assembled) {
3992     mat->was_assembled = PETSC_TRUE;
3993     mat->assembled     = PETSC_FALSE;
3994   }
3995   ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
3996 
3997 
3998   if (!barray) {
3999     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
4000     baij->barray = barray;
4001   }
4002 
4003   if (roworiented) {
4004     stepval = (n-1)*bs;
4005   } else {
4006     stepval = (m-1)*bs;
4007   }
4008   for (i=0; i<m; i++) {
4009     if (im[i] < 0) continue;
4010 #if defined(PETSC_USE_DEBUG)
4011     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
4012 #endif
4013     if (im[i] >= rstart && im[i] < rend) {
4014       row = im[i] - rstart;
4015       for (j=0; j<n; j++) {
4016         /* If there is a single block column (row oriented) or a single block row (column oriented), the block is already contiguous in v and no copy is required */
4017         if ((roworiented) && (n == 1)) {
4018           barray = (MatScalar*)v + i*bs2;
4019         } else if ((!roworiented) && (m == 1)) {
4020           barray = (MatScalar*)v + j*bs2;
4021         } else { /* Here a copy is required */
4022           if (roworiented) {
4023             value = v + i*(stepval+bs)*bs + j*bs;
4024           } else {
4025             value = v + j*(stepval+bs)*bs + i*bs;
4026           }
4027           for (ii=0; ii<bs; ii++,value+=stepval) {
4028             for (jj=0; jj<bs; jj++) {
4029               *barray++  = *value++;
4030             }
4031           }
4032           barray -= bs2;
4033         }
4034 
4035         if (in[j] >= cstart && in[j] < cend) {
4036           col  = in[j] - cstart;
4037           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
4038         }
4039         else if (in[j] < 0) continue;
4040 #if defined(PETSC_USE_DEBUG)
4041         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
4042 #endif
4043         else {
4044           if (mat->was_assembled) {
4045             if (!baij->colmap) {
4046               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
4047             }
4048 
4049 #if defined(PETSC_USE_DEBUG)
4050 #if defined (PETSC_USE_CTABLE)
4051             { PetscInt data;
4052               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
4053               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
4054             }
4055 #else
4056             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
4057 #endif
4058 #endif
4059 #if defined (PETSC_USE_CTABLE)
4060             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
4061             col  = (col - 1)/bs;
4062 #else
4063             col = (baij->colmap[in[j]] - 1)/bs;
4064 #endif
4065             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
4066               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
4067               col = in[j];
4068             }
4069           }
4070           else col = in[j];
4071           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
4072         }
4073       }
4074     } else {
4075       if (!baij->donotstash) {
4076         if (roworiented) {
4077           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
4078         } else {
4079           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
4080         }
4081       }
4082     }
4083   }
4084 
4085   /* task normally handled by MatSetValuesBlocked() */
4086   ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
4087   PetscFunctionReturn(0);
4088 }
4089 
4090 #undef __FUNCT__
4091 #define __FUNCT__ "MatCreateMPIBAIJWithArrays"
4092 /*@
4093      MatCreateMPIBAIJWithArrays - creates an MPI BAIJ matrix using arrays that contain the local
4094          rows in standard CSR format.
4095 
4096    Collective on MPI_Comm
4097 
4098    Input Parameters:
4099 +  comm - MPI communicator
4100 .  bs - the block size, only a block size of 1 is supported
4101 .  m - number of local rows (Cannot be PETSC_DECIDE)
4102 .  n - number of local columns; this should be the same as the local size used in creating the
4103        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
4104        calculated if N is given). For square matrices n is almost always m.
4105 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4106 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4107 .  i - row offsets into j and a (of length m+1; i[0] must be 0)
4108 .  j - column indices
4109 -  a - matrix values
4110 
4111    Output Parameter:
4112 .   mat - the matrix
4113 
4114    Level: intermediate
4115 
4116    Notes:
4117        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4118      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4119      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4120 
4121        The i and j indices are 0 based, and the i array holds, for each local row, the offset of that row's entries in the local j and a arrays.
4122 
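   Example usage (a minimal sketch, run on a single process): with bs = 1, the process owning
   the two rows

         [ 1  2 ]
         [ 0  3 ]

   would pass

.vb
      PetscInt    i[] = {0,2,3};
      PetscInt    j[] = {0,1,1};
      PetscScalar a[] = {1.0,2.0,3.0};
      Mat         A;
      ierr = MatCreateMPIBAIJWithArrays(PETSC_COMM_WORLD,1,2,2,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,&A);CHKERRQ(ierr);
.ve

   Here i[] gives the offsets of each row's entries in j[] and a[] (row 0 has two entries,
   row 1 has one).
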
4123 .keywords: matrix, baij, compressed row, sparse, parallel
4124 
4125 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR(),
4126           MATMPIBAIJ, MatCreateBAIJ(), MatCreateMPIAIJWithSplitArrays()
4127 @*/
4128 PetscErrorCode  MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4129 {
4130   PetscErrorCode ierr;
4131 
4132 
4133   PetscFunctionBegin;
4134   if (i[0]) {
4135     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4136   }
4137   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4138   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4139   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4140   ierr = MatSetType(*mat,MATMPIBAIJ);CHKERRQ(ierr);
4141   ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
4142   PetscFunctionReturn(0);
4143 }
4144