/* /petsc/src/mat/impls/baij/mpi/mpibaij.c  (revision fcc2139e2b6db8cad23d587d944bcbf09bc8202e) */
#define PETSCMAT_DLL

#include "../src/mat/impls/baij/mpi/mpibaij.h"   /*I  "petscmat.h"  I*/
#include "petscblaslapack.h"

EXTERN PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
EXTERN PetscErrorCode DisAssemble_MPIBAIJ(Mat);
EXTERN PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat,PetscInt,IS[],PetscInt);
EXTERN PetscErrorCode MatGetSubMatrices_MPIBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat *[]);
EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []);
EXTERN PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
EXTERN PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar);

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ"
PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;}
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);}
  ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

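  /*
     Merge the row maxima of the diagonal part (already in va) with those of
     the off-diagonal part (in vb).  When B wins, its column index idxb[i]
     is local to B and is translated back to a global index through garray:
     idxb[i]/bs picks the global block column garray[idxb[i]/bs], and
     idxb[i]%bs restores the offset within the block.  Illustration only
     (not from the original source): with bs=2 and garray = {5,9}, a local
     index idxb[i]=3 lies in local block 1, so the global column is
     2*garray[1] + 1 = 19.
  */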
  for (i=0; i<A->rmap->n; i++){
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {va[i] = vb[i]; if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);}
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  if (idxb) {ierr = PetscFree(idxb);CHKERRQ(ierr);}
  ierr = VecDestroy(vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPIBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*
     Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  This is done in a non-scalable way, since the
   length of colmap equals the number of global block columns (Nbs).
*/
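/*
   Illustration only (not part of the original source): with bs=2 and
   garray = {3,7}, global block column 3 is block column 0 of B and global
   block column 7 is block column 1, so colmap[3] = 0*bs+1 = 1 and
   colmap[7] = 1*bs+1 = 3.  The +1 shift lets a stored 0 mean "this global
   block column has no entry in B" in both the PetscTable and dense-array
   variants below.
*/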
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIBAIJ_Private"
PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr);
  for (i=0; i<nbs; i++){
    ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}

#define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto a_noinsert; \
        } \
      } \
      if (a->nonew == 1) goto a_noinsert; \
      if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
      MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      a_noinsert:; \
    ailen[brow] = nrow; \
}

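/*
   A note on the lookup in the macros above and below (comment added for
   clarity, not in the original): the block column indices rp[] of a row
   are sorted, so the macros first narrow the range with a bisection loop
   until at most three candidates remain (while (high-low > 3)) and then
   finish with a short linear scan.  This keeps the common short-row case
   cheap while remaining O(log nrow) for long rows.
*/
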
#define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto b_noinsert; \
        } \
      } \
      if (b->nonew == 1) goto b_noinsert; \
      if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
      MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
      CHKMEMQ;\
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      b_noinsert:; \
    bilen[brow] = nrow; \
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ"
PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  MatScalar      value;
  PetscTruth     roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig=mat->cmap->rend,bs=mat->rmap->bs;

  /* Some variables required in the macros */
  Mat            A = baij->A;
  Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)(A)->data;
  PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar      *aa=a->a;

  Mat            B = baij->B;
  Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(B)->data;
  PetscInt       *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar      *ba=b->a;

  PetscInt       *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt       low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar      *ap,*bap;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      row = im[i] - rstart_orig;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig){
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B = baij->B;
              b = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba=b->a;
            } else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  const PetscScalar *value;
  MatScalar         *barray=baij->barray;
  PetscTruth        roworiented = baij->roworiented;
  PetscErrorCode    ierr;
  PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
  PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If there is only one block column (row-oriented) or one block row (column-oriented), the block is already contiguous in v, so a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + (i*(stepval+bs) + j)*bs;
          } else {
            value = v + (j*(stepval+bs) + i)*bs;
          }
          for (ii=0; ii<bs; ii++,value+=bs+stepval) {
            for (jj=0; jj<bs; jj++) {
              barray[jj]  = value[jj];
            }
            barray += bs;
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend){
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
        else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined (PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
            }
          }
          else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#define HASH_KEY 0.6180339887
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
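/*
   The HASH macro implements multiplicative hashing with the golden-ratio
   constant: key*HASH_KEY is computed, the integer part is discarded, and
   the fractional part is scaled into a slot in [0,size).  Worked example
   (illustration only): size=11 and key=5 give tmp = 3.0901699...,
   fractional part 0.0901699..., and slot (PetscInt)(11*0.0901699...) = 0.
   Collisions are resolved by the linear probes in the routines below.
*/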
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscTruth     roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig=mat->rmap->rend,Nbs=baij->Nbs;
  PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
  PetscReal      tmp;
  MatScalar      **HD = baij->hd,value;
#if defined(PETSC_USE_DEBUG)
  PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j=0; j<n; j++) {
        col = in[j];
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        /* Look up into the hash table */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#endif
        /* A hash table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscTruth        roworiented = baij->roworiented;
  PetscErrorCode    ierr;
  PetscInt          i,j,ii,jj,row,col;
  PetscInt          rstart=baij->rstartbs;
  PetscInt          rend=mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
  PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  PetscReal         tmp;
  MatScalar         **HD = baij->hd,*baij_a;
  const PetscScalar *v_t,*value;
#if defined(PETSC_USE_DEBUG)
  PetscInt          total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
#endif
    row   = im[i];
    v_t   = v + i*nbs2;
    if (row >= rstart && row < rend) {
      for (j=0; j<n; j++) {
        col = in[j];

        /* Look up into the hash table */
        key = row*Nbs+col+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        total_ct++;
        insert_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#endif
        baij_a = HD[idx];
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t  += bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj]  += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj]  = *value++;
              }
            }
          }
        } else {
          value = v + j*(stepval+bs)*bs + i*bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj]  += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj]  = *value++;
              }
            }
          }
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIBAIJ"
PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
  PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend){
          col = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIBAIJ"
PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      nz = amat->nz*bs2;
      for (i=0; i<nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      nz = bmat->nz*bs2;
      for (i=0; i<nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      *nrm = sqrt(*nrm);
    } else if (type == NORM_1) { /* max column sum */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
      ierr = PetscMalloc2(mat->cmap->N,PetscReal,&tmp,mat->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      v = amat->a; jj = amat->j;
      for (i=0; i<amat->nz; i++) {
        for (j=0; j<bs; j++){
          col = bs*(cstart + *jj) + j; /* column index */
          for (row=0; row<bs; row++){
            tmp[col] += PetscAbsScalar(*v);  v++;
          }
        }
        jj++;
      }
      v = bmat->a; jj = bmat->j;
      for (i=0; i<bmat->nz; i++) {
        for (j=0; j<bs; j++){
          col = bs*garray[*jj] + j;
          for (row=0; row<bs; row++){
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      *nrm = 0.0;
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row sum */
      PetscReal *sums;
      ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr);
      sum = 0.0;
      for (j=0; j<amat->mbs; j++) {
        for (row=0; row<bs; row++) sums[row] = 0.0;
        v = amat->a + bs2*amat->i[j];
        nz = amat->i[j+1]-amat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++){
            for (row=0; row<bs; row++){
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        v = bmat->a + bs2*bmat->i[j];
        nz = bmat->i[j+1]-bmat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++){
            for (row=0; row<bs; row++){
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        for (row=0; row<bs; row++){
          if (sums[row] > sum) sum = sums[row];
        }
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
      ierr = PetscFree(sums);CHKERRQ(ierr);
    } else SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for this norm yet");
  }
  PetscFunctionReturn(0);
}

/*
  Creates the hash table and fills in its entries.
  The table is created only once.
  If new entries need to be added to the matrix
  then the hash table has to be destroyed and
  recreated.
*/
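/*
   A sketch of the collision strategy used below: each nonzero block gets
   the key row*Nbs+col+1 (the +1 keeps 0 free to mean "empty slot"), the
   HASH macro selects a starting slot h1, and the block pointer is stored
   in the first empty slot of the probe sequence h1, h1+1, ... taken
   modulo ht_size (linear probing with wraparound).  The *_HT MatSetValues
   routines above retrace the same probe sequence on lookup.
*/
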
#undef __FUNCT__
#define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            A = baij->A,B=baij->B;
  Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
  PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
  PetscErrorCode ierr;
  PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
  PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
  PetscInt       *HT,key;
  MatScalar      **HD;
  PetscReal      tmp;
#if defined(PETSC_USE_INFO)
  PetscInt       ct=0,max=0;
#endif

  PetscFunctionBegin;
  if (baij->ht) PetscFunctionReturn(0);

  baij->ht_size = (PetscInt)(factor*nz);
  ht_size       = baij->ht_size;

  /* Allocate Memory for Hash Table */
  ierr = PetscMalloc2(ht_size,MatScalar*,&baij->hd,ht_size,PetscInt,&baij->ht);CHKERRQ(ierr);
  ierr = PetscMemzero(baij->hd,ht_size*sizeof(MatScalar*));CHKERRQ(ierr);
  ierr = PetscMemzero(baij->ht,ht_size*sizeof(PetscInt));CHKERRQ(ierr);
  HD   = baij->hd;
  HT   = baij->ht;

  /* Loop Over A */
  for (i=0; i<a->mbs; i++) {
    for (j=ai[i]; j<ai[i+1]; j++) {
      row = i+rstart;
      col = aj[j]+cstart;

      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++){
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = a->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }
  /* Loop Over B */
  for (i=0; i<b->mbs; i++) {
    for (j=bi[i]; j<bi[i+1]; j++) {
      row = i+rstart;
      col = garray[bj[j]];
      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++){
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = b->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }

  /* Print Summary */
#if defined(PETSC_USE_INFO)
  for (i=0,j=0; i<ht_size; i++) {
    if (HT[i]) {j++;}
  }
  ierr = PetscInfo2(mat,"Average Search = %5.2f, max search = %D\n",(!j) ? 0.0 : ((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (baij->donotstash || mat->nooffprocentries) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted values while others added");
  mat->insertmode = addv; /* in case this processor had no cache */
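  /*
     How the check above works (assuming the standard InsertMode values
     NOT_SET_VALUES=0, INSERT_VALUES=1, ADD_VALUES=2): the bitwise OR over
     all ranks is 1 if every active rank inserted, 2 if every active rank
     added, and equals (ADD_VALUES|INSERT_VALUES)=3 only when the two modes
     were mixed; ranks that set no values contribute 0 and are absorbed.
  */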

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscTruth     r1,r2,r3,other_disassembled;
  MatScalar      *val;
  InsertMode     addv = mat->insertmode;
  PetscMPIInt    n;

  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!baij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column-oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;
    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, and thus this step can be skipped
  */
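  /*
     A sketch of the reduction below: each rank contributes its 0/1
     was_assembled flag and MPI_PROD acts as a distributed logical AND.
     The product is 1 only if every rank is still assembled; a single
     rank that disassembled contributes 0, forcing other_disassembled
     to 0 everywhere so that all ranks disassemble and reassemble in step.
  */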
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  baij->rowvalues = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = baij->size,rank = baij->rank;
  PetscInt          bs = mat->rmap->bs;
  PetscTruth        iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
             rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar   *a;

    /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ*)baij->B->data;
    ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ"
PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii || isdraw || issocket || isbinary) {
    ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(((PetscObject)mat)->comm,PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIBAIJ"
PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  if (baij->A) {ierr = MatDestroy(baij->A);CHKERRQ(ierr);}
  if (baij->B) {ierr = MatDestroy(baij->B);CHKERRQ(ierr);}
#if defined (PETSC_USE_CTABLE)
  if (baij->colmap) {ierr = PetscTableDestroy(baij->colmap);CHKERRQ(ierr);}
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
  if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(baij);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIBAIJ"
PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
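  /*
     The four calls below overlap communication with computation: the
     scatter gathering the ghost entries of xx into a->lvec is started,
     the purely local product with the diagonal block a->A runs while the
     messages are in flight, and after the scatter completes the
     off-diagonal block contributes B*lvec on top of the local result.
  */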
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIBAIJ"
PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIBAIJ"
PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscTruth     merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually */
    /* inserted in yy until the next line */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in zz until the next line, which is true for this implementation */
  /* but may not hold for every scatter implementation. */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
   diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIBAIJ"
PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIBAIJ"
PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;

  PetscFunctionBegin;
  if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
  if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
    PetscInt     max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr);
  }
  lrow = row - brstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap  = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
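      /*
         imark counts how many of B's entries precede the diagonal block:
         B's block columns (translated to global numbering through
         cmap=garray) that fall before cstart are copied first, then all
         of A's entries, then the remaining B entries, yielding one row
         sorted by global column index.
      */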
1287       if (v) {
1288         *v = v_p = mat->rowvalues;
1289         for (i=0; i<nzB; i++) {
1290           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1291           else break;
1292         }
1293         imark = i;
1294         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1295         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1296       }
1297       if (idx) {
1298         *idx = idx_p = mat->rowindices;
1299         if (imark > -1) {
1300           for (i=0; i<imark; i++) {
1301             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1302           }
1303         } else {
1304           for (i=0; i<nzB; i++) {
1305             if (cmap[cworkB[i]/bs] < cstart)
1306               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1307             else break;
1308           }
1309           imark = i;
1310         }
1311         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1312         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1313       }
1314     } else {
1315       if (idx) *idx = 0;
1316       if (v)   *v   = 0;
1317     }
1318   }
1319   *nz = nztot;
1320   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1321   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1322   PetscFunctionReturn(0);
1323 }
1324 
1325 #undef __FUNCT__
1326 #define __FUNCT__ "MatRestoreRow_MPIBAIJ"
1327 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1328 {
1329   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1330 
1331   PetscFunctionBegin;
1332   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1333   baij->getrowactive = PETSC_FALSE;
1334   PetscFunctionReturn(0);
1335 }
1336 
1337 #undef __FUNCT__
1338 #define __FUNCT__ "MatZeroEntries_MPIBAIJ"
1339 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1340 {
1341   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1342   PetscErrorCode ierr;
1343 
1344   PetscFunctionBegin;
1345   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1346   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1347   PetscFunctionReturn(0);
1348 }
1349 
1350 #undef __FUNCT__
1351 #define __FUNCT__ "MatGetInfo_MPIBAIJ"
1352 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1353 {
1354   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
1355   Mat            A = a->A,B = a->B;
1356   PetscErrorCode ierr;
1357   PetscReal      isend[5],irecv[5];
1358 
1359   PetscFunctionBegin;
1360   info->block_size     = (PetscReal)matin->rmap->bs;
1361   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1362   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1363   isend[3] = info->memory;  isend[4] = info->mallocs;
1364   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1365   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1366   isend[3] += info->memory;  isend[4] += info->mallocs;
1367   if (flag == MAT_LOCAL) {
1368     info->nz_used      = isend[0];
1369     info->nz_allocated = isend[1];
1370     info->nz_unneeded  = isend[2];
1371     info->memory       = isend[3];
1372     info->mallocs      = isend[4];
1373   } else if (flag == MAT_GLOBAL_MAX) {
1374     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1375     info->nz_used      = irecv[0];
1376     info->nz_allocated = irecv[1];
1377     info->nz_unneeded  = irecv[2];
1378     info->memory       = irecv[3];
1379     info->mallocs      = irecv[4];
1380   } else if (flag == MAT_GLOBAL_SUM) {
1381     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1382     info->nz_used      = irecv[0];
1383     info->nz_allocated = irecv[1];
1384     info->nz_unneeded  = irecv[2];
1385     info->memory       = irecv[3];
1386     info->mallocs      = irecv[4];
1387   } else {
1388     SETERRQ1(((PetscObject)matin)->comm,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1389   }
1390   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1391   info->fill_ratio_needed = 0;
1392   info->factor_mallocs    = 0;
1393   PetscFunctionReturn(0);
1394 }
1395 
1396 #undef __FUNCT__
1397 #define __FUNCT__ "MatSetOption_MPIBAIJ"
1398 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscTruth flg)
1399 {
1400   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1401   PetscErrorCode ierr;
1402 
1403   PetscFunctionBegin;
1404   switch (op) {
1405   case MAT_NEW_NONZERO_LOCATIONS:
1406   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1407   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1408   case MAT_KEEP_NONZERO_PATTERN:
1409   case MAT_NEW_NONZERO_LOCATION_ERR:
1410     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1411     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1412     break;
1413   case MAT_ROW_ORIENTED:
1414     a->roworiented = flg;
1415     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1416     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1417     break;
1418   case MAT_NEW_DIAGONALS:
1419     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1420     break;
1421   case MAT_IGNORE_OFF_PROC_ENTRIES:
1422     a->donotstash = flg;
1423     break;
1424   case MAT_USE_HASH_TABLE:
1425     a->ht_flag = flg;
1426     break;
1427   case MAT_SYMMETRIC:
1428   case MAT_STRUCTURALLY_SYMMETRIC:
1429   case MAT_HERMITIAN:
1430   case MAT_SYMMETRY_ETERNAL:
1431     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1432     break;
1433   default:
1434     SETERRQ1(((PetscObject)A)->comm,PETSC_ERR_SUP,"Unknown option %d",op);
1435   }
1436   PetscFunctionReturn(0);
1437 }
1438 
1439 #undef __FUNCT__
1440 #define __FUNCT__ "MatTranspose_MPIBAIJ"
1441 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
1442 {
1443   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
1444   Mat_SeqBAIJ    *Aloc;
1445   Mat            B;
1446   PetscErrorCode ierr;
1447   PetscInt       M=A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
1448   PetscInt       bs=A->rmap->bs,mbs=baij->mbs;
1449   MatScalar      *a;
1450 
1451   PetscFunctionBegin;
1452   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place transpose");
1453   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1454     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
1455     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1456     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1457     ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1458   } else {
1459     B = *matout;
1460   }
1461 
1462   /* copy over the A part */
1463   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1464   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1465   ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
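  /*
     rvals holds the bs global point-row indices covered by one local block
     row; e.g. (made-up sizes) bs=2 with rstartbs=3 gives, for block row i=1,
     rvals = {8,9}.
  */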
1466 
1467   for (i=0; i<mbs; i++) {
1468     rvals[0] = bs*(baij->rstartbs + i);
1469     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1470     for (j=ai[i]; j<ai[i+1]; j++) {
1471       col = (baij->cstartbs+aj[j])*bs;
1472       for (k=0; k<bs; k++) {
1473         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1474         col++; a += bs;
1475       }
1476     }
1477   }
1478   /* copy over the B part */
1479   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1480   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1481   for (i=0; i<mbs; i++) {
1482     rvals[0] = bs*(baij->rstartbs + i);
1483     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1484     for (j=ai[i]; j<ai[i+1]; j++) {
1485       col = baij->garray[aj[j]]*bs;
1486       for (k=0; k<bs; k++) {
1487         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1488         col++; a += bs;
1489       }
1490     }
1491   }
1492   ierr = PetscFree(rvals);CHKERRQ(ierr);
1493   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1494   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1495 
1496   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1497     *matout = B;
1498   } else {
1499     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
1500   }
1501   PetscFunctionReturn(0);
1502 }
1503 
1504 #undef __FUNCT__
1505 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
1506 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1507 {
1508   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1509   Mat            a = baij->A,b = baij->B;
1510   PetscErrorCode ierr;
1511   PetscInt       s1,s2,s3;
1512 
1513   PetscFunctionBegin;
1514   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1515   if (rr) {
1516     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1517     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1518     /* Overlap communication with computation. */
1519     ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1520   }
1521   if (ll) {
1522     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1523     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1524     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1525   }
1526   /* scale the diagonal block */
1527   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1528 
1529   if (rr) {
1530     /* Do a scatter end and then right scale the off-diagonal block */
1531     ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1532     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1533   }
1534 
1535   PetscFunctionReturn(0);
1536 }
1537 
1538 #undef __FUNCT__
1539 #define __FUNCT__ "MatZeroRows_MPIBAIJ"
1540 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
1541 {
1542   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1543   PetscErrorCode ierr;
1544   PetscMPIInt    imdex,size = l->size,n,rank = l->rank;
1545   PetscInt       i,*owners = A->rmap->range;
1546   PetscInt       *nprocs,j,idx,nsends,row;
1547   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
1548   PetscInt       *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source,lastidx = -1;
1549   PetscInt       *lens,*lrows,*values,rstart_bs=A->rmap->rstart;
1550   MPI_Comm       comm = ((PetscObject)A)->comm;
1551   MPI_Request    *send_waits,*recv_waits;
1552   MPI_Status     recv_status,*send_status;
1553 #if defined(PETSC_DEBUG)
1554   PetscTruth     found = PETSC_FALSE;
1555 #endif
1556 
1557   PetscFunctionBegin;
1558   /*  first count number of contributors to each processor */
1559   ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
1560   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
1561   ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
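  /*
     The ownership search below exploits mostly-sorted input: the cursor j is
     reset to rescan from process 0 only when a row index decreases, so fully
     sorted rows[] are located in a single linear pass over the ranges.
  */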
1562   j = 0;
1563   for (i=0; i<N; i++) {
1564     if (lastidx > (idx = rows[i])) j = 0;
1565     lastidx = idx;
1566     for (; j<size; j++) {
1567       if (idx >= owners[j] && idx < owners[j+1]) {
1568         nprocs[2*j]++;
1569         nprocs[2*j+1] = 1;
1570         owner[i] = j;
1571 #if defined(PETSC_DEBUG)
1572         found = PETSC_TRUE;
1573 #endif
1574         break;
1575       }
1576     }
1577 #if defined(PETSC_DEBUG)
1578     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
1579     found = PETSC_FALSE;
1580 #endif
1581   }
1582   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
1583 
1584   if (A->nooffproczerorows) {
1585     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but requested zeroing a row owned by another process");
1586     nrecvs = nsends;
1587     nmax   = N;
1588   } else {
1589     /* inform other processors of number of messages and max length*/
1590     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
1591   }
1592 
1593   /* post receives:   */
1594   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
1595   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
1596   for (i=0; i<nrecvs; i++) {
1597     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
1598   }
1599 
1600   /* do sends:
1601      1) starts[i] gives the starting index in svalues for the rows going to
1602      the ith processor
1603   */
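  /*
     A small worked example with made-up sizes: on 3 processes with row
     counts nprocs[2*i] = {2,0,3}, the prefix sums give starts[] = {0,2,2},
     so rows owned by process 0 pack into svalues[0..1] and rows owned by
     process 2 into svalues[2..4]; after packing, starts[] is rebuilt the
     same way to recover each send's offset.
  */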
1604   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
1605   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
1606   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
1607   starts[0]  = 0;
1608   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1609   for (i=0; i<N; i++) {
1610     svalues[starts[owner[i]]++] = rows[i];
1611   }
1612 
1613   starts[0] = 0;
1614   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1615   count = 0;
1616   for (i=0; i<size; i++) {
1617     if (nprocs[2*i+1]) {
1618       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
1619     }
1620   }
1621   ierr = PetscFree(starts);CHKERRQ(ierr);
1622 
1623   base = owners[rank];
1624 
1625   /*  wait on receives */
1626   ierr   = PetscMalloc2(nrecvs+1,PetscInt,&lens,nrecvs+1,PetscInt,&source);CHKERRQ(ierr);
1627   count  = nrecvs;
1628   slen = 0;
1629   while (count) {
1630     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
1631     /* unpack receives into our local space */
1632     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
1633     source[imdex]  = recv_status.MPI_SOURCE;
1634     lens[imdex]    = n;
1635     slen          += n;
1636     count--;
1637   }
1638   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1639 
1640   /* move the data into the send scatter */
1641   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
1642   count = 0;
1643   for (i=0; i<nrecvs; i++) {
1644     values = rvalues + i*nmax;
1645     for (j=0; j<lens[i]; j++) {
1646       lrows[count++] = values[j] - base;
1647     }
1648   }
1649   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1650   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
1651   ierr = PetscFree(owner);CHKERRQ(ierr);
1652   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1653 
1654   /* actually zap the local rows */
1655   /*
1656         Zero the required rows. If the "diagonal block" of the matrix
1657      is square and the user wishes to set the diagonal, we use separate
1658      code so that MatSetValues() is not called for each diagonal entry;
1659      that would allocate new memory on every insertion, triggering many
1660      mallocs and slowing things down.
1661   */
1662   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1663   ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0);CHKERRQ(ierr);
1664   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
1665     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag);CHKERRQ(ierr);
1666   } else if (diag != 0.0) {
1667     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr);
1668     if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1669        MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1670     for (i=0; i<slen; i++) {
1671       row  = lrows[i] + rstart_bs;
1672       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
1673     }
1674     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1675     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1676   } else {
1677     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr);
1678   }
1679 
1680   ierr = PetscFree(lrows);CHKERRQ(ierr);
1681 
1682   /* wait on sends */
1683   if (nsends) {
1684     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1685     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1686     ierr = PetscFree(send_status);CHKERRQ(ierr);
1687   }
1688   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1689   ierr = PetscFree(svalues);CHKERRQ(ierr);
1690 
1691   PetscFunctionReturn(0);
1692 }
1693 
1694 #undef __FUNCT__
1695 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
1696 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1697 {
1698   Mat_MPIBAIJ    *a   = (Mat_MPIBAIJ*)A->data;
1699   PetscErrorCode ierr;
1700 
1701   PetscFunctionBegin;
1702   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1703   PetscFunctionReturn(0);
1704 }
1705 
1706 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
1707 
1708 #undef __FUNCT__
1709 #define __FUNCT__ "MatEqual_MPIBAIJ"
1710 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag)
1711 {
1712   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1713   Mat            a,b,c,d;
1714   PetscTruth     flg;
1715   PetscErrorCode ierr;
1716 
1717   PetscFunctionBegin;
1718   a = matA->A; b = matA->B;
1719   c = matB->A; d = matB->B;
1720 
1721   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1722   if (flg) {
1723     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1724   }
1725   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1726   PetscFunctionReturn(0);
1727 }
1728 
1729 #undef __FUNCT__
1730 #define __FUNCT__ "MatCopy_MPIBAIJ"
1731 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
1732 {
1733   PetscErrorCode ierr;
1734   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ *)A->data;
1735   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ *)B->data;
1736 
1737   PetscFunctionBegin;
1738   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1739   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1740     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1741   } else {
1742     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1743     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1744   }
1745   PetscFunctionReturn(0);
1746 }
1747 
1748 #undef __FUNCT__
1749 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ"
1750 PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A)
1751 {
1752   PetscErrorCode ierr;
1753 
1754   PetscFunctionBegin;
1755   ierr =  MatMPIBAIJSetPreallocation(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1756   PetscFunctionReturn(0);
1757 }
1758 
1759 #undef __FUNCT__
1760 #define __FUNCT__ "MatAXPY_MPIBAIJ"
1761 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1762 {
1763   PetscErrorCode ierr;
1764   Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data;
1765   PetscBLASInt   bnz,one=1;
1766   Mat_SeqBAIJ    *x,*y;
1767 
1768   PetscFunctionBegin;
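  /* When the nonzero patterns match, Y += a*X reduces to one BLAS axpy over
     each stored-value array (the diagonal parts, then the off-diagonal
     parts); otherwise we fall back to the generic entry-by-entry version. */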
1769   if (str == SAME_NONZERO_PATTERN) {
1770     PetscScalar alpha = a;
1771     x = (Mat_SeqBAIJ *)xx->A->data;
1772     y = (Mat_SeqBAIJ *)yy->A->data;
1773     bnz = PetscBLASIntCast(x->nz);
1774     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1775     x = (Mat_SeqBAIJ *)xx->B->data;
1776     y = (Mat_SeqBAIJ *)yy->B->data;
1777     bnz = PetscBLASIntCast(x->nz);
1778     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1779   } else {
1780     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1781   }
1782   PetscFunctionReturn(0);
1783 }
1784 
1785 #undef __FUNCT__
1786 #define __FUNCT__ "MatSetBlockSize_MPIBAIJ"
1787 PetscErrorCode MatSetBlockSize_MPIBAIJ(Mat A,PetscInt bs)
1788 {
1789   Mat_MPIBAIJ    *a   = (Mat_MPIBAIJ*)A->data;
1790   PetscInt rbs,cbs;
1791   PetscErrorCode ierr;
1792 
1793   PetscFunctionBegin;
1794   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1795   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1796   ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr);
1797   ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr);
1798   if (rbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,rbs);
1799   if (cbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,cbs);
1800   PetscFunctionReturn(0);
1801 }
1802 
1803 #undef __FUNCT__
1804 #define __FUNCT__ "MatRealPart_MPIBAIJ"
1805 PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
1806 {
1807   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ*)A->data;
1808   PetscErrorCode ierr;
1809 
1810   PetscFunctionBegin;
1811   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1812   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1813   PetscFunctionReturn(0);
1814 }
1815 
1816 #undef __FUNCT__
1817 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ"
1818 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
1819 {
1820   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ*)A->data;
1821   PetscErrorCode ierr;
1822 
1823   PetscFunctionBegin;
1824   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1825   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1826   PetscFunctionReturn(0);
1827 }
1828 
1829 #undef __FUNCT__
1830 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ"
1831 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
1832 {
1833   PetscErrorCode ierr;
1834   IS             iscol_local;
1835   PetscInt       csize;
1836 
1837   PetscFunctionBegin;
1838   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
1839   if (call == MAT_REUSE_MATRIX) {
1840     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
1841     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
1842   } else {
1843     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
1844   }
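  /* The gathered column IS is composed onto the new matrix below, so a later
     MAT_REUSE_MATRIX call can retrieve it with PetscObjectQuery() instead of
     repeating the allgather. */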
1845   ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
1846   if (call == MAT_INITIAL_MATRIX) {
1847     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
1848     ierr = ISDestroy(iscol_local);CHKERRQ(ierr);
1849   }
1850   PetscFunctionReturn(0);
1851 }
1852 
1853 #undef __FUNCT__
1854 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ_Private"
1855 /*
1856     Not great since it makes two copies of the submatrix: first a SeqBAIJ
1857   on each process, and then the final result by concatenating the local
1858   matrices. Writing it directly would be much like MatGetSubMatrices_MPIBAIJ().
1859 */
1860 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
1861 {
1862   PetscErrorCode ierr;
1863   PetscMPIInt    rank,size;
1864   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs;
1865   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
1866   Mat            *local,M,Mreuse;
1867   MatScalar      *vwork,*aa;
1868   MPI_Comm       comm = ((PetscObject)mat)->comm;
1869   Mat_SeqBAIJ    *aij;
1870 
1871 
1872   PetscFunctionBegin;
1873   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1874   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1875 
1876   if (call ==  MAT_REUSE_MATRIX) {
1877     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
1878     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
1879     local = &Mreuse;
1880     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
1881   } else {
1882     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
1883     Mreuse = *local;
1884     ierr   = PetscFree(local);CHKERRQ(ierr);
1885   }
1886 
1887   /*
1888       m - number of local rows
1889       n - number of columns (same on all processors)
1890       rstart - first row in new global matrix generated
1891   */
1892   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
1893   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
1894   m    = m/bs;
1895   n    = n/bs;
1896 
1897   if (call == MAT_INITIAL_MATRIX) {
1898     aij = (Mat_SeqBAIJ*)(Mreuse)->data;
1899     ii  = aij->i;
1900     jj  = aij->j;
1901 
1902     /*
1903         Determine the number of non-zeros in the diagonal and off-diagonal
1904         portions of the matrix in order to do correct preallocation
1905     */
1906 
1907     /* first get start and end of "diagonal" columns */
1908     if (csize == PETSC_DECIDE) {
1909       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
1910       if (mglobal == n*bs) { /* square matrix */
1911         nlocal = m;
1912       } else {
1913         nlocal = n/size + ((n % size) > rank);
1914       }
1915     } else {
1916       nlocal = csize/bs;
1917     }
1918     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
1919     rstart = rend - nlocal;
1920     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
1921 
1922     /* next, compute all the lengths */
1923     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
1924     olens = dlens + m;
1925     for (i=0; i<m; i++) {
1926       jend = ii[i+1] - ii[i];
1927       olen = 0;
1928       dlen = 0;
1929       for (j=0; j<jend; j++) {
1930         if (*jj < rstart || *jj >= rend) olen++;
1931         else dlen++;
1932         jj++;
1933       }
1934       olens[i] = olen;
1935       dlens[i] = dlen;
1936     }
1937     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
1938     ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr);
1939     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
1940     ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
1941     ierr = PetscFree(dlens);CHKERRQ(ierr);
1942   } else {
1943     PetscInt ml,nl;
1944 
1945     M = *newmat;
1946     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
1947     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
1948     ierr = MatZeroEntries(M);CHKERRQ(ierr);
1949     /*
1950          The next two lines are needed so we may call MatSetValuesBlocked_MPIBAIJ() below directly,
1951        rather than the slower MatSetValues().
1952     */
1953     M->was_assembled = PETSC_TRUE;
1954     M->assembled     = PETSC_FALSE;
1955   }
1956   ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
1957   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
1958   aij = (Mat_SeqBAIJ*)(Mreuse)->data;
1959   ii  = aij->i;
1960   jj  = aij->j;
1961   aa  = aij->a;
1962   for (i=0; i<m; i++) {
1963     row   = rstart/bs + i;
1964     nz    = ii[i+1] - ii[i];
1965     cwork = jj;     jj += nz;
1966     vwork = aa;     aa += nz;
1967     ierr = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
1968   }
1969 
1970   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1971   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1972   *newmat = M;
1973 
1974   /* save submatrix used in processor for next request */
1975   if (call ==  MAT_INITIAL_MATRIX) {
1976     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
1977     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
1978   }
1979 
1980   PetscFunctionReturn(0);
1981 }
1982 
1983 #undef __FUNCT__
1984 #define __FUNCT__ "MatPermute_MPIBAIJ"
1985 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B)
1986 {
1987   MPI_Comm       comm,pcomm;
1988   PetscInt       first,local_size,nrows;
1989   const PetscInt *rows;
1990   PetscMPIInt    size;
1991   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1992   PetscErrorCode ierr;
1993 
1994   PetscFunctionBegin;
1995   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1996   /* make a collective version of 'rowp' */
1997   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
1998   if (pcomm==comm) {
1999     crowp = rowp;
2000   } else {
2001     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
2002     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
2003     ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr);
2004     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
2005   }
2006   /* collect the global row permutation and invert it */
2007   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
2008   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
2009   if (pcomm!=comm) {
2010     ierr = ISDestroy(crowp);CHKERRQ(ierr);
2011   }
2012   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
2013   /* get the local target indices */
2014   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
2015   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
2016   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
2017   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr);
2018   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
2019   ierr = ISDestroy(irowp);CHKERRQ(ierr);
2020   /* the column permutation is so much easier;
2021      make a local version of 'colp' and invert it */
2022   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
2023   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
2024   if (size==1) {
2025     lcolp = colp;
2026   } else {
2027     ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
2028     ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
2029     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr);
2030   }
2031   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
2032   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
2033   ierr = ISSetPermutation(icolp);CHKERRQ(ierr);
2034   if (size>1) {
2035     ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
2036     ierr = ISDestroy(lcolp);CHKERRQ(ierr);
2037   }
2038   /* now we just get the submatrix */
2039   ierr = MatGetSubMatrix_MPIBAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
2040   /* clean up */
2041   ierr = ISDestroy(lrowp);CHKERRQ(ierr);
2042   ierr = ISDestroy(icolp);CHKERRQ(ierr);
2043   PetscFunctionReturn(0);
2044 }
2045 
2046 #undef __FUNCT__
2047 #define __FUNCT__ "MatGetGhosts_MPIBAIJ"
2048 PetscErrorCode PETSCMAT_DLLEXPORT MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
2049 {
2050   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*) mat->data;
2051   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
2052 
2053   PetscFunctionBegin;
2054   if (nghosts) { *nghosts = B->nbs;}
2055   if (ghosts) {*ghosts = baij->garray;}
2056   PetscFunctionReturn(0);
2057 }
2058 
2059 EXTERN PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat);
2060 
2061 #undef __FUNCT__
2062 #define __FUNCT__ "MatFDColoringCreate_MPIBAIJ"
2063 /*
2064     This routine is almost identical to MatFDColoringCreate_MPIAIJ()!
2065 */
2066 PetscErrorCode MatFDColoringCreate_MPIBAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c)
2067 {
2068   Mat_MPIBAIJ            *baij = (Mat_MPIBAIJ*)mat->data;
2069   PetscErrorCode        ierr;
2070   PetscMPIInt           size,*ncolsonproc,*disp,nn;
2071   PetscInt              bs,i,n,nrows,j,k,m,*rows = 0,*A_ci,*A_cj,ncols,col;
2072   const PetscInt        *is;
2073   PetscInt              nis = iscoloring->n,nctot,*cols,*B_ci,*B_cj;
2074   PetscInt              *rowhit,M,cstart,cend,colb;
2075   PetscInt              *columnsforrow,l;
2076   IS                    *isa;
2077   PetscTruth             done,flg;
2078   ISLocalToGlobalMapping map = mat->bmapping;
2079   PetscInt               *ltog = (map ? map->indices : (PetscInt*)PETSC_NULL),ctype=c->ctype;
2080 
2081   PetscFunctionBegin;
2082   if (!mat->assembled) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled first; MatAssemblyBegin/End();");
2083   if (ctype == IS_COLORING_GHOSTED && !map) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_INCOMP,"When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMappingBlock");
2084 
2085   ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr);
2086   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2087   M                = mat->rmap->n/bs;
2088   cstart           = mat->cmap->rstart/bs;
2089   cend             = mat->cmap->rend/bs;
2090   c->M             = mat->rmap->N/bs;  /* set the global rows and columns and local rows */
2091   c->N             = mat->cmap->N/bs;
2092   c->m             = mat->rmap->n/bs;
2093   c->rstart        = mat->rmap->rstart/bs;
2094 
2095   c->ncolors       = nis;
2096   ierr             = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr);
2097   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr);
2098   ierr             = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr);
2099   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr);
2100   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr);
2101   ierr = PetscLogObjectMemory(c,5*nis*sizeof(PetscInt));CHKERRQ(ierr);
2102 
2103   /* Allow access to data structures of local part of matrix */
2104   if (!baij->colmap) {
2105     ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
2106   }
2107   ierr = MatGetColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr);
2108   ierr = MatGetColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr);
2109 
2110   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr);
2111   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr);
2112 
2113   for (i=0; i<nis; i++) {
2114     ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr);
2115     ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr);
2116     c->ncolumns[i] = n;
2117     if (n) {
2118       ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr);
2119       ierr = PetscLogObjectMemory(c,n*sizeof(PetscInt));CHKERRQ(ierr);
2120       ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr);
2121     } else {
2122       c->columns[i]  = 0;
2123     }
2124 
2125     if (ctype == IS_COLORING_GLOBAL){
2126       /* Determine the total (parallel) number of columns of this color */
2127       ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
2128       ierr = PetscMalloc2(size,PetscMPIInt,&ncolsonproc,size,PetscMPIInt,&disp);CHKERRQ(ierr);
2129 
2130       nn   = PetscMPIIntCast(n);
2131       ierr = MPI_Allgather(&nn,1,MPI_INT,ncolsonproc,1,MPI_INT,((PetscObject)mat)->comm);CHKERRQ(ierr);
2132       nctot = 0; for (j=0; j<size; j++) {nctot += ncolsonproc[j];}
2133       if (!nctot) {
2134         ierr = PetscInfo(mat,"Coloring of matrix has some unneeded colors with no corresponding rows\n");CHKERRQ(ierr);
2135       }
2136 
2137       disp[0] = 0;
2138       for (j=1; j<size; j++) {
2139         disp[j] = disp[j-1] + ncolsonproc[j-1];
2140       }
2141 
2142       /* Get complete list of columns for color on each processor */
2143       ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2144       ierr = MPI_Allgatherv((void*)is,n,MPIU_INT,cols,ncolsonproc,disp,MPIU_INT,((PetscObject)mat)->comm);CHKERRQ(ierr);
2145       ierr = PetscFree2(ncolsonproc,disp);CHKERRQ(ierr);
2146     } else if (ctype == IS_COLORING_GHOSTED){
2147       /* Determine local number of columns of this color on this process, including ghost points */
2148       nctot = n;
2149       ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2150       ierr = PetscMemcpy(cols,is,n*sizeof(PetscInt));CHKERRQ(ierr);
2151     } else {
2152       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not provided for this MatFDColoring type");
2153     }
2154 
2155     /*
2156        Mark all rows affected by these columns
2157     */
2158     /* Temporary option to allow for debugging/testing */
2159     flg  = PETSC_FALSE;
2160     ierr = PetscOptionsGetTruth(PETSC_NULL,"-matfdcoloring_slow",&flg,PETSC_NULL);CHKERRQ(ierr);
2161     if (!flg) {/*-----------------------------------------------------------------------------*/
2162       /* crude, fast version */
2163       ierr = PetscMemzero(rowhit,M*sizeof(PetscInt));CHKERRQ(ierr);
2164       /* loop over columns */
2165       for (j=0; j<nctot; j++) {
2166         if (ctype == IS_COLORING_GHOSTED) {
2167           col = ltog[cols[j]];
2168         } else {
2169           col  = cols[j];
2170         }
2171         if (col >= cstart && col < cend) {
2172           /* column is in diagonal block of matrix */
2173           rows = A_cj + A_ci[col-cstart];
2174           m    = A_ci[col-cstart+1] - A_ci[col-cstart];
2175         } else {
2176 #if defined (PETSC_USE_CTABLE)
2177           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2178           colb--;
2179 #else
2180           colb = baij->colmap[col] - 1;
2181 #endif
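          /* colmap stores each off-process global column offset by one, so a
             column absent from the off-diagonal block looks up as 0 and
             becomes colb == -1 after the decrement above */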
2182           if (colb == -1) {
2183             m = 0;
2184           } else {
2185             colb = colb/bs;
2186             rows = B_cj + B_ci[colb];
2187             m    = B_ci[colb+1] - B_ci[colb];
2188           }
2189         }
2190         /* loop over the rows of this column, marking them in rowhit */
2191         for (k=0; k<m; k++) {
2192           rowhit[*rows++] = col + 1;
2193         }
2194       }
2195 
2196       /* count the number of hits */
2197       nrows = 0;
2198       for (j=0; j<M; j++) {
2199         if (rowhit[j]) nrows++;
2200       }
2201       c->nrows[i]         = nrows;
2202       ierr                = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
2203       ierr                = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
2204       ierr = PetscLogObjectMemory(c,2*(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr);
2205       nrows = 0;
2206       for (j=0; j<M; j++) {
2207         if (rowhit[j]) {
2208           c->rows[i][nrows]           = j;
2209           c->columnsforrow[i][nrows] = rowhit[j] - 1;
2210           nrows++;
2211         }
2212       }
2213     } else {/*-------------------------------------------------------------------------------*/
2214       /* slow version, using rowhit as a linked list */
2215       PetscInt currentcol,fm,mfm;
2216       rowhit[M] = M;
2217       nrows     = 0;
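      /*
         rowhit[] doubles as a sorted singly linked list threaded through the
         row indices, with rowhit[M] as the head sentinel; e.g. (made up) hit
         rows {2,7} end up stored as rowhit[M]=2, rowhit[2]=7, rowhit[7]=M.
      */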
2218       /* loop over columns */
2219       for (j=0; j<nctot; j++) {
2220         if (ctype == IS_COLORING_GHOSTED) {
2221           col = ltog[cols[j]];
2222         } else {
2223           col  = cols[j];
2224         }
2225         if (col >= cstart && col < cend) {
2226           /* column is in diagonal block of matrix */
2227           rows = A_cj + A_ci[col-cstart];
2228           m    = A_ci[col-cstart+1] - A_ci[col-cstart];
2229         } else {
2230 #if defined (PETSC_USE_CTABLE)
2231           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2232           colb--;
2233 #else
2234           colb = baij->colmap[col] - 1;
2235 #endif
2236           if (colb == -1) {
2237             m = 0;
2238           } else {
2239             colb = colb/bs;
2240             rows = B_cj + B_ci[colb];
2241             m    = B_ci[colb+1] - B_ci[colb];
2242           }
2243         }
2244 
2245         /* loop over the rows of this column, marking them in rowhit */
2246         fm    = M; /* fm points to first entry in linked list */
2247         for (k=0; k<m; k++) {
2248           currentcol = *rows++;
2249           /* is it already in the list? */
2250           do {
2251             mfm  = fm;
2252             fm   = rowhit[fm];
2253           } while (fm < currentcol);
2254           /* not in list so add it */
2255           if (fm != currentcol) {
2256             nrows++;
2257             columnsforrow[currentcol] = col;
2258             /* next three lines insert new entry into linked list */
2259             rowhit[mfm]               = currentcol;
2260             rowhit[currentcol]        = fm;
2261             fm                        = currentcol;
2262             /* fm points to present position in list since we know the columns are sorted */
2263           } else {
2264             SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid coloring of matrix detected");
2265           }
2266         }
2267       }
2268       c->nrows[i]         = nrows;
2269       ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
2270       ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
2271       ierr = PetscLogObjectMemory(c,(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr);
2272       /* now store the linked list of rows into c->rows[i] */
2273       nrows = 0;
2274       fm    = rowhit[M];
2275       do {
2276         c->rows[i][nrows]            = fm;
2277         c->columnsforrow[i][nrows++] = columnsforrow[fm];
2278         fm                           = rowhit[fm];
2279       } while (fm < M);
2280     } /* ---------------------------------------------------------------------------------------*/
2281     ierr = PetscFree(cols);CHKERRQ(ierr);
2282   }
2283 
2284   /* Optimize by adding the vscale, and scaleforrow[][] fields */
2285   /*
2286        vscale will contain the on-processor ("diagonal") scalings followed by the off-processor ones
2287   */
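  /* the ghost list passed to VecCreateGhost() must be in point (unblocked)
     indices, so each off-process block column garray[i] is expanded below
     into its bs consecutive point columns */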
2288   if (ctype == IS_COLORING_GLOBAL) {
2289     PetscInt *garray;
2290     ierr = PetscMalloc(baij->B->cmap->n*sizeof(PetscInt),&garray);CHKERRQ(ierr);
2291     for (i=0; i<baij->B->cmap->n/bs; i++) {
2292       for (j=0; j<bs; j++) {
2293         garray[i*bs+j] = bs*baij->garray[i]+j;
2294       }
2295     }
2296     ierr = VecCreateGhost(((PetscObject)mat)->comm,baij->A->rmap->n,PETSC_DETERMINE,baij->B->cmap->n,garray,&c->vscale);CHKERRQ(ierr);
2297     ierr = PetscFree(garray);CHKERRQ(ierr);
2298     CHKMEMQ;
2299     ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
2300     for (k=0; k<c->ncolors; k++) {
2301       ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
2302       for (l=0; l<c->nrows[k]; l++) {
2303         col = c->columnsforrow[k][l];
2304         if (col >= cstart && col < cend) {
2305           /* column is in diagonal block of matrix */
2306           colb = col - cstart;
2307         } else {
2308           /* column  is in "off-processor" part */
2309 #if defined (PETSC_USE_CTABLE)
2310           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2311           colb --;
2312 #else
2313           colb = baij->colmap[col] - 1;
2314 #endif
2315           colb = colb/bs;
2316           colb += cend - cstart;
2317         }
2318         c->vscaleforrow[k][l] = colb;
2319       }
2320     }
2321   } else if (ctype == IS_COLORING_GHOSTED) {
2322     /* Get gtol mapping */
2323     PetscInt N = mat->cmap->N, *gtol;
2324     ierr = PetscMalloc((N+1)*sizeof(PetscInt),&gtol);CHKERRQ(ierr);
2325     for (i=0; i<N; i++) gtol[i] = -1;
2326     for (i=0; i<map->n; i++) gtol[ltog[i]] = i;
2327 
2328     c->vscale = 0; /* will be created in MatFDColoringApply() */
2329     ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
2330     for (k=0; k<c->ncolors; k++) {
2331       ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
2332       for (l=0; l<c->nrows[k]; l++) {
2333         col = c->columnsforrow[k][l];      /* global column index */
2334         c->vscaleforrow[k][l] = gtol[col]; /* local column index */
2335       }
2336     }
2337     ierr = PetscFree(gtol);CHKERRQ(ierr);
2338   }
2339   ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr);
2340 
2341   ierr = PetscFree(rowhit);CHKERRQ(ierr);
2342   ierr = PetscFree(columnsforrow);CHKERRQ(ierr);
2343   ierr = MatRestoreColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr);
2344   ierr = MatRestoreColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr);
2345   CHKMEMQ;
2346   PetscFunctionReturn(0);
2347 }
2348 
2349 #undef __FUNCT__
2350 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIBAIJ"
2351 PetscErrorCode MatGetSeqNonzerostructure_MPIBAIJ(Mat A,Mat *newmat)
2352 {
2353   Mat            B;
2354   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ *)A->data;
2355   Mat_SeqBAIJ    *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data;
2356   Mat_SeqAIJ     *b;
2357   PetscErrorCode ierr;
2358   PetscMPIInt    size,rank,*recvcounts = 0,*displs = 0;
2359   PetscInt       sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs;
2360   PetscInt       m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf;
2361 
2362   PetscFunctionBegin;
2363   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
2364   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
2365 
2366   /* ----------------------------------------------------------------
2367      Tell every processor the number of nonzeros per row
2368   */
2369   ierr = PetscMalloc((A->rmap->N/bs)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
2370   for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) {
2371     lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs];
2372   }
2373   sendcount = A->rmap->rend/bs - A->rmap->rstart/bs;
2374   ierr = PetscMalloc(2*size*sizeof(PetscMPIInt),&recvcounts);CHKERRQ(ierr);
2375   displs     = recvcounts + size;
2376   for (i=0; i<size; i++) {
2377     recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs;
2378     displs[i]     = A->rmap->range[i]/bs;
2379   }
2380 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2381   ierr  = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2382 #else
2383   ierr  = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2384 #endif
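  /* with MPI_IN_PLACE each process's contribution is taken directly from its
     own slot of lens[], so the send buffer is never copied onto itself */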
2385   /* ---------------------------------------------------------------
2386      Create the sequential matrix of the same type as the local block diagonal
2387   */
2388   ierr  = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
2389   ierr  = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2390   ierr  = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2391   ierr  = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr);
2392   b = (Mat_SeqAIJ *)B->data;
2393 
2394   /*--------------------------------------------------------------------
2395     Copy my part of matrix column indices over
2396   */
2397   sendcount  = ad->nz + bd->nz;
2398   jsendbuf   = b->j + b->i[rstarts[rank]/bs];
2399   a_jsendbuf = ad->j;
2400   b_jsendbuf = bd->j;
2401   n          = A->rmap->rend/bs - A->rmap->rstart/bs;
2402   cnt        = 0;
2403   for (i=0; i<n; i++) {
2404 
2405     /* put in lower diagonal portion */
2406     m = bd->i[i+1] - bd->i[i];
2407     while (m > 0) {
2408       /* is it past the diagonal (after mapping through garray to global block numbering)? */
2409       if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break;
2410       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2411       m--;
2412     }
2413 
2414     /* put in diagonal portion */
2415     for (j=ad->i[i]; j<ad->i[i+1]; j++) {
2416       jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++;
2417     }
2418 
2419     /* put in upper diagonal portion */
2420     while (m-- > 0) {
2421       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2422     }
2423   }
2424   if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt);
2425 
2426   /*--------------------------------------------------------------------
2427     Gather all column indices to all processors
2428   */
2429   for (i=0; i<size; i++) {
2430     recvcounts[i] = 0;
2431     for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) {
2432       recvcounts[i] += lens[j];
2433     }
2434   }
2435   displs[0]  = 0;
2436   for (i=1; i<size; i++) {
2437     displs[i] = displs[i-1] + recvcounts[i-1];
2438   }
2439 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2440   ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2441 #else
2442   ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2443 #endif
2444   /*--------------------------------------------------------------------
2445     Assemble the matrix into usable form (note numerical values not yet set)
2446   */
2447   /* set the b->ilen (length of each row) values */
2448   ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr);
2449   /* set the b->i indices */
2450   b->i[0] = 0;
2451   for (i=1; i<=A->rmap->N/bs; i++) {
2452     b->i[i] = b->i[i-1] + lens[i-1];
2453   }
2454   ierr = PetscFree(lens);CHKERRQ(ierr);
2455   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2456   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2457   ierr = PetscFree(recvcounts);CHKERRQ(ierr);
2458 
2459   if (A->symmetric){
2460     ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2461   } else if (A->hermitian) {
2462     ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr);
2463   } else if (A->structurally_symmetric) {
2464     ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2465   }
2466   *newmat = B;
2467   PetscFunctionReturn(0);
2468 }
2469 
2470 #undef __FUNCT__
2471 #define __FUNCT__ "MatSOR_MPIBAIJ"
2472 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2473 {
2474   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
2475   PetscErrorCode ierr;
2476   Vec            bb1 = 0;
2477 
2478   PetscFunctionBegin;
2479   if (flag == SOR_APPLY_UPPER) {
2480     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2481     PetscFunctionReturn(0);
2482   }
2483 
2484   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) { /* allocated only after the early return above, so bb1 cannot leak */
2485     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2486   }
2487 
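  /* The parallel scheme below is block Jacobi with local SOR solves: each
     outer iteration scatters x to the ghost values, forms bb1 = bb - B*x so
     that the off-diagonal coupling moves to the right-hand side, and then
     sweeps only the local diagonal block A. */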
2488   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2489     if (flag & SOR_ZERO_INITIAL_GUESS) {
2490       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2491       its--;
2492     }
2493 
2494     while (its--) {
2495       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2496       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2497 
2498       /* update rhs: bb1 = bb - B*x */
2499       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2500       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2501 
2502       /* local sweep */
2503       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2504     }
2505   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
2506     if (flag & SOR_ZERO_INITIAL_GUESS) {
2507       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2508       its--;
2509     }
2510     while (its--) {
2511       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2512       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2513 
2514       /* update rhs: bb1 = bb - B*x */
2515       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2516       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2517 
2518       /* local sweep */
2519       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2520     }
2521   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
2522     if (flag & SOR_ZERO_INITIAL_GUESS) {
2523       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2524       its--;
2525     }
2526     while (its--) {
2527       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2528       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2529 
2530       /* update rhs: bb1 = bb - B*x */
2531       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2532       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2533 
2534       /* local sweep */
2535       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2536     }
2537   } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Requested parallel SOR variant not supported");
2538 
2539   if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);}
2540   PetscFunctionReturn(0);
2541 }
2542 
2543 extern PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_BAIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2544 
2545 
2546 /* -------------------------------------------------------------------*/
2547 static struct _MatOps MatOps_Values = {
2548        MatSetValues_MPIBAIJ,
2549        MatGetRow_MPIBAIJ,
2550        MatRestoreRow_MPIBAIJ,
2551        MatMult_MPIBAIJ,
2552 /* 4*/ MatMultAdd_MPIBAIJ,
2553        MatMultTranspose_MPIBAIJ,
2554        MatMultTransposeAdd_MPIBAIJ,
2555        0,
2556        0,
2557        0,
2558 /*10*/ 0,
2559        0,
2560        0,
2561        MatSOR_MPIBAIJ,
2562        MatTranspose_MPIBAIJ,
2563 /*15*/ MatGetInfo_MPIBAIJ,
2564        MatEqual_MPIBAIJ,
2565        MatGetDiagonal_MPIBAIJ,
2566        MatDiagonalScale_MPIBAIJ,
2567        MatNorm_MPIBAIJ,
2568 /*20*/ MatAssemblyBegin_MPIBAIJ,
2569        MatAssemblyEnd_MPIBAIJ,
2570        MatSetOption_MPIBAIJ,
2571        MatZeroEntries_MPIBAIJ,
2572 /*24*/ MatZeroRows_MPIBAIJ,
2573        0,
2574        0,
2575        0,
2576        0,
2577 /*29*/ MatSetUpPreallocation_MPIBAIJ,
2578        0,
2579        0,
2580        0,
2581        0,
2582 /*34*/ MatDuplicate_MPIBAIJ,
2583        0,
2584        0,
2585        0,
2586        0,
2587 /*39*/ MatAXPY_MPIBAIJ,
2588        MatGetSubMatrices_MPIBAIJ,
2589        MatIncreaseOverlap_MPIBAIJ,
2590        MatGetValues_MPIBAIJ,
2591        MatCopy_MPIBAIJ,
2592 /*44*/ 0,
2593        MatScale_MPIBAIJ,
2594        0,
2595        0,
2596        0,
2597 /*49*/ MatSetBlockSize_MPIBAIJ,
2598        0,
2599        0,
2600        0,
2601        0,
2602 /*54*/ MatFDColoringCreate_MPIBAIJ,
2603        0,
2604        MatSetUnfactored_MPIBAIJ,
2605        MatPermute_MPIBAIJ,
2606        MatSetValuesBlocked_MPIBAIJ,
2607 /*59*/ MatGetSubMatrix_MPIBAIJ,
2608        MatDestroy_MPIBAIJ,
2609        MatView_MPIBAIJ,
2610        0,
2611        0,
2612 /*64*/ 0,
2613        0,
2614        0,
2615        0,
2616        0,
2617 /*69*/ MatGetRowMaxAbs_MPIBAIJ,
2618        0,
2619        0,
2620        0,
2621        0,
2622 /*74*/ 0,
2623        MatFDColoringApply_BAIJ,
2624        0,
2625        0,
2626        0,
2627 /*79*/ 0,
2628        0,
2629        0,
2630        0,
2631        MatLoad_MPIBAIJ,
2632 /*84*/ 0,
2633        0,
2634        0,
2635        0,
2636        0,
2637 /*89*/ 0,
2638        0,
2639        0,
2640        0,
2641        0,
2642 /*94*/ 0,
2643        0,
2644        0,
2645        0,
2646        0,
2647 /*99*/ 0,
2648        0,
2649        0,
2650        0,
2651        0,
2652 /*104*/0,
2653        MatRealPart_MPIBAIJ,
2654        MatImaginaryPart_MPIBAIJ,
2655        0,
2656        0,
2657 /*109*/0,
2658        0,
2659        0,
2660        0,
2661        0,
2662 /*114*/MatGetSeqNonzerostructure_MPIBAIJ,
2663        0,
2664        MatGetGhosts_MPIBAIJ,
2665        0,
2666        0,
2667 /*119*/0,
2668        0,
2669        0,
2670        0
2671 };
2672 
2673 EXTERN_C_BEGIN
2674 #undef __FUNCT__
2675 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
2676 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
2677 {
2678   PetscFunctionBegin;
2679   *a      = ((Mat_MPIBAIJ *)A->data)->A;
2680   *iscopy = PETSC_FALSE;
2681   PetscFunctionReturn(0);
2682 }
2683 EXTERN_C_END
2684 
2685 EXTERN_C_BEGIN
2686 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*);
2687 EXTERN_C_END
2688 
2689 EXTERN_C_BEGIN
2690 #undef __FUNCT__
2691 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ"
2692 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2693 {
2694   PetscInt       m,rstart,cstart,cend;
2695   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
2696   const PetscInt *JJ=0;
2697   PetscScalar    *values=0;
2698   PetscErrorCode ierr;
2699 
2700   PetscFunctionBegin;
2701 
2702   if (bs < 1) SETERRQ1(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2703   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2704   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2705   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2706   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2707   m      = B->rmap->n/bs;
2708   rstart = B->rmap->rstart/bs;
2709   cstart = B->cmap->rstart/bs;
2710   cend   = B->cmap->rend/bs;
2711 
2712   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
2713   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
2714   for (i=0; i<m; i++) {
2715     nz = ii[i+1] - ii[i];
2716     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
2717     nz_max = PetscMax(nz_max,nz);
2718     JJ  = jj + ii[i];
2719     for (j=0; j<nz; j++) {
2720       if (*JJ >= cstart) break;
2721       JJ++;
2722     }
2723     d = 0;
2724     for (; j<nz; j++) {
2725       if (*JJ++ >= cend) break;
2726       d++;
2727     }
2728     d_nnz[i] = d;
2729     o_nnz[i] = nz - d;
2730   }
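  /* e.g. (made up): with cstart=2, cend=4 and a sorted block row jj = {0,2,3,5},
     the two scans above give d = 2 (block columns 2 and 3), hence
     d_nnz[i] = 2 and o_nnz[i] = 4 - 2 = 2 */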
2731   ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2732   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
2733 
2734   values = (PetscScalar*)V;
2735   if (!values) {
2736     ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2737     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2738   }
2739   for (i=0; i<m; i++) {
2740     PetscInt          row    = i + rstart;
2741     PetscInt          ncols  = ii[i+1] - ii[i];
2742     const PetscInt    *icols = jj + ii[i];
2743     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2744     ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2745   }
2746 
2747   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2748   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2749   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2750 
2751   PetscFunctionReturn(0);
2752 }
2753 EXTERN_C_END
2754 
2755 #undef __FUNCT__
2756 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR"
2757 /*@C
2758    MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
2759    (the default parallel PETSc format).
2760 
2761    Collective on MPI_Comm
2762 
2763    Input Parameters:
2764 +  B - the matrix
.  bs - the block size
2765 .  i - the indices into j for the start of each local block row (starts with zero)
2766 .  j - the block column indices for each local block row (starts with zero); these must be sorted within each row
2767 -  v - optional values in the matrix, stored block by block (bs*bs values per block)
2768 
2769    Level: developer
2770 
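   Example:
   A minimal sketch (error checking omitted; the CSR data below are
   illustrative, assuming one process, block size 2, and a 4x4 matrix
   built from three 2x2 blocks):
.vb
   Mat         B;
   PetscInt    bs = 2;
   PetscInt    ii[] = {0,2,3};   /* block row i owns jj[ii[i]] .. jj[ii[i+1]-1] */
   PetscInt    jj[] = {0,1,1};   /* block column indices, sorted in each row */
   PetscScalar vv[12] = {0.0};   /* bs*bs = 4 values per block, block by block */

   MatCreate(PETSC_COMM_WORLD,&B);
   MatSetSizes(B,4,4,PETSC_DETERMINE,PETSC_DETERMINE);
   MatSetType(B,MATMPIBAIJ);
   MatMPIBAIJSetPreallocationCSR(B,bs,ii,jj,vv);
.ve
   The routine assembles the matrix itself, so no separate MatAssemblyBegin()
   and MatAssemblyEnd() calls are needed afterwards.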
2771 .keywords: matrix, aij, compressed row, sparse, parallel
2772 
2773 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIBAIJ(), MATMPIBAIJ
2774 @*/
2775 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2776 {
2777   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]);
2778 
2779   PetscFunctionBegin;
2780   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
2781   if (f) {
2782     ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr);
2783   }
2784   PetscFunctionReturn(0);
2785 }
2786 
2787 EXTERN_C_BEGIN
2788 #undef __FUNCT__
2789 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
2790 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
2791 {
2792   Mat_MPIBAIJ    *b;
2793   PetscErrorCode ierr;
2794   PetscInt       i, newbs = PetscAbs(bs);
2795 
2796   PetscFunctionBegin;
2797   if (bs < 0) {
2798     ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPIBAIJ matrix","Mat");CHKERRQ(ierr);
2799       ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr);
2800     ierr = PetscOptionsEnd();CHKERRQ(ierr);
2801     bs   = PetscAbs(bs);
2802   }
2803   if ((d_nnz || o_nnz) && newbs != bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting d_nnz or o_nnz");
2804   bs = newbs;
2805 
2806 
2807   if (bs < 1) SETERRQ(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
2808   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2809   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2810   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2811   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2812 
2813   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2814   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2815   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2816   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2817 
2818   if (d_nnz) {
2819     for (i=0; i<B->rmap->n/bs; i++) {
2820       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
2821     }
2822   }
2823   if (o_nnz) {
2824     for (i=0; i<B->rmap->n/bs; i++) {
2825       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
2826     }
2827   }
2828 
2829   b = (Mat_MPIBAIJ*)B->data;
2830   b->bs2 = bs*bs;
2831   b->mbs = B->rmap->n/bs;
2832   b->nbs = B->cmap->n/bs;
2833   b->Mbs = B->rmap->N/bs;
2834   b->Nbs = B->cmap->N/bs;
2835 
2836   for (i=0; i<=b->size; i++) {
2837     b->rangebs[i] = B->rmap->range[i]/bs;
2838   }
2839   b->rstartbs = B->rmap->rstart/bs;
2840   b->rendbs   = B->rmap->rend/bs;
2841   b->cstartbs = B->cmap->rstart/bs;
2842   b->cendbs   = B->cmap->rend/bs;
2843 
2844   if (!B->preallocated) {
2845     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2846     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2847     ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
2848     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
2849     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2850     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2851     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
2852     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
2853     ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr);
2854   }
2855 
2856   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2857   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
2858   B->preallocated = PETSC_TRUE;
2859   PetscFunctionReturn(0);
2860 }
2861 EXTERN_C_END
2862 
2863 EXTERN_C_BEGIN
2864 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
2865 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
2866 EXTERN_C_END
2867 
2868 
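/*
   A descriptive note (added annotation): MatConvert_MPIBAIJ_MPIAdj, below,
   builds the block-level adjacency structure of the matrix. For each block row
   the off-diagonal and diagonal column lists are merged in globally sorted
   order, the diagonal block itself is dropped, and the result is handed to
   MatCreateMPIAdj().
*/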
2869 EXTERN_C_BEGIN
2870 #undef __FUNCT__
2871 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj"
2872 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPIAdj(Mat B, const MatType newtype,MatReuse reuse,Mat *adj)
2873 {
2874   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
2875   PetscErrorCode ierr;
2876   Mat_SeqBAIJ    *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data;
2877   PetscInt       M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs;
2878   const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;
2879 
2880   PetscFunctionBegin;
2881   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&ii);CHKERRQ(ierr);
2882   ii[0] = 0;
2883   CHKMEMQ;
2884   for (i=0; i<M; i++) {
2885     if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]);
2886     if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]);
2887     ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i];
2888     /* remove one from the count if the matrix has a diagonal entry in this row */
2889     for (j=id[i]; j<id[i+1]; j++) {
2890       if (jd[j] == i) {ii[i+1]--;break;}
2891     }
2892     CHKMEMQ;
2893   }
2894   ierr = PetscMalloc(ii[M]*sizeof(PetscInt),&jj);CHKERRQ(ierr);
2895   cnt = 0;
2896   for (i=0; i<M; i++) {
2897     for (j=io[i]; j<io[i+1]; j++) {
2898       if (garray[jo[j]] > rstart) break;
2899       jj[cnt++] = garray[jo[j]];
2900       CHKMEMQ;
2901     }
2902     for (k=id[i]; k<id[i+1]; k++) {
2903       if (jd[k] != i) {
2904         jj[cnt++] = rstart + jd[k];
2905         CHKMEMQ;
2906       }
2907     }
2908     for (;j<io[i+1]; j++) {
2909       jj[cnt++] = garray[jo[j]];
2910       CHKMEMQ;
2911     }
2912   }
2913   ierr = MatCreateMPIAdj(((PetscObject)B)->comm,M,B->cmap->N/B->rmap->bs,ii,jj,PETSC_NULL,adj);CHKERRQ(ierr);
2914   PetscFunctionReturn(0);
2915 }
2916 EXTERN_C_END
2917 
2918 EXTERN_C_BEGIN
2919 #if defined(PETSC_HAVE_MUMPS)
2920 extern PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*);
2921 #endif
2922 EXTERN_C_END
2923 
2924 /*MC
2925    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
2926 
2927    Options Database Keys:
2928 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
2929 . -mat_block_size <bs> - set the blocksize used to store the matrix
2930 - -mat_use_hash_table <fact> - use a hash table to save memory when assembling the matrix (fact is the hash table factor)
2931 
2932   Level: beginner
2933 
2934 .seealso: MatCreateMPIBAIJ
2935 M*/
2936 
2937 EXTERN_C_BEGIN
2938 #undef __FUNCT__
2939 #define __FUNCT__ "MatCreate_MPIBAIJ"
2940 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIBAIJ(Mat B)
2941 {
2942   Mat_MPIBAIJ    *b;
2943   PetscErrorCode ierr;
2944   PetscTruth     flg;
2945 
2946   PetscFunctionBegin;
2947   ierr = PetscNewLog(B,Mat_MPIBAIJ,&b);CHKERRQ(ierr);
2948   B->data = (void*)b;
2949 
2950 
2951   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2952   B->mapping    = 0;
2953   B->assembled  = PETSC_FALSE;
2954 
2955   B->insertmode = NOT_SET_VALUES;
2956   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
2957   ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr);
2958 
2959   /* build local table of row and column ownerships */
2960   ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);
2961 
2962   /* build cache for off array entries formed */
2963   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
2964   b->donotstash  = PETSC_FALSE;
2965   b->colmap      = PETSC_NULL;
2966   b->garray      = PETSC_NULL;
2967   b->roworiented = PETSC_TRUE;
2968 
2969   /* stuff used in block assembly */
2970   b->barray       = 0;
2971 
2972   /* stuff used for matrix vector multiply */
2973   b->lvec         = 0;
2974   b->Mvctx        = 0;
2975 
2976   /* stuff for MatGetRow() */
2977   b->rowindices   = 0;
2978   b->rowvalues    = 0;
2979   b->getrowactive = PETSC_FALSE;
2980 
2981   /* hash table stuff */
2982   b->ht           = 0;
2983   b->hd           = 0;
2984   b->ht_size      = 0;
2985   b->ht_flag      = PETSC_FALSE;
2986   b->ht_fact      = 0;
2987   b->ht_total_ct  = 0;
2988   b->ht_insert_ct = 0;
2989 
2990   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for MPIBAIJ matrix","Mat");CHKERRQ(ierr);
2991     ierr = PetscOptionsTruth("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
2992     if (flg) {
2993       PetscReal fact = 1.39;
2994       ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
2995       ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr);
2996       if (fact <= 1.0) fact = 1.39;
2997       ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
2998       ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
2999     }
3000   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3001 
3002 #if defined(PETSC_HAVE_MUMPS)
3003   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps",MatGetFactor_baij_mumps);CHKERRQ(ierr);
3004 #endif
3005   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",
3006                                      "MatConvert_MPIBAIJ_MPIAdj",
3007                                       MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr);
3008   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3009                                      "MatStoreValues_MPIBAIJ",
3010                                      MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
3011   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3012                                      "MatRetrieveValues_MPIBAIJ",
3013                                      MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
3014   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
3015                                      "MatGetDiagonalBlock_MPIBAIJ",
3016                                      MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
3017   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C",
3018                                      "MatMPIBAIJSetPreallocation_MPIBAIJ",
3019                                      MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
3020   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",
3021 				     "MatMPIBAIJSetPreallocationCSR_MPIBAIJ",
3022 				     MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
3023   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
3024                                      "MatDiagonalScaleLocal_MPIBAIJ",
3025                                      MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
3026   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C",
3027                                      "MatSetHashTableFactor_MPIBAIJ",
3028                                      MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
3029   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);
3030   PetscFunctionReturn(0);
3031 }
3032 EXTERN_C_END
3033 
3034 /*MC
3035    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
3036 
3037    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
3038    and MATMPIBAIJ otherwise.
3039 
3040    Options Database Keys:
3041 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
3042 
3043   Level: beginner
3044 
3045 .seealso: MatCreateMPIBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3046 M*/
3047 
3048 EXTERN_C_BEGIN
3049 #undef __FUNCT__
3050 #define __FUNCT__ "MatCreate_BAIJ"
3051 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_BAIJ(Mat A)
3052 {
3053   PetscErrorCode ierr;
3054   PetscMPIInt    size;
3055 
3056   PetscFunctionBegin;
3057   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
3058   if (size == 1) {
3059     ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr);
3060   } else {
3061     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
3062   }
3063   PetscFunctionReturn(0);
3064 }
3065 EXTERN_C_END
3066 
3067 #undef __FUNCT__
3068 #define __FUNCT__ "MatMPIBAIJSetPreallocation"
3069 /*@C
3070    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
3071    (block compressed row).  For good matrix assembly performance
3072    the user should preallocate the matrix storage by setting the parameters
3073    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3074    performance can be increased by more than a factor of 50.
3075 
3076    Collective on Mat
3077 
3078    Input Parameters:
3079 +  A - the matrix
3080 .  bs   - size of block
3081 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
3082            submatrix  (same for all local rows)
3083 .  d_nnz - array containing the number of block nonzeros in the various block rows
3084            in the diagonal portion of the local submatrix (possibly different for each block
3085            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
3086 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
3087            submatrix (same for all local rows).
3088 -  o_nnz - array containing the number of block nonzeros in the various block rows of the
3089            off-diagonal portion of the local submatrix (possibly different for
3090            each block row) or PETSC_NULL.
3091 
3092    If the *_nnz parameter is given then the *_nz parameter is ignored
3093 
3094    Options Database Keys:
3095 +   -mat_block_size - size of the blocks to use
3096 -   -mat_use_hash_table <fact> - use a hash table to save memory when assembling the matrix (fact is the hash table factor)
3097 
3098    Notes:
3099    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
3100    then it must be used on all processors that share the object for that argument.
3101 
3102    Storage Information:
3103    For a square global matrix we define each processor's diagonal portion
3104    to be its local rows and the corresponding columns (a square submatrix);
3105    each processor's off-diagonal portion encompasses the remainder of the
3106    local matrix (a rectangular submatrix).
3107 
3108    The user can specify preallocated storage for the diagonal part of
3109    the local submatrix with either d_nz or d_nnz (not both).  Set
3110    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
3111    memory allocation.  Likewise, specify preallocated storage for the
3112    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3113 
3114    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3115    the figure below we depict these three local rows and all columns (0-11).
3116 
3117 .vb
3118            0 1 2 3 4 5 6 7 8 9 10 11
3119           -------------------
3120    row 3  |  o o o d d d o o o o o o
3121    row 4  |  o o o d d d o o o o o o
3122    row 5  |  o o o d d d o o o o o o
3123           -------------------
3124 .ve
3125 
3126    Thus, any entries in the d locations are stored in the d (diagonal)
3127    submatrix, and any entries in the o locations are stored in the
3128    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3129    stored simply in the MATSEQBAIJ format for compressed row storage.
3130 
3131    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3132    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3133    In general, for PDE problems in which most nonzeros are near the diagonal,
3134    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3135    or you will get TERRIBLE performance; see the users' manual chapter on
3136    matrices.
3137 
3138    You can call MatGetInfo() to get information on how effective the preallocation was,
3139    for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
3140    You can also run with the option -info and look for messages with the string
3141    malloc in them to see if additional memory allocation was needed.
3142 
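   Example:
   A minimal sketch (error checking omitted; the block size and counts are
   illustrative): preallocate for at most 2 block nonzeros per block row in
   the diagonal portion and at most 1 in the off-diagonal portion:
.vb
   Mat      B;
   PetscInt bs = 3, m = 6, n = 6;   /* local sizes must be divisible by bs */

   MatCreate(PETSC_COMM_WORLD,&B);
   MatSetSizes(B,m,n,PETSC_DETERMINE,PETSC_DETERMINE);
   MatSetType(B,MATMPIBAIJ);
   MatMPIBAIJSetPreallocation(B,bs,2,PETSC_NULL,1,PETSC_NULL);
.ve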
3143    Level: intermediate
3144 
3145 .keywords: matrix, block, aij, compressed row, sparse, parallel
3146 
3147 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR()
3148 @*/
3149 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3150 {
3151   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
3152 
3153   PetscFunctionBegin;
3154   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3155   if (f) {
3156     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3157   }
3158   PetscFunctionReturn(0);
3159 }
3160 
3161 #undef __FUNCT__
3162 #define __FUNCT__ "MatCreateMPIBAIJ"
3163 /*@C
3164    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
3165    (block compressed row).  For good matrix assembly performance
3166    the user should preallocate the matrix storage by setting the parameters
3167    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3168    performance can be increased by more than a factor of 50.
3169 
3170    Collective on MPI_Comm
3171 
3172    Input Parameters:
3173 +  comm - MPI communicator
3174 .  bs   - size of block
3175 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3176            This value should be the same as the local size used in creating the
3177            y vector for the matrix-vector product y = Ax.
3178 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
3179            This value should be the same as the local size used in creating the
3180            x vector for the matrix-vector product y = Ax.
3181 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3182 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3183 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
3184            submatrix  (same for all local rows)
3185 .  d_nnz - array containing the number of nonzero blocks in the various block rows
3186            in the diagonal portion of the local submatrix (possibly different for each block
3187            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
3188 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3189            submatrix (same for all local rows).
3190 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
3191            off-diagonal portion of the local submatrix (possibly different for
3192            each block row) or PETSC_NULL.
3193 
3194    Output Parameter:
3195 .  A - the matrix
3196 
3197    Options Database Keys:
3198 +   -mat_block_size - size of the blocks to use
3199 -   -mat_use_hash_table <fact> - use a hash table to save memory when assembling the matrix (fact is the hash table factor)
3200 
3201    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3202    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3203    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3204 
3205    Notes:
3206    If the *_nnz parameter is given then the *_nz parameter is ignored
3207 
3208    A nonzero block is any block that has one or more nonzeros in it
3209 
3210    The user MUST specify either the local or global matrix dimensions
3211    (possibly both).
3212 
3213    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
3214    then it must be used on all processors that share the object for that argument.
3215 
3216    Storage Information:
3217    For a square global matrix we define each processor's diagonal portion
3218    to be its local rows and the corresponding columns (a square submatrix);
3219    each processor's off-diagonal portion encompasses the remainder of the
3220    local matrix (a rectangular submatrix).
3221 
3222    The user can specify preallocated storage for the diagonal part of
3223    the local submatrix with either d_nz or d_nnz (not both).  Set
3224    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
3225    memory allocation.  Likewise, specify preallocated storage for the
3226    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3227 
3228    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3229    the figure below we depict these three local rows and all columns (0-11).
3230 
3231 .vb
3232            0 1 2 3 4 5 6 7 8 9 10 11
3233           -------------------
3234    row 3  |  o o o d d d o o o o o o
3235    row 4  |  o o o d d d o o o o o o
3236    row 5  |  o o o d d d o o o o o o
3237           -------------------
3238 .ve
3239 
3240    Thus, any entries in the d locations are stored in the d (diagonal)
3241    submatrix, and any entries in the o locations are stored in the
3242    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3243    stored simply in the MATSEQBAIJ format for compressed row storage.
3244 
3245    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3246    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3247    In general, for PDE problems in which most nonzeros are near the diagonal,
3248    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3249    or you will get TERRIBLE performance; see the users' manual chapter on
3250    matrices.
3251 
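   Example:
   A minimal sketch (error checking omitted; sizes and preallocation counts
   are illustrative):
.vb
   Mat A;
   MatCreateMPIBAIJ(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DECIDE,100,100,
                    5,PETSC_NULL,2,PETSC_NULL,&A);
   /* set entries with MatSetValuesBlocked(), then assemble */
   MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
   MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
.ve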
3252    Level: intermediate
3253 
3254 .keywords: matrix, block, aij, compressed row, sparse, parallel
3255 
3256 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3257 @*/
3258 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3259 {
3260   PetscErrorCode ierr;
3261   PetscMPIInt    size;
3262 
3263   PetscFunctionBegin;
3264   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3265   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3266   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3267   if (size > 1) {
3268     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
3269     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3270   } else {
3271     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3272     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3273   }
3274   PetscFunctionReturn(0);
3275 }
3276 
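/*
   A descriptive note (added annotation): MatDuplicate_MPIBAIJ, below, creates a
   new matrix with the same layout as matin, copies the block-size bookkeeping,
   the colmap/garray lookup tables, and the communication scatter, then
   duplicates the diagonal (A) and off-diagonal (B) sequential blocks, copying
   their values only when cpvalues requests it.
*/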
3277 #undef __FUNCT__
3278 #define __FUNCT__ "MatDuplicate_MPIBAIJ"
3279 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3280 {
3281   Mat            mat;
3282   Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
3283   PetscErrorCode ierr;
3284   PetscInt       len=0;
3285 
3286   PetscFunctionBegin;
3287   *newmat       = 0;
3288   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
3289   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3290   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3291   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3292 
3293   mat->factortype   = matin->factortype;
3294   mat->preallocated = PETSC_TRUE;
3295   mat->assembled    = PETSC_TRUE;
3296   mat->insertmode   = NOT_SET_VALUES;
3297 
3298   a      = (Mat_MPIBAIJ*)mat->data;
3299   mat->rmap->bs  = matin->rmap->bs;
3300   a->bs2   = oldmat->bs2;
3301   a->mbs   = oldmat->mbs;
3302   a->nbs   = oldmat->nbs;
3303   a->Mbs   = oldmat->Mbs;
3304   a->Nbs   = oldmat->Nbs;
3305 
3306   ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3307   ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3308 
3309   a->size         = oldmat->size;
3310   a->rank         = oldmat->rank;
3311   a->donotstash   = oldmat->donotstash;
3312   a->roworiented  = oldmat->roworiented;
3313   a->rowindices   = 0;
3314   a->rowvalues    = 0;
3315   a->getrowactive = PETSC_FALSE;
3316   a->barray       = 0;
3317   a->rstartbs     = oldmat->rstartbs;
3318   a->rendbs       = oldmat->rendbs;
3319   a->cstartbs     = oldmat->cstartbs;
3320   a->cendbs       = oldmat->cendbs;
3321 
3322   /* hash table stuff */
3323   a->ht           = 0;
3324   a->hd           = 0;
3325   a->ht_size      = 0;
3326   a->ht_flag      = oldmat->ht_flag;
3327   a->ht_fact      = oldmat->ht_fact;
3328   a->ht_total_ct  = 0;
3329   a->ht_insert_ct = 0;
3330 
3331   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
3332   if (oldmat->colmap) {
3333 #if defined (PETSC_USE_CTABLE)
3334   ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3335 #else
3336   ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3337   ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3338   ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3339 #endif
3340   } else a->colmap = 0;
3341 
3342   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
3343     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3344     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3345     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
3346   } else a->garray = 0;
3347 
3348   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
3349   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3350   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3351   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3352   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3353 
3354   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3355   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3356   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3357   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3358   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3359   *newmat = mat;
3360 
3361   PetscFunctionReturn(0);
3362 }
3363 
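/*
   A descriptive note (added annotation): in MatLoad_MPIBAIJ, below, process 0
   reads the header, row lengths, column indices, and numerical values from the
   binary viewer and ships each piece to its owning process; rows are padded
   with unit diagonal entries so that the global size is divisible by the block
   size chosen via -matload_block_size.
*/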
3364 #undef __FUNCT__
3365 #define __FUNCT__ "MatLoad_MPIBAIJ"
3366 PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer)
3367 {
3368   PetscErrorCode ierr;
3369   int            fd;
3370   PetscInt       i,nz,j,rstart,rend;
3371   PetscScalar    *vals,*buf;
3372   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3373   MPI_Status     status;
3374   PetscMPIInt    rank,size,maxnz;
3375   PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
3376   PetscInt       *locrowlens = PETSC_NULL,*procsnz = PETSC_NULL,*browners = PETSC_NULL;
3377   PetscInt       jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax;
3378   PetscMPIInt    tag = ((PetscObject)viewer)->tag;
3379   PetscInt       *dlens = PETSC_NULL,*odlens = PETSC_NULL,*mask = PETSC_NULL,*masked1 = PETSC_NULL,*masked2 = PETSC_NULL,rowcount,odcount;
3380   PetscInt       dcount,kmax,k,nzcount,tmp,mend,sizesset=1,grows,gcols;
3381 
3382   PetscFunctionBegin;
3383   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIBAIJ matrix","Mat");CHKERRQ(ierr);
3384     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
3385   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3386 
3387   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3388   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3389   if (!rank) {
3390     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3391     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
3392     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3393   }
3394 
3395   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
3396 
3397   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3398   M = header[1]; N = header[2];
3399 
3400   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3401   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
3402   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
3403 
3404   /* If global sizes are set, check if they are consistent with that given in the file */
3405   if (sizesset) {
3406     ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
3407   }
3408   if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3409   if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3410 
3411   if (M != N) SETERRQ(((PetscObject)viewer)->comm,PETSC_ERR_SUP,"Can only do square matrices");
3412 
3413   /*
3414      This code adds extra rows to make sure the number of rows is
3415      divisible by the blocksize
3416   */
3417   Mbs        = M/bs;
3418   extra_rows = bs - M + bs*Mbs;
3419   if (extra_rows == bs) extra_rows = 0;
3420   else                  Mbs++;
3421   if (extra_rows && !rank) {
3422     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3423   }
3424 
3425   /* determine ownership of all rows */
3426   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
3427     mbs        = Mbs/size + ((Mbs % size) > rank);
3428     m          = mbs*bs;
3429   } else { /* User set */
3430     m          = newmat->rmap->n;
3431     mbs        = m/bs;
3432   }
3433   ierr       = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr);
3434   ierr       = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3435 
3436   /* process 0 needs enough room for process with most rows */
3437   if (!rank) {
3438     mmax = rowners[1];
3439     for (i=2; i<size; i++) {
3440       mmax = PetscMax(mmax,rowners[i]);
3441     }
3442     mmax*=bs;
3443   } else mmax = m;
3444 
3445   rowners[0] = 0;
3446   for (i=2; i<=size; i++)  rowners[i] += rowners[i-1];
3447   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
3448   rstart = rowners[rank];
3449   rend   = rowners[rank+1];
3450 
3451   /* distribute row lengths to all processors */
3452   ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr);
3453   if (!rank) {
3454     mend = m;
3455     if (size == 1) mend = mend - extra_rows;
3456     ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr);
3457     for (j=mend; j<m; j++) locrowlens[j] = 1;
3458     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3459     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3460     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3461     for (j=0; j<m; j++) {
3462       procsnz[0] += locrowlens[j];
3463     }
3464     for (i=1; i<size; i++) {
3465       mend = browners[i+1] - browners[i];
3466       if (i == size-1) mend = mend - extra_rows;
3467       ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr);
3468       for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
3469       /* calculate the number of nonzeros on each processor */
3470       for (j=0; j<browners[i+1]-browners[i]; j++) {
3471         procsnz[i] += rowlengths[j];
3472       }
3473       ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3474     }
3475     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3476   } else {
3477     ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3478   }
3479 
3480   if (!rank) {
3481     /* determine max buffer needed and allocate it */
3482     maxnz = procsnz[0];
3483     for (i=1; i<size; i++) {
3484       maxnz = PetscMax(maxnz,procsnz[i]);
3485     }
3486     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3487 
3488     /* read in my part of the matrix column indices  */
3489     nz     = procsnz[0];
3490     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
3491     mycols = ibuf;
3492     if (size == 1)  nz -= extra_rows;
3493     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3494     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
3495 
3496     /* read in everyone else's rows (except the last process) and ship them off */
3497     for (i=1; i<size-1; i++) {
3498       nz   = procsnz[i];
3499       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3500       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3501     }
3502     /* read in the stuff for the last proc */
3503     if (size != 1) {
3504       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
3505       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3506       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
3507       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
3508     }
3509     ierr = PetscFree(cols);CHKERRQ(ierr);
3510   } else {
3511     /* determine buffer space needed for message */
3512     nz = 0;
3513     for (i=0; i<m; i++) {
3514       nz += locrowlens[i];
3515     }
3516     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
3517     mycols = ibuf;
3518     /* receive message of column indices */
3519     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3520     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
3521     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
3522   }
3523 
3524   /* loop over local rows, determining number of off diagonal entries */
3525   ierr     = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
3526   ierr     = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
3527   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3528   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3529   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3530   rowcount = 0; nzcount = 0;
3531   for (i=0; i<mbs; i++) {
3532     dcount  = 0;
3533     odcount = 0;
3534     for (j=0; j<bs; j++) {
3535       kmax = locrowlens[rowcount];
3536       for (k=0; k<kmax; k++) {
3537         tmp = mycols[nzcount++]/bs;
3538         if (!mask[tmp]) {
3539           mask[tmp] = 1;
3540           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
3541           else masked1[dcount++] = tmp;
3542         }
3543       }
3544       rowcount++;
3545     }
3546 
3547     dlens[i]  = dcount;
3548     odlens[i] = odcount;
3549 
3550     /* zero out the mask elements we set */
3551     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
3552     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
3553   }
3554 
3555 
3556   if (!sizesset) {
3557     ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3558   }
3559   ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
3560 
3561   if (!rank) {
3562     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
3563     /* read in my part of the matrix numerical values  */
3564     nz = procsnz[0];
3565     vals = buf;
3566     mycols = ibuf;
3567     if (size == 1)  nz -= extra_rows;
3568     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3569     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
3570 
3571     /* insert into matrix */
3572     jj      = rstart*bs;
3573     for (i=0; i<m; i++) {
3574       ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3575       mycols += locrowlens[i];
3576       vals   += locrowlens[i];
3577       jj++;
3578     }
3579     /* read in other processors (except the last one) and ship out */
3580     for (i=1; i<size-1; i++) {
3581       nz   = procsnz[i];
3582       vals = buf;
3583       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3584       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3585     }
3586     /* the last proc */
3587     if (size != 1) {
3588       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
3589       vals = buf;
3590       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3591       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
3592       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3593     }
3594     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3595   } else {
3596     /* receive numeric values */
3597     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
3598 
3599     /* receive message of values */
3600     vals   = buf;
3601     mycols = ibuf;
3602     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
3603     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
3604     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
3605 
3606     /* insert into matrix */
3607     jj      = rstart*bs;
3608     for (i=0; i<m; i++) {
3609       ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3610       mycols += locrowlens[i];
3611       vals   += locrowlens[i];
3612       jj++;
3613     }
3614   }
3615   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
3616   ierr = PetscFree(buf);CHKERRQ(ierr);
3617   ierr = PetscFree(ibuf);CHKERRQ(ierr);
3618   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
3619   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
3620   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
3621   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3622   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3623 
3624   PetscFunctionReturn(0);
3625 }
3626 
3627 #undef __FUNCT__
3628 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
3629 /*@
3630    MatMPIBAIJSetHashTableFactor - Sets the factor used to compute the size of the hash table.
3631 
3632    Not Collective, each process can use a different factor
3633 
3634    Input Parameters:
3635 +  mat  - the matrix
3636 -  fact - factor used to compute the hash table size
3637 
3638    Level: advanced
3639 
3640   Notes:
3641    This can also be set by the command line option: -mat_use_hash_table <fact>
3642 
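   Example:
   A minimal sketch (the factor value is illustrative):
.vb
   MatSetOption(mat,MAT_USE_HASH_TABLE,PETSC_TRUE);
   MatMPIBAIJSetHashTableFactor(mat,1.6);
.ve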
3643 .keywords: matrix, hashtable, factor, HT
3644 
3645 .seealso: MatSetOption()
3646 @*/
3647 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
3648 {
3649   PetscErrorCode ierr,(*f)(Mat,PetscReal);
3650 
3651   PetscFunctionBegin;
3652   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);CHKERRQ(ierr);
3653   if (f) {
3654     ierr = (*f)(mat,fact);CHKERRQ(ierr);
3655   }
3656   PetscFunctionReturn(0);
3657 }
3658 
3659 EXTERN_C_BEGIN
3660 #undef __FUNCT__
3661 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
3662 PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
3663 {
3664   Mat_MPIBAIJ *baij;
3665 
3666   PetscFunctionBegin;
3667   baij = (Mat_MPIBAIJ*)mat->data;
3668   baij->ht_fact = fact;
3669   PetscFunctionReturn(0);
3670 }
3671 EXTERN_C_END
3672 
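/*
   A descriptive note (added annotation): MatMPIBAIJGetSeqBAIJ, below, gives
   access to the internal storage: Ad is the diagonal SEQBAIJ block, Ao the
   off-diagonal SEQBAIJ block, and colmap maps the block columns of Ao to
   global block columns (garray). No copies are made, so the caller must not
   destroy or modify them.
*/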
3673 #undef __FUNCT__
3674 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
3675 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3676 {
3677   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
3678   PetscFunctionBegin;
3679   *Ad     = a->A;
3680   *Ao     = a->B;
3681   *colmap = a->garray;
3682   PetscFunctionReturn(0);
3683 }
3684 
3685 /*
3686     Special version for direct calls from Fortran (to eliminate two function call overheads)
3687 */
3688 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3689 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3690 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3691 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3692 #endif
3693 
3694 #undef __FUNCT__
3695 #define __FUNCT__ "matmpibaijsetvaluesblocked"
3696 /*@C
3697   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked()
3698 
3699   Collective on Mat
3700 
3701   Input Parameters:
3702 + mat - the matrix
3703 . min - number of input rows
3704 . im - input rows
3705 . nin - number of input columns
3706 . in - input columns
3707 . v - numerical values input
3708 - addvin - INSERT_VALUES or ADD_VALUES
3709 
3710   Notes: This contains a complete copy of MatSetValuesBlocked_MPIBAIJ(); the code duplication is terrible but avoids the extra function call overhead.
3711 
3712   Level: advanced
3713 
3714 .seealso:   MatSetValuesBlocked()
3715 @*/
3716 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
3717 {
3718   /* convert input arguments to C version */
3719   Mat             mat = *matin;
3720   PetscInt        m = *min, n = *nin;
3721   InsertMode      addv = *addvin;
3722 
3723   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
3724   const MatScalar *value;
3725   MatScalar       *barray=baij->barray;
3726   PetscTruth      roworiented = baij->roworiented;
3727   PetscErrorCode  ierr;
3728   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
3729   PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
3730   PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
3731 
3732   PetscFunctionBegin;
3733   /* tasks normally handled by MatSetValuesBlocked() */
3734   if (mat->insertmode == NOT_SET_VALUES) {
3735     mat->insertmode = addv;
3736   }
3737 #if defined(PETSC_USE_DEBUG)
3738   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
3739   if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
3740 #endif
3741   if (mat->assembled) {
3742     mat->was_assembled = PETSC_TRUE;
3743     mat->assembled     = PETSC_FALSE;
3744   }
3745   ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
3746 
3747 
3748   if (!barray) {
3749     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
3750     baij->barray = barray;
3751   }
3752 
3753   if (roworiented) {
3754     stepval = (n-1)*bs;
3755   } else {
3756     stepval = (m-1)*bs;
3757   }
3758   for (i=0; i<m; i++) {
3759     if (im[i] < 0) continue;
3760 #if defined(PETSC_USE_DEBUG)
3761     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
3762 #endif
3763     if (im[i] >= rstart && im[i] < rend) {
3764       row = im[i] - rstart;
3765       for (j=0; j<n; j++) {
3766         /* If NumCol = 1 then a copy is not required */
3767         if ((roworiented) && (n == 1)) {
3768           barray = (MatScalar*)v + i*bs2;
3769         } else if ((!roworiented) && (m == 1)) {
3770           barray = (MatScalar*)v + j*bs2;
3771         } else { /* Here a copy is required */
3772           if (roworiented) {
3773             value = v + i*(stepval+bs)*bs + j*bs;
3774           } else {
3775             value = v + j*(stepval+bs)*bs + i*bs;
3776           }
3777           for (ii=0; ii<bs; ii++,value+=stepval) {
3778             for (jj=0; jj<bs; jj++) {
3779               *barray++  = *value++;
3780             }
3781           }
3782           barray -= bs2;
3783         }
3784 
3785         if (in[j] >= cstart && in[j] < cend) {
3786           col  = in[j] - cstart;
3787           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
3788         }
3789         else if (in[j] < 0) continue;
3790 #if defined(PETSC_USE_DEBUG)
3791         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
3792 #endif
3793         else {
3794           if (mat->was_assembled) {
3795             if (!baij->colmap) {
3796               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
3797             }
3798 
3799 #if defined(PETSC_USE_DEBUG)
3800 #if defined (PETSC_USE_CTABLE)
3801             { PetscInt data;
3802               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
3803               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
3804             }
3805 #else
3806             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
3807 #endif
3808 #endif
3809 #if defined (PETSC_USE_CTABLE)
3810             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
3811             col  = (col - 1)/bs;
3812 #else
3813             col = (baij->colmap[in[j]] - 1)/bs;
3814 #endif
3815             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
3816               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
3817               col =  in[j];
3818             }
3819           }
3820           else col = in[j];
3821           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
3822         }
3823       }
3824     } else {
3825       if (!baij->donotstash) {
3826         if (roworiented) {
3827           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
3828         } else {
3829           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
3830         }
3831       }
3832     }
3833   }
3834 
3835   /* task normally handled by MatSetValuesBlocked() */
3836   ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
3837   PetscFunctionReturn(0);
3838 }
3839