xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 7212da7c135ebda4e0bbb2601b2d797d12ac4dc2)
1 #define PETSCMAT_DLL
2 
3 #include "../src/mat/impls/baij/mpi/mpibaij.h"   /*I  "petscmat.h"  I*/
4 #include "petscblaslapack.h"
5 
6 EXTERN PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
7 EXTERN PetscErrorCode DisAssemble_MPIBAIJ(Mat);
8 EXTERN PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat,PetscInt,IS[],PetscInt);
9 EXTERN PetscErrorCode MatGetSubMatrices_MPIBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat *[]);
10 EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []);
11 EXTERN PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
12 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
13 EXTERN PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
14 EXTERN PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
15 EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar);
16 
17 #undef __FUNCT__
18 #define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ"
19 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
20 {
21   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
22   PetscErrorCode ierr;
23   PetscInt       i,*idxb = 0;
24   PetscScalar    *va,*vb;
25   Vec            vtmp;
26 
27   PetscFunctionBegin;
28   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
29   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
30   if (idx) {
31     for (i=0; i<A->rmap->n; i++) {if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;}
32   }
33 
34   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
35   if (idx) {ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);}
36   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
37   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
38 
39   for (i=0; i<A->rmap->n; i++){
40     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {va[i] = vb[i]; if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);}
41   }
42 
43   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
44   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
45   if (idxb) {ierr = PetscFree(idxb);CHKERRQ(ierr);}
46   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
47   PetscFunctionReturn(0);
48 }
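/*
   Illustrative note (added, not in the original source): for the off-diagonal
   part, idxb[] holds local column indices of B.  The translation above,
       bs*garray[idxb[i]/bs] + idxb[i]%bs,
   maps the local block idxb[i]/bs to its global block through garray and then
   restores the offset within the block.  For example, with bs = 2 and
   garray = {7,9}, local column 3 lies in local block 1 at offset 1, so the
   global column is 2*9 + 1 = 19.
*/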
49 
50 EXTERN_C_BEGIN
51 #undef __FUNCT__
52 #define __FUNCT__ "MatStoreValues_MPIBAIJ"
53 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIBAIJ(Mat mat)
54 {
55   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
56   PetscErrorCode ierr;
57 
58   PetscFunctionBegin;
59   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
60   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
61   PetscFunctionReturn(0);
62 }
63 EXTERN_C_END
64 
65 EXTERN_C_BEGIN
66 #undef __FUNCT__
67 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
68 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIBAIJ(Mat mat)
69 {
70   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
71   PetscErrorCode ierr;
72 
73   PetscFunctionBegin;
74   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
75   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
76   PetscFunctionReturn(0);
77 }
78 EXTERN_C_END
79 
80 /*
81      Local utility routine that creates a mapping from the global column
82    number to the local number in the off-diagonal part of the local
83    storage of the matrix.  This is done in a non-scalable way, since the
84    length of colmap equals the global matrix length.
85 */
86 #undef __FUNCT__
87 #define __FUNCT__ "CreateColmap_MPIBAIJ_Private"
88 PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat)
89 {
90   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
91   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
92   PetscErrorCode ierr;
93   PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;
94 
95   PetscFunctionBegin;
96 #if defined (PETSC_USE_CTABLE)
97   ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr);
98   for (i=0; i<nbs; i++){
99     ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
100   }
101 #else
102   ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr);
103   ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
104   ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
105   for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
106 #endif
107   PetscFunctionReturn(0);
108 }
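/*
   A minimal sketch (added for illustration; ColmapLookupExample_Private is a
   hypothetical name, not part of PETSc): how the colmap built above is
   consulted.  colmap stores local block offsets shifted by one, so a stored
   value of zero means "block column not present in B".
*/
static PetscErrorCode ColmapLookupExample_Private(Mat_MPIBAIJ *baij,PetscInt bs,PetscInt gcol,PetscInt *lcol)
{
#if defined (PETSC_USE_CTABLE)
  PetscErrorCode ierr;
#endif
  PetscInt col;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableFind(baij->colmap,gcol/bs + 1,&col);CHKERRQ(ierr);
  col--;
#else
  col = baij->colmap[gcol/bs] - 1;
#endif
  *lcol = (col < 0) ? -1 : col + gcol%bs; /* -1 signals an absent block column */
  PetscFunctionReturn(0);
}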
109 
110 #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
111 { \
112  \
113     brow = row/bs;  \
114     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
115     rmax = aimax[brow]; nrow = ailen[brow]; \
116       bcol = col/bs; \
117       ridx = row % bs; cidx = col % bs; \
118       low = 0; high = nrow; \
119       while (high-low > 3) { \
120         t = (low+high)/2; \
121         if (rp[t] > bcol) high = t; \
122         else              low  = t; \
123       } \
124       for (_i=low; _i<high; _i++) { \
125         if (rp[_i] > bcol) break; \
126         if (rp[_i] == bcol) { \
127           bap  = ap +  bs2*_i + bs*cidx + ridx; \
128           if (addv == ADD_VALUES) *bap += value;  \
129           else                    *bap  = value;  \
130           goto a_noinsert; \
131         } \
132       } \
133       if (a->nonew == 1) goto a_noinsert; \
134       if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
135       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
136       N = nrow++ - 1;  \
137       /* shift up all the later entries in this row */ \
138       for (ii=N; ii>=_i; ii--) { \
139         rp[ii+1] = rp[ii]; \
140         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
141       } \
142       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
143       rp[_i]                      = bcol;  \
144       ap[bs2*_i + bs*cidx + ridx] = value;  \
145       a_noinsert:; \
146     ailen[brow] = nrow; \
147 }
148 
149 #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
150 { \
151     brow = row/bs;  \
152     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
153     rmax = bimax[brow]; nrow = bilen[brow]; \
154       bcol = col/bs; \
155       ridx = row % bs; cidx = col % bs; \
156       low = 0; high = nrow; \
157       while (high-low > 3) { \
158         t = (low+high)/2; \
159         if (rp[t] > bcol) high = t; \
160         else              low  = t; \
161       } \
162       for (_i=low; _i<high; _i++) { \
163         if (rp[_i] > bcol) break; \
164         if (rp[_i] == bcol) { \
165           bap  = ap +  bs2*_i + bs*cidx + ridx; \
166           if (addv == ADD_VALUES) *bap += value;  \
167           else                    *bap  = value;  \
168           goto b_noinsert; \
169         } \
170       } \
171       if (b->nonew == 1) goto b_noinsert; \
172       if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
173       MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
174       CHKMEMQ;\
175       N = nrow++ - 1;  \
176       /* shift up all the later entries in this row */ \
177       for (ii=N; ii>=_i; ii--) { \
178         rp[ii+1] = rp[ii]; \
179         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
180       } \
181       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
182       rp[_i]                      = bcol;  \
183       ap[bs2*_i + bs*cidx + ridx] = value;  \
184       b_noinsert:; \
185     bilen[brow] = nrow; \
186 }
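/*
   A minimal sketch (added for illustration; BlockColSearchExample_Private is
   a hypothetical name) of the search strategy both macros above share: a
   binary search narrows the window of sorted block-column indices to at most
   three entries, then a short linear scan locates the entry or the insertion
   point.
*/
static PetscInt BlockColSearchExample_Private(const PetscInt rp[],PetscInt nrow,PetscInt bcol)
{
  PetscInt low = 0,high = nrow,t,i;

  while (high-low > 3) {       /* binary phase: halve the window */
    t = (low+high)/2;
    if (rp[t] > bcol) high = t;
    else              low  = t;
  }
  for (i=low; i<high; i++) {   /* linear phase: at most three probes */
    if (rp[i] >= bcol) break;
  }
  return i;                    /* position of bcol if present, else where it would be inserted */
}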
187 
188 #undef __FUNCT__
189 #define __FUNCT__ "MatSetValues_MPIBAIJ"
190 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
191 {
192   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
193   MatScalar      value;
194   PetscTruth     roworiented = baij->roworiented;
195   PetscErrorCode ierr;
196   PetscInt       i,j,row,col;
197   PetscInt       rstart_orig=mat->rmap->rstart;
198   PetscInt       rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart;
199   PetscInt       cend_orig=mat->cmap->rend,bs=mat->rmap->bs;
200 
201   /* Some Variables required in the macro */
202   Mat            A = baij->A;
203   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)(A)->data;
204   PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
205   MatScalar      *aa=a->a;
206 
207   Mat            B = baij->B;
208   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(B)->data;
209   PetscInt       *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
210   MatScalar      *ba=b->a;
211 
212   PetscInt       *rp,ii,nrow,_i,rmax,N,brow,bcol;
213   PetscInt       low,high,t,ridx,cidx,bs2=a->bs2;
214   MatScalar      *ap,*bap;
215 
216   PetscFunctionBegin;
217   if (v) PetscValidScalarPointer(v,6);
218   for (i=0; i<m; i++) {
219     if (im[i] < 0) continue;
220 #if defined(PETSC_USE_DEBUG)
221     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
222 #endif
223     if (im[i] >= rstart_orig && im[i] < rend_orig) {
224       row = im[i] - rstart_orig;
225       for (j=0; j<n; j++) {
226         if (in[j] >= cstart_orig && in[j] < cend_orig){
227           col = in[j] - cstart_orig;
228           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
229           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
230           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
231         } else if (in[j] < 0) continue;
232 #if defined(PETSC_USE_DEBUG)
233         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
234 #endif
235         else {
236           if (mat->was_assembled) {
237             if (!baij->colmap) {
238               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
239             }
240 #if defined (PETSC_USE_CTABLE)
241             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
242             col  = col - 1;
243 #else
244             col = baij->colmap[in[j]/bs] - 1;
245 #endif
246             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
247               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
248               col =  in[j];
249               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
250               B = baij->B;
251               b = (Mat_SeqBAIJ*)(B)->data;
252               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
253               ba=b->a;
254             } else col += in[j]%bs;
255           } else col = in[j];
256           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
257           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
258           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
259         }
260       }
261     } else {
262       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
263       if (!baij->donotstash) {
264         if (roworiented) {
265           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
266         } else {
267           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
268         }
269       }
270     }
271   }
272   PetscFunctionReturn(0);
273 }
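/*
   Illustrative note (added, not in the original source): with roworiented the
   input array v is read as v[i*n+j] (C ordering), otherwise as v[i+j*m]
   (Fortran ordering).  For m = n = 2 the same array v = {1,2,3,4} is
   interpreted as
       row-oriented:    [1 2; 3 4]
       column-oriented: [1 3; 2 4]
*/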
274 
275 #undef __FUNCT__
276 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
277 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
278 {
279   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
280   const PetscScalar *value;
281   MatScalar         *barray=baij->barray;
282   PetscTruth        roworiented = baij->roworiented;
283   PetscErrorCode    ierr;
284   PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
285   PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
286   PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
287 
288   PetscFunctionBegin;
289   if (!barray) {
290     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
291     baij->barray = barray;
292   }
293 
294   if (roworiented) {
295     stepval = (n-1)*bs;
296   } else {
297     stepval = (m-1)*bs;
298   }
299   for (i=0; i<m; i++) {
300     if (im[i] < 0) continue;
301 #if defined(PETSC_USE_DEBUG)
302     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
303 #endif
304     if (im[i] >= rstart && im[i] < rend) {
305       row = im[i] - rstart;
306       for (j=0; j<n; j++) {
307         /* If n == 1 (row-oriented) or m == 1 (column-oriented) the block in v can be used in place; no copy is required */
308         if ((roworiented) && (n == 1)) {
309           barray = (MatScalar*)v + i*bs2;
310         } else if((!roworiented) && (m == 1)) {
311           barray = (MatScalar*)v + j*bs2;
312         } else { /* Here a copy is required */
313           if (roworiented) {
314             value = v + (i*(stepval+bs) + j)*bs;
315           } else {
316             value = v + (j*(stepval+bs) + i)*bs;
317           }
318           for (ii=0; ii<bs; ii++,value+=bs+stepval) {
319             for (jj=0; jj<bs; jj++) {
320               barray[jj]  = value[jj];
321             }
322             barray += bs;
323           }
324           barray -= bs2;
325         }
326 
327         if (in[j] >= cstart && in[j] < cend){
328           col  = in[j] - cstart;
329           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
330         }
331         else if (in[j] < 0) continue;
332 #if defined(PETSC_USE_DEBUG)
333         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
334 #endif
335         else {
336           if (mat->was_assembled) {
337             if (!baij->colmap) {
338               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
339             }
340 
341 #if defined(PETSC_USE_DEBUG)
342 #if defined (PETSC_USE_CTABLE)
343             { PetscInt data;
344               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
345               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
346             }
347 #else
348             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
349 #endif
350 #endif
351 #if defined (PETSC_USE_CTABLE)
352 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
353             col  = (col - 1)/bs;
354 #else
355             col = (baij->colmap[in[j]] - 1)/bs;
356 #endif
357             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
358               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
359               col =  in[j];
360             }
361           }
362           else col = in[j];
363           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
364         }
365       }
366     } else {
367       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
368       if (!baij->donotstash) {
369         if (roworiented) {
370           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
371         } else {
372           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
373         }
374       }
375     }
376   }
377   PetscFunctionReturn(0);
378 }
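/*
   Illustrative note (added, not in the original source): stepval is the gap
   between consecutive rows of a single bs x bs block inside v.  Row-oriented,
   v is an (m*bs) x (n*bs) array stored by rows, so advancing one row within a
   block skips bs + stepval = n*bs scalars.  E.g. with bs = 2 and n = 3,
   stepval = (n-1)*bs = 4 and successive rows of a block start 6 scalars apart.
*/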
379 
380 #define HASH_KEY 0.6180339887
381 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
382 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
383 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
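/*
   Worked example (added for illustration): HASH is Fibonacci hashing; it
   multiplies the key by the fractional part of the golden ratio and keeps the
   fractional part of the product.  For size = 1000 and key = 12345:
       tmp  = 12345 * 0.6180339887 = 7629.6295905...
       HASH = (PetscInt)(1000 * 0.6295905...) = 629
   This spreads even clustered keys nearly uniformly over [0,size).
*/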
384 #undef __FUNCT__
385 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
386 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
387 {
388   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
389   PetscTruth     roworiented = baij->roworiented;
390   PetscErrorCode ierr;
391   PetscInt       i,j,row,col;
392   PetscInt       rstart_orig=mat->rmap->rstart;
393   PetscInt       rend_orig=mat->rmap->rend,Nbs=baij->Nbs;
394   PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
395   PetscReal      tmp;
396   MatScalar      **HD = baij->hd,value;
397 #if defined(PETSC_USE_DEBUG)
398   PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
399 #endif
400 
401   PetscFunctionBegin;
402   if (v) PetscValidScalarPointer(v,6);
403   for (i=0; i<m; i++) {
404 #if defined(PETSC_USE_DEBUG)
405     if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
406     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
407 #endif
408     row = im[i];
409     if (row >= rstart_orig && row < rend_orig) {
410       for (j=0; j<n; j++) {
411         col = in[j];
412         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
413         /* Look up into the Hash Table */
414         key = (row/bs)*Nbs+(col/bs)+1;
415         h1  = HASH(size,key,tmp);
416 
417 
418         idx = h1;
419 #if defined(PETSC_USE_DEBUG)
420         insert_ct++;
421         total_ct++;
422         if (HT[idx] != key) {
423           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
424           if (idx == size) {
425             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
426             if (idx == h1) {
427               SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
428             }
429           }
430         }
431 #else
432         if (HT[idx] != key) {
433           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
434           if (idx == size) {
435             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
436             if (idx == h1) {
437               SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
438             }
439           }
440         }
441 #endif
442         /* A HASH table entry is found, so insert the values at the correct address */
443         if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
444         else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
445       }
446     } else {
447       if (!baij->donotstash) {
448         if (roworiented) {
449           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
450         } else {
451           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
452         }
453       }
454     }
455   }
456 #if defined(PETSC_USE_DEBUG)
457   baij->ht_total_ct = total_ct;
458   baij->ht_insert_ct = insert_ct;
459 #endif
460   PetscFunctionReturn(0);
461 }
462 
463 #undef __FUNCT__
464 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
465 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
466 {
467   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
468   PetscTruth        roworiented = baij->roworiented;
469   PetscErrorCode    ierr;
470   PetscInt          i,j,ii,jj,row,col;
471   PetscInt          rstart=baij->rstartbs;
472   PetscInt          rend=mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
473   PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
474   PetscReal         tmp;
475   MatScalar         **HD = baij->hd,*baij_a;
476   const PetscScalar *v_t,*value;
477 #if defined(PETSC_USE_DEBUG)
478   PetscInt          total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
479 #endif
480 
481   PetscFunctionBegin;
482 
483   if (roworiented) {
484     stepval = (n-1)*bs;
485   } else {
486     stepval = (m-1)*bs;
487   }
488   for (i=0; i<m; i++) {
489 #if defined(PETSC_USE_DEBUG)
490     if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
491     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
492 #endif
493     row   = im[i];
494     v_t   = v + i*nbs2;
495     if (row >= rstart && row < rend) {
496       for (j=0; j<n; j++) {
497         col = in[j];
498 
499         /* Look up into the Hash Table */
500         key = row*Nbs+col+1;
501         h1  = HASH(size,key,tmp);
502 
503         idx = h1;
504 #if defined(PETSC_USE_DEBUG)
505         total_ct++;
506         insert_ct++;
507         if (HT[idx] != key) {
508           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
509           if (idx == size) {
510             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
511             if (idx == h1) {
512               SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
513             }
514           }
515         }
516 #else
517         if (HT[idx] != key) {
518           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
519           if (idx == size) {
520             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
521             if (idx == h1) {
522               SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
523             }
524           }
525         }
526 #endif
527         baij_a = HD[idx];
528         if (roworiented) {
529           /*value = v + i*(stepval+bs)*bs + j*bs;*/
530           /* value = v + (i*(stepval+bs)+j)*bs; */
531           value = v_t;
532           v_t  += bs;
533           if (addv == ADD_VALUES) {
534             for (ii=0; ii<bs; ii++,value+=stepval) {
535               for (jj=ii; jj<bs2; jj+=bs) {
536                 baij_a[jj]  += *value++;
537               }
538             }
539           } else {
540             for (ii=0; ii<bs; ii++,value+=stepval) {
541               for (jj=ii; jj<bs2; jj+=bs) {
542                 baij_a[jj]  = *value++;
543               }
544             }
545           }
546         } else {
547           value = v + j*(stepval+bs)*bs + i*bs;
548           if (addv == ADD_VALUES) {
549             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
550               for (jj=0; jj<bs; jj++) {
551                 baij_a[jj]  += *value++;
552               }
553             }
554           } else {
555             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
556               for (jj=0; jj<bs; jj++) {
557                 baij_a[jj]  = *value++;
558               }
559             }
560           }
561         }
562       }
563     } else {
564       if (!baij->donotstash) {
565         if (roworiented) {
566           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
567         } else {
568           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
569         }
570       }
571     }
572   }
573 #if defined(PETSC_USE_DEBUG)
574   baij->ht_total_ct = total_ct;
575   baij->ht_insert_ct = insert_ct;
576 #endif
577   PetscFunctionReturn(0);
578 }
579 
580 #undef __FUNCT__
581 #define __FUNCT__ "MatGetValues_MPIBAIJ"
582 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
583 {
584   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
585   PetscErrorCode ierr;
586   PetscInt       bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
587   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
588 
589   PetscFunctionBegin;
590   for (i=0; i<m; i++) {
591     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
592     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
593     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
594       row = idxm[i] - bsrstart;
595       for (j=0; j<n; j++) {
596         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
597         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
598         if (idxn[j] >= bscstart && idxn[j] < bscend){
599           col = idxn[j] - bscstart;
600           ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
601         } else {
602           if (!baij->colmap) {
603             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
604           }
605 #if defined (PETSC_USE_CTABLE)
606           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
607           data--;
608 #else
609           data = baij->colmap[idxn[j]/bs]-1;
610 #endif
611           if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
612           else {
613             col  = data + idxn[j]%bs;
614             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
615           }
616         }
617       }
618     } else {
619       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
620     }
621   }
622   PetscFunctionReturn(0);
623 }
624 
625 #undef __FUNCT__
626 #define __FUNCT__ "MatNorm_MPIBAIJ"
627 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
628 {
629   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
630   Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
631   PetscErrorCode ierr;
632   PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
633   PetscReal      sum = 0.0;
634   MatScalar      *v;
635 
636   PetscFunctionBegin;
637   if (baij->size == 1) {
638     ierr =  MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
639   } else {
640     if (type == NORM_FROBENIUS) {
641       v = amat->a;
642       nz = amat->nz*bs2;
643       for (i=0; i<nz; i++) {
644 #if defined(PETSC_USE_COMPLEX)
645         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
646 #else
647         sum += (*v)*(*v); v++;
648 #endif
649       }
650       v = bmat->a;
651       nz = bmat->nz*bs2;
652       for (i=0; i<nz; i++) {
653 #if defined(PETSC_USE_COMPLEX)
654         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
655 #else
656         sum += (*v)*(*v); v++;
657 #endif
658       }
659       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
660       *nrm = sqrt(*nrm);
661     } else if (type == NORM_1) { /* max column sum */
662       PetscReal *tmp,*tmp2;
663       PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
664       ierr = PetscMalloc2(mat->cmap->N,PetscReal,&tmp,mat->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr);
665       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
666       v = amat->a; jj = amat->j;
667       for (i=0; i<amat->nz; i++) {
668         for (j=0; j<bs; j++){
669           col = bs*(cstart + *jj) + j; /* column index */
670           for (row=0; row<bs; row++){
671             tmp[col] += PetscAbsScalar(*v);  v++;
672           }
673         }
674         jj++;
675       }
676       v = bmat->a; jj = bmat->j;
677       for (i=0; i<bmat->nz; i++) {
678         for (j=0; j<bs; j++){
679           col = bs*garray[*jj] + j;
680           for (row=0; row<bs; row++){
681             tmp[col] += PetscAbsScalar(*v); v++;
682           }
683         }
684         jj++;
685       }
686       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
687       *nrm = 0.0;
688       for (j=0; j<mat->cmap->N; j++) {
689         if (tmp2[j] > *nrm) *nrm = tmp2[j];
690       }
691       ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
692     } else if (type == NORM_INFINITY) { /* max row sum */
693       PetscReal *sums;
694       ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr);
695       sum = 0.0;
696       for (j=0; j<amat->mbs; j++) {
697         for (row=0; row<bs; row++) sums[row] = 0.0;
698         v = amat->a + bs2*amat->i[j];
699         nz = amat->i[j+1]-amat->i[j];
700         for (i=0; i<nz; i++) {
701           for (col=0; col<bs; col++){
702             for (row=0; row<bs; row++){
703               sums[row] += PetscAbsScalar(*v); v++;
704             }
705           }
706         }
707         v = bmat->a + bs2*bmat->i[j];
708         nz = bmat->i[j+1]-bmat->i[j];
709         for (i=0; i<nz; i++) {
710           for (col=0; col<bs; col++){
711             for (row=0; row<bs; row++){
712               sums[row] += PetscAbsScalar(*v); v++;
713             }
714           }
715         }
716         for (row=0; row<bs; row++){
717           if (sums[row] > sum) sum = sums[row];
718         }
719       }
720       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
721       ierr = PetscFree(sums);CHKERRQ(ierr);
722     } else SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for this norm yet");
723   }
724   PetscFunctionReturn(0);
725 }
726 
727 /*
728   Creates and populates the hash table.
729   This table is created only once.
730   If new entries need to be added to the matrix
731   then the hash table has to be destroyed and
732   recreated.
733 */
734 #undef __FUNCT__
735 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
736 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
737 {
738   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
739   Mat            A = baij->A,B=baij->B;
740   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
741   PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
742   PetscErrorCode ierr;
743   PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
744   PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
745   PetscInt       *HT,key;
746   MatScalar      **HD;
747   PetscReal      tmp;
748 #if defined(PETSC_USE_INFO)
749   PetscInt       ct=0,max=0;
750 #endif
751 
752   PetscFunctionBegin;
753   if (baij->ht) PetscFunctionReturn(0);
754 
755   baij->ht_size = (PetscInt)(factor*nz);
756   ht_size       = baij->ht_size;
757 
758   /* Allocate Memory for Hash Table */
759   ierr = PetscMalloc2(ht_size,MatScalar*,&baij->hd,ht_size,PetscInt,&baij->ht);CHKERRQ(ierr);
760   ierr = PetscMemzero(baij->hd,ht_size*sizeof(MatScalar*));CHKERRQ(ierr);
761   ierr = PetscMemzero(baij->ht,ht_size*sizeof(PetscInt));CHKERRQ(ierr);
762   HD   = baij->hd;
763   HT   = baij->ht;
764 
765   /* Loop Over A */
766   for (i=0; i<a->mbs; i++) {
767     for (j=ai[i]; j<ai[i+1]; j++) {
768       row = i+rstart;
769       col = aj[j]+cstart;
770 
771       key = row*Nbs + col + 1;
772       h1  = HASH(ht_size,key,tmp);
773       for (k=0; k<ht_size; k++){
774         if (!HT[(h1+k)%ht_size]) {
775           HT[(h1+k)%ht_size] = key;
776           HD[(h1+k)%ht_size] = a->a + j*bs2;
777           break;
778 #if defined(PETSC_USE_INFO)
779         } else {
780           ct++;
781 #endif
782         }
783       }
784 #if defined(PETSC_USE_INFO)
785       if (k> max) max = k;
786 #endif
787     }
788   }
789   /* Loop Over B */
790   for (i=0; i<b->mbs; i++) {
791     for (j=bi[i]; j<bi[i+1]; j++) {
792       row = i+rstart;
793       col = garray[bj[j]];
794       key = row*Nbs + col + 1;
795       h1  = HASH(ht_size,key,tmp);
796       for (k=0; k<ht_size; k++){
797         if (!HT[(h1+k)%ht_size]) {
798           HT[(h1+k)%ht_size] = key;
799           HD[(h1+k)%ht_size] = b->a + j*bs2;
800           break;
801 #if defined(PETSC_USE_INFO)
802         } else {
803           ct++;
804 #endif
805         }
806       }
807 #if defined(PETSC_USE_INFO)
808       if (k> max) max = k;
809 #endif
810     }
811   }
812 
813   /* Print Summary */
814 #if defined(PETSC_USE_INFO)
815   for (i=0,j=0; i<ht_size; i++) {
816     if (HT[i]) {j++;}
817   }
818   ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
819 #endif
820   PetscFunctionReturn(0);
821 }
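/*
   A minimal sketch (added for illustration; HashLookupExample_Private is a
   hypothetical name): the table built above resolves collisions by linear
   probing with wraparound, so a lookup retraces the insertion probe sequence.
*/
static MatScalar *HashLookupExample_Private(const PetscInt HT[],MatScalar **HD,PetscInt ht_size,PetscInt key)
{
  PetscReal tmp;
  PetscInt  k,h1;

  h1 = HASH(ht_size,key,tmp);
  for (k=0; k<ht_size; k++) {
    if (HT[(h1+k)%ht_size] == key) return HD[(h1+k)%ht_size]; /* pointer to the stored bs2 block */
  }
  return 0; /* key not present in the table */
}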
822 
823 #undef __FUNCT__
824 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
825 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
826 {
827   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
828   PetscErrorCode ierr;
829   PetscInt       nstash,reallocs;
830   InsertMode     addv;
831 
832   PetscFunctionBegin;
833   if (baij->donotstash || mat->nooffprocentries) {
834     PetscFunctionReturn(0);
835   }
836 
837   /* make sure all processes are in the same mode, either INSERT_VALUES or ADD_VALUES */
838   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
839   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
840   mat->insertmode = addv; /* in case this processor had no cache */
841 
842   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
843   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
844   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
845   ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
846   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
847   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
848   PetscFunctionReturn(0);
849 }
850 
851 #undef __FUNCT__
852 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
853 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
854 {
855   Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
856   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)baij->A->data;
857   PetscErrorCode ierr;
858   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
859   PetscInt       *row,*col;
860   PetscTruth     r1,r2,r3,other_disassembled;
861   MatScalar      *val;
862   InsertMode     addv = mat->insertmode;
863   PetscMPIInt    n;
864 
865   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
866   PetscFunctionBegin;
867   if (!baij->donotstash && !mat->nooffprocentries) {
868     while (1) {
869       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
870       if (!flg) break;
871 
872       for (i=0; i<n;) {
873         /* Now identify the consecutive vals belonging to the same row */
874         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
875         if (j < n) ncols = j-i;
876         else       ncols = n-i;
877         /* Now assemble all these values with a single function call */
878         ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
879         i = j;
880       }
881     }
882     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
883     /* Now process the block-stash. Since the values are stashed column-oriented,
884        temporarily clear the roworiented flags; after MatSetValues()
885        the original flags are restored */
886     r1 = baij->roworiented;
887     r2 = a->roworiented;
888     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
889     baij->roworiented = PETSC_FALSE;
890     a->roworiented    = PETSC_FALSE;
891     (((Mat_SeqBAIJ*)baij->B->data))->roworiented    = PETSC_FALSE; /* b->roworiented */
892     while (1) {
893       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
894       if (!flg) break;
895 
896       for (i=0; i<n;) {
897         /* Now identify the consecutive vals belonging to the same row */
898         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
899         if (j < n) ncols = j-i;
900         else       ncols = n-i;
901         ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
902         i = j;
903       }
904     }
905     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
906     baij->roworiented = r1;
907     a->roworiented    = r2;
908     ((Mat_SeqBAIJ*)baij->B->data)->roworiented    = r3; /* b->roworiented */
909   }
910 
911   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
912   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
913 
914   /* determine if any processor has disassembled, if so we must
915      also disassemble ourselves, in order that we may reassemble. */
916   /*
917      if nonzero structure of submatrix B cannot change then we know that
918      no processor disassembled, and thus we can skip this step
919   */
920   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
921     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
922     if (mat->was_assembled && !other_disassembled) {
923       ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
924     }
925   }
926 
927   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
928     ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
929   }
930   ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
931   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
932   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
933 
934 #if defined(PETSC_USE_INFO)
935   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
936     ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);
937     baij->ht_total_ct  = 0;
938     baij->ht_insert_ct = 0;
939   }
940 #endif
941   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
942     ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
943     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
944     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
945   }
946 
947   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
948   baij->rowvalues = 0;
949   PetscFunctionReturn(0);
950 }
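/*
   Illustrative usage sketch (added, not in the original source): the
   begin/end pair above is driven by the standard user sequence; entries set
   for off-process rows between these calls travel through the stashes that
   MatAssemblyEnd_MPIBAIJ() drains:

       ierr = MatSetValues(mat,1,&row,1,&col,&v,ADD_VALUES);CHKERRQ(ierr);
       ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
       ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/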
951 
952 #undef __FUNCT__
953 #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
954 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
955 {
956   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
957   PetscErrorCode    ierr;
958   PetscMPIInt       size = baij->size,rank = baij->rank;
959   PetscInt          bs = mat->rmap->bs;
960   PetscTruth        iascii,isdraw;
961   PetscViewer       sviewer;
962   PetscViewerFormat format;
963 
964   PetscFunctionBegin;
965   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
966   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
967   if (iascii) {
968     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
969     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
970       MatInfo info;
971       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
972       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
973       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
974               rank,mat->rmap->N,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs,
975               mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
976       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
977       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
978       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
979       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
980       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
981       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
982       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
983       PetscFunctionReturn(0);
984     } else if (format == PETSC_VIEWER_ASCII_INFO) {
985       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
986       PetscFunctionReturn(0);
987     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
988       PetscFunctionReturn(0);
989     }
990   }
991 
992   if (isdraw) {
993     PetscDraw       draw;
994     PetscTruth isnull;
995     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
996     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
997   }
998 
999   if (size == 1) {
1000     ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1001     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
1002   } else {
1003     /* assemble the entire matrix onto first processor. */
1004     Mat         A;
1005     Mat_SeqBAIJ *Aloc;
1006     PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
1007     MatScalar   *a;
1008 
1009     /* Here we are creating a temporary matrix, so we will assume MATMPIBAIJ is acceptable */
1010     /* Perhaps this should be the type of mat? */
1011     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1012     if (!rank) {
1013       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1014     } else {
1015       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1016     }
1017     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
1018     ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1019     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1020 
1021     /* copy over the A part */
1022     Aloc = (Mat_SeqBAIJ*)baij->A->data;
1023     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1024     ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
1025 
1026     for (i=0; i<mbs; i++) {
1027       rvals[0] = bs*(baij->rstartbs + i);
1028       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1029       for (j=ai[i]; j<ai[i+1]; j++) {
1030         col = (baij->cstartbs+aj[j])*bs;
1031         for (k=0; k<bs; k++) {
1032           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1033           col++; a += bs;
1034         }
1035       }
1036     }
1037     /* copy over the B part */
1038     Aloc = (Mat_SeqBAIJ*)baij->B->data;
1039     ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1040     for (i=0; i<mbs; i++) {
1041       rvals[0] = bs*(baij->rstartbs + i);
1042       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1043       for (j=ai[i]; j<ai[i+1]; j++) {
1044         col = baij->garray[aj[j]]*bs;
1045         for (k=0; k<bs; k++) {
1046           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1047           col++; a += bs;
1048         }
1049       }
1050     }
1051     ierr = PetscFree(rvals);CHKERRQ(ierr);
1052     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1053     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1054     /*
1055        Everyone has to call to draw the matrix since the graphics waits are
1056        synchronized across all processors that share the PetscDraw object
1057     */
1058     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1059     if (!rank) {
1060       ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1061       ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1062     }
1063     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1064     ierr = MatDestroy(A);CHKERRQ(ierr);
1065   }
1066   PetscFunctionReturn(0);
1067 }
1068 
1069 #undef __FUNCT__
1070 #define __FUNCT__ "MatView_MPIBAIJ"
1071 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
1072 {
1073   PetscErrorCode ierr;
1074   PetscTruth     iascii,isdraw,issocket,isbinary;
1075 
1076   PetscFunctionBegin;
1077   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1078   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1079   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1080   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1081   if (iascii || isdraw || issocket || isbinary) {
1082     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1083   } else {
1084     SETERRQ1(((PetscObject)mat)->comm,PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
1085   }
1086   PetscFunctionReturn(0);
1087 }
1088 
1089 #undef __FUNCT__
1090 #define __FUNCT__ "MatDestroy_MPIBAIJ"
1091 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
1092 {
1093   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1094   PetscErrorCode ierr;
1095 
1096   PetscFunctionBegin;
1097 #if defined(PETSC_USE_LOG)
1098   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
1099 #endif
1100   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1101   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
1102   if (baij->A){ierr = MatDestroy(baij->A);CHKERRQ(ierr);}
1103   if (baij->B){ierr = MatDestroy(baij->B);CHKERRQ(ierr);}
1104 #if defined (PETSC_USE_CTABLE)
1105   if (baij->colmap) {ierr = PetscTableDestroy(baij->colmap);CHKERRQ(ierr);}
1106 #else
1107   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
1108 #endif
1109   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
1110   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
1111   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
1112   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
1113   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
1114   ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);
1115   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
1116   ierr = PetscFree(baij);CHKERRQ(ierr);
1117 
1118   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1119   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1120   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1121   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
1122   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1123   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1124   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
1125   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr);
1126   PetscFunctionReturn(0);
1127 }
1128 
1129 #undef __FUNCT__
1130 #define __FUNCT__ "MatMult_MPIBAIJ"
1131 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1132 {
1133   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1134   PetscErrorCode ierr;
1135   PetscInt       nt;
1136 
1137   PetscFunctionBegin;
1138   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1139   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1140   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1141   if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
1142   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1143   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1144   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1145   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1146   PetscFunctionReturn(0);
1147 }
1148 
1149 #undef __FUNCT__
1150 #define __FUNCT__ "MatMultAdd_MPIBAIJ"
1151 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1152 {
1153   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1154   PetscErrorCode ierr;
1155 
1156   PetscFunctionBegin;
1157   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1158   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1159   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1160   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1161   PetscFunctionReturn(0);
1162 }
1163 
1164 #undef __FUNCT__
1165 #define __FUNCT__ "MatMultTranspose_MPIBAIJ"
1166 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1167 {
1168   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1169   PetscErrorCode ierr;
1170   PetscTruth     merged;
1171 
1172   PetscFunctionBegin;
1173   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1174   /* do nondiagonal part */
1175   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1176   if (!merged) {
1177     /* send it on its way */
1178     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1179     /* do local part */
1180     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1181     /* receive remote parts: note this assumes the values are not actually */
1182     /* inserted in yy until the next line */
1183     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1184   } else {
1185     /* do local part */
1186     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1187     /* send it on its way */
1188     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1189     /* the values were actually received in the Begin(), but we must still call this no-op End() */
1190     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1191   }
1192   PetscFunctionReturn(0);
1193 }
1194 
1195 #undef __FUNCT__
1196 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
1197 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1198 {
1199   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1200   PetscErrorCode ierr;
1201 
1202   PetscFunctionBegin;
1203   /* do nondiagonal part */
1204   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1205   /* send it on its way */
1206   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1207   /* do local part */
1208   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1209   /* receive remote parts: note this assumes the values are not actually */
1210   /* inserted in zz until the next line, which is true for this implementation */
1211   /* but may not hold for every scatter implementation. */
1212   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1213   PetscFunctionReturn(0);
1214 }
1215 
1216 /*
1217   This only works correctly for square matrices where the subblock A->A is the
1218    diagonal block
1219 */
1220 #undef __FUNCT__
1221 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
1222 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1223 {
1224   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1225   PetscErrorCode ierr;
1226 
1227   PetscFunctionBegin;
1228   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1229   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1230   PetscFunctionReturn(0);
1231 }
1232 
1233 #undef __FUNCT__
1234 #define __FUNCT__ "MatScale_MPIBAIJ"
1235 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
1236 {
1237   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1238   PetscErrorCode ierr;
1239 
1240   PetscFunctionBegin;
1241   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1242   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1243   PetscFunctionReturn(0);
1244 }
1245 
1246 #undef __FUNCT__
1247 #define __FUNCT__ "MatGetRow_MPIBAIJ"
1248 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1249 {
1250   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
1251   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1252   PetscErrorCode ierr;
1253   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1254   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
1255   PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;
1256 
1257   PetscFunctionBegin;
1258   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1259   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1260   mat->getrowactive = PETSC_TRUE;
1261 
1262   if (!mat->rowvalues && (idx || v)) {
1263     /*
1264         allocate enough space to hold information from the longest row.
1265     */
1266     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1267     PetscInt     max = 1,mbs = mat->mbs,tmp;
1268     for (i=0; i<mbs; i++) {
1269       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1270       if (max < tmp) { max = tmp; }
1271     }
1272     ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1273   }
1274   lrow = row - brstart;
1275 
1276   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1277   if (!v)   {pvA = 0; pvB = 0;}
1278   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1279   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1280   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1281   nztot = nzA + nzB;
1282 
1283   cmap  = mat->garray;
1284   if (v  || idx) {
1285     if (nztot) {
1286       /* Sort by increasing column numbers, assuming A and B already sorted */
1287       PetscInt imark = -1;
1288       if (v) {
1289         *v = v_p = mat->rowvalues;
1290         for (i=0; i<nzB; i++) {
1291           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1292           else break;
1293         }
1294         imark = i;
1295         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1296         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1297       }
1298       if (idx) {
1299         *idx = idx_p = mat->rowindices;
1300         if (imark > -1) {
1301           for (i=0; i<imark; i++) {
1302             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1303           }
1304         } else {
1305           for (i=0; i<nzB; i++) {
1306             if (cmap[cworkB[i]/bs] < cstart)
1307               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1308             else break;
1309           }
1310           imark = i;
1311         }
1312         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1313         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1314       }
1315     } else {
1316       if (idx) *idx = 0;
1317       if (v)   *v   = 0;
1318     }
1319   }
1320   *nz = nztot;
1321   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1322   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1323   PetscFunctionReturn(0);
1324 }
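/*
   Illustrative note (added, not in the original source): the merge above
   relies on the ownership layout.  Columns of B with cmap[...] < cstart lie
   to the left of the diagonal block and the remainder to the right, so
   emitting B-left, then all of A, then B-right produces globally sorted
   column indices; e.g. with cstart*bs = 100, B = {3,250} and A = {100,140}
   merge to {3,100,140,250}.
*/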
1325 
1326 #undef __FUNCT__
1327 #define __FUNCT__ "MatRestoreRow_MPIBAIJ"
1328 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1329 {
1330   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1331 
1332   PetscFunctionBegin;
1333   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1334   baij->getrowactive = PETSC_FALSE;
1335   PetscFunctionReturn(0);
1336 }
1337 
1338 #undef __FUNCT__
1339 #define __FUNCT__ "MatZeroEntries_MPIBAIJ"
1340 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1341 {
1342   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1343   PetscErrorCode ierr;
1344 
1345   PetscFunctionBegin;
1346   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1347   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1348   PetscFunctionReturn(0);
1349 }
1350 
1351 #undef __FUNCT__
1352 #define __FUNCT__ "MatGetInfo_MPIBAIJ"
1353 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1354 {
1355   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
1356   Mat            A = a->A,B = a->B;
1357   PetscErrorCode ierr;
1358   PetscReal      isend[5],irecv[5];
1359 
1360   PetscFunctionBegin;
1361   info->block_size     = (PetscReal)matin->rmap->bs;
1362   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1363   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1364   isend[3] = info->memory;  isend[4] = info->mallocs;
1365   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1366   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1367   isend[3] += info->memory;  isend[4] += info->mallocs;
1368   if (flag == MAT_LOCAL) {
1369     info->nz_used      = isend[0];
1370     info->nz_allocated = isend[1];
1371     info->nz_unneeded  = isend[2];
1372     info->memory       = isend[3];
1373     info->mallocs      = isend[4];
1374   } else if (flag == MAT_GLOBAL_MAX) {
1375     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1376     info->nz_used      = irecv[0];
1377     info->nz_allocated = irecv[1];
1378     info->nz_unneeded  = irecv[2];
1379     info->memory       = irecv[3];
1380     info->mallocs      = irecv[4];
1381   } else if (flag == MAT_GLOBAL_SUM) {
1382     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1383     info->nz_used      = irecv[0];
1384     info->nz_allocated = irecv[1];
1385     info->nz_unneeded  = irecv[2];
1386     info->memory       = irecv[3];
1387     info->mallocs      = irecv[4];
1388   } else {
1389     SETERRQ1(((PetscObject)matin)->comm,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1390   }
1391   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1392   info->fill_ratio_needed = 0;
1393   info->factor_mallocs    = 0;
1394   PetscFunctionReturn(0);
1395 }
1396 
1397 #undef __FUNCT__
1398 #define __FUNCT__ "MatSetOption_MPIBAIJ"
1399 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscTruth flg)
1400 {
1401   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1402   PetscErrorCode ierr;
1403 
1404   PetscFunctionBegin;
1405   switch (op) {
1406   case MAT_NEW_NONZERO_LOCATIONS:
1407   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1408   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1409   case MAT_KEEP_NONZERO_PATTERN:
1410   case MAT_NEW_NONZERO_LOCATION_ERR:
1411     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1412     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1413     break;
1414   case MAT_ROW_ORIENTED:
1415     a->roworiented = flg;
1416     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1417     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1418     break;
1419   case MAT_NEW_DIAGONALS:
1420     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1421     break;
1422   case MAT_IGNORE_OFF_PROC_ENTRIES:
1423     a->donotstash = flg;
1424     break;
1425   case MAT_USE_HASH_TABLE:
1426     a->ht_flag = flg;
1427     break;
1428   case MAT_SYMMETRIC:
1429   case MAT_STRUCTURALLY_SYMMETRIC:
1430   case MAT_HERMITIAN:
1431   case MAT_SYMMETRY_ETERNAL:
1432     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1433     break;
1434   default:
1435     SETERRQ1(((PetscObject)A)->comm,PETSC_ERR_SUP,"unknown option %d",op);
1436   }
1437   PetscFunctionReturn(0);
1438 }
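
/*
    A short usage sketch (illustrative, with a hypothetical matrix A) of two of
  the options handled above; both take effect only if set before values are
  inserted:

      ierr = MatSetOption(A,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
      ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);

  The first enables the hash-table insertion path, the second makes each process
  silently drop entries destined for other processes instead of stashing them.
*/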
1439 
1440 #undef __FUNCT__
1441 #define __FUNCT__ "MatTranspose_MPIBAIJ("
1442 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
1443 {
1444   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
1445   Mat_SeqBAIJ    *Aloc;
1446   Mat            B;
1447   PetscErrorCode ierr;
1448   PetscInt       M=A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
1449   PetscInt       bs=A->rmap->bs,mbs=baij->mbs;
1450   MatScalar      *a;
1451 
1452   PetscFunctionBegin;
1453   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1454   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1455     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
1456     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1457     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1458     ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1459   } else {
1460     B = *matout;
1461   }
1462 
1463   /* copy over the A part */
1464   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1465   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1466   ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
1467 
1468   for (i=0; i<mbs; i++) {
1469     rvals[0] = bs*(baij->rstartbs + i);
1470     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1471     for (j=ai[i]; j<ai[i+1]; j++) {
1472       col = (baij->cstartbs+aj[j])*bs;
1473       for (k=0; k<bs; k++) {
1474         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1475         col++; a += bs;
1476       }
1477     }
1478   }
1479   /* copy over the B part */
1480   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1481   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1482   for (i=0; i<mbs; i++) {
1483     rvals[0] = bs*(baij->rstartbs + i);
1484     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1485     for (j=ai[i]; j<ai[i+1]; j++) {
1486       col = baij->garray[aj[j]]*bs;
1487       for (k=0; k<bs; k++) {
1488         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1489         col++; a += bs;
1490       }
1491     }
1492   }
1493   ierr = PetscFree(rvals);CHKERRQ(ierr);
1494   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1495   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1496 
1497   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1498     *matout = B;
1499   } else {
1500     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
1501   }
1502   PetscFunctionReturn(0);
1503 }
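
/*
    Usage sketch (illustrative) of the two forms dispatched above; the in-place
  form requires a square matrix (M == N):

      Mat At;
      ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
      ierr = MatTranspose(A,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
*/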
1504 
1505 #undef __FUNCT__
1506 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
1507 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1508 {
1509   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1510   Mat            a = baij->A,b = baij->B;
1511   PetscErrorCode ierr;
1512   PetscInt       s1,s2,s3;
1513 
1514   PetscFunctionBegin;
1515   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1516   if (rr) {
1517     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1518     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1519     /* Overlap communication with computation. */
1520     ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1521   }
1522   if (ll) {
1523     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1524     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1525     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1526   }
1527   /* scale the diagonal block */
1528   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1529 
1530   if (rr) {
1531     /* Do a scatter end and then right scale the off-diagonal block */
1532     ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1533     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1534   }
1535 
1536   PetscFunctionReturn(0);
1537 }
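
/*
    The routine above computes, with ll and rr each optionally PETSC_NULL,

        mat <- diag(ll) * mat * diag(rr)

  overlapping the scatter of rr's ghost values with the left scaling.  A
  symmetric scaling sketch (illustrative, with a hypothetical vector d):

      ierr = MatDiagonalScale(mat,d,d);CHKERRQ(ierr);
*/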
1538 
1539 #undef __FUNCT__
1540 #define __FUNCT__ "MatZeroRows_MPIBAIJ"
1541 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
1542 {
1543   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1544   PetscErrorCode ierr;
1545   PetscMPIInt    imdex,size = l->size,n,rank = l->rank;
1546   PetscInt       i,*owners = A->rmap->range;
1547   PetscInt       *nprocs,j,idx,nsends,row;
1548   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
1549   PetscInt       *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source,lastidx = -1;
1550   PetscInt       *lens,*lrows,*values,rstart_bs=A->rmap->rstart;
1551   MPI_Comm       comm = ((PetscObject)A)->comm;
1552   MPI_Request    *send_waits,*recv_waits;
1553   MPI_Status     recv_status,*send_status;
1554 #if defined(PETSC_DEBUG)
1555   PetscTruth     found = PETSC_FALSE;
1556 #endif
1557 
1558   PetscFunctionBegin;
1559   /*  first count number of contributors to each processor */
1560   ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
1561   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
1562   ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
1563   j = 0;
1564   for (i=0; i<N; i++) {
1565     if (lastidx > (idx = rows[i])) j = 0;
1566     lastidx = idx;
1567     for (; j<size; j++) {
1568       if (idx >= owners[j] && idx < owners[j+1]) {
1569         nprocs[2*j]++;
1570         nprocs[2*j+1] = 1;
1571         owner[i] = j;
1572 #if defined(PETSC_DEBUG)
1573         found = PETSC_TRUE;
1574 #endif
1575         break;
1576       }
1577     }
1578 #if defined(PETSC_DEBUG)
1579     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
1580     found = PETSC_FALSE;
1581 #endif
1582   }
1583   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
1584 
1585   if (A->nooffproczerorows) {
1586     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(mat,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but requested zeroing a row owned by another process");
1587     nrecvs = nsends;
1588     nmax   = N;
1589   } else {
1590     /* inform other processors of number of messages and max length*/
1591     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
1592   }
1593 
1594   /* post receives:   */
1595   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
1596   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
1597   for (i=0; i<nrecvs; i++) {
1598     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
1599   }
1600 
1601   /* do sends:
1602      1) starts[i] gives the starting index in svalues for stuff going to
1603      the ith processor
1604   */
1605   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
1606   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
1607   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
1608   starts[0]  = 0;
1609   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1610   for (i=0; i<N; i++) {
1611     svalues[starts[owner[i]]++] = rows[i];
1612   }
1613 
1614   starts[0] = 0;
1615   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1616   count = 0;
1617   for (i=0; i<size; i++) {
1618     if (nprocs[2*i+1]) {
1619       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
1620     }
1621   }
1622   ierr = PetscFree(starts);CHKERRQ(ierr);
1623 
1624   base = owners[rank];
1625 
1626   /*  wait on receives */
1627   ierr   = PetscMalloc2(nrecvs+1,PetscInt,&lens,nrecvs+1,PetscInt,&source);CHKERRQ(ierr);
1628   count  = nrecvs;
1629   slen = 0;
1630   while (count) {
1631     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
1632     /* unpack receives into our local space */
1633     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
1634     source[imdex]  = recv_status.MPI_SOURCE;
1635     lens[imdex]    = n;
1636     slen          += n;
1637     count--;
1638   }
1639   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1640 
1641   /* move the data into the send scatter */
1642   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
1643   count = 0;
1644   for (i=0; i<nrecvs; i++) {
1645     values = rvalues + i*nmax;
1646     for (j=0; j<lens[i]; j++) {
1647       lrows[count++] = values[j] - base;
1648     }
1649   }
1650   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1651   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
1652   ierr = PetscFree(owner);CHKERRQ(ierr);
1653   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1654 
1655   /* actually zap the local rows */
1656   /*
1657         Zero the required rows. If the "diagonal block" of the matrix is square
1658      and the user wishes to set the diagonal, we use separate code so that
1659      MatSetValues() is not called for each diagonal entry; doing so would
1660      allocate new memory on every call, producing many mallocs and slowing
1661      things down.
1662   */
1663   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1664   ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0);CHKERRQ(ierr);
1665   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
1666     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag);CHKERRQ(ierr);
1667   } else if (diag != 0.0) {
1668     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr);
1669     if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1670        MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1671     for (i=0; i<slen; i++) {
1672       row  = lrows[i] + rstart_bs;
1673       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
1674     }
1675     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1676     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1677   } else {
1678     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr);
1679   }
1680 
1681   ierr = PetscFree(lrows);CHKERRQ(ierr);
1682 
1683   /* wait on sends */
1684   if (nsends) {
1685     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1686     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1687     ierr = PetscFree(send_status);CHKERRQ(ierr);
1688   }
1689   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1690   ierr = PetscFree(svalues);CHKERRQ(ierr);
1691 
1692   PetscFunctionReturn(0);
1693 }
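
/*
    Caller-side sketch (illustrative global row numbers) of the communication
  pattern implemented above: each process may list rows owned by any process;
  the indices are routed to their owners before the rows are zeroed and diag is
  placed on the diagonal:

      PetscInt    rows[2] = {0,7};
      PetscScalar diag    = 1.0;
      ierr = MatZeroRows(A,2,rows,diag);CHKERRQ(ierr);
*/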
1694 
1695 #undef __FUNCT__
1696 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
1697 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1698 {
1699   Mat_MPIBAIJ    *a   = (Mat_MPIBAIJ*)A->data;
1700   PetscErrorCode ierr;
1701 
1702   PetscFunctionBegin;
1703   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1704   PetscFunctionReturn(0);
1705 }
1706 
1707 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
1708 
1709 #undef __FUNCT__
1710 #define __FUNCT__ "MatEqual_MPIBAIJ"
1711 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag)
1712 {
1713   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1714   Mat            a,b,c,d;
1715   PetscTruth     flg;
1716   PetscErrorCode ierr;
1717 
1718   PetscFunctionBegin;
1719   a = matA->A; b = matA->B;
1720   c = matB->A; d = matB->B;
1721 
1722   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1723   if (flg) {
1724     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1725   }
1726   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1727   PetscFunctionReturn(0);
1728 }
1729 
1730 #undef __FUNCT__
1731 #define __FUNCT__ "MatCopy_MPIBAIJ"
1732 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
1733 {
1734   PetscErrorCode ierr;
1735   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ *)A->data;
1736   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ *)B->data;
1737 
1738   PetscFunctionBegin;
1739   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1740   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1741     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1742   } else {
1743     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1744     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1745   }
1746   PetscFunctionReturn(0);
1747 }
1748 
1749 #undef __FUNCT__
1750 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ"
1751 PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A)
1752 {
1753   PetscErrorCode ierr;
1754 
1755   PetscFunctionBegin;
1756   ierr =  MatMPIBAIJSetPreallocation(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1757   PetscFunctionReturn(0);
1758 }
1759 
1760 #undef __FUNCT__
1761 #define __FUNCT__ "MatAXPY_MPIBAIJ"
1762 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1763 {
1764   PetscErrorCode ierr;
1765   Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data;
1766   PetscBLASInt   bnz,one=1;
1767   Mat_SeqBAIJ    *x,*y;
1768 
1769   PetscFunctionBegin;
1770   if (str == SAME_NONZERO_PATTERN) {
1771     PetscScalar alpha = a;
1772     x = (Mat_SeqBAIJ *)xx->A->data;
1773     y = (Mat_SeqBAIJ *)yy->A->data;
1774     bnz = PetscBLASIntCast(x->nz);
1775     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1776     x = (Mat_SeqBAIJ *)xx->B->data;
1777     y = (Mat_SeqBAIJ *)yy->B->data;
1778     bnz = PetscBLASIntCast(x->nz);
1779     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1780   } else {
1781     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1782   }
1783   PetscFunctionReturn(0);
1784 }
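
/*
    The fast path above exploits that, when X and Y share one nonzero pattern,
  Y <- a*X + Y reduces to a single BLAS axpy over each stored value array.
  Caller sketch (illustrative):

      ierr = MatAXPY(Y,2.0,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);

  Any other MatStructure argument falls through to the slower entry-by-entry
  MatAXPY_Basic() path.
*/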
1785 
1786 #undef __FUNCT__
1787 #define __FUNCT__ "MatSetBlockSize_MPIBAIJ"
1788 PetscErrorCode MatSetBlockSize_MPIBAIJ(Mat A,PetscInt bs)
1789 {
1790   Mat_MPIBAIJ    *a   = (Mat_MPIBAIJ*)A->data;
1791   PetscInt rbs,cbs;
1792   PetscErrorCode ierr;
1793 
1794   PetscFunctionBegin;
1795   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1796   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1797   ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr);
1798   ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr);
1799   if (rbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d but row block size is already %d",bs,rbs);
1800   if (cbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d but column block size is already %d",bs,cbs);
1801   PetscFunctionReturn(0);
1802 }
1803 
1804 #undef __FUNCT__
1805 #define __FUNCT__ "MatRealPart_MPIBAIJ"
1806 PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
1807 {
1808   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ*)A->data;
1809   PetscErrorCode ierr;
1810 
1811   PetscFunctionBegin;
1812   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1813   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1814   PetscFunctionReturn(0);
1815 }
1816 
1817 #undef __FUNCT__
1818 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ"
1819 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
1820 {
1821   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ*)A->data;
1822   PetscErrorCode ierr;
1823 
1824   PetscFunctionBegin;
1825   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1826   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1827   PetscFunctionReturn(0);
1828 }
1829 
1830 #undef __FUNCT__
1831 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ"
1832 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
1833 {
1834   PetscErrorCode ierr;
1835   IS             iscol_local;
1836   PetscInt       csize;
1837 
1838   PetscFunctionBegin;
1839   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
1840   if (call == MAT_REUSE_MATRIX) {
1841     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
1842     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
1843   } else {
1844     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
1845   }
1846   ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
1847   if (call == MAT_INITIAL_MATRIX) {
1848     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
1849     ierr = ISDestroy(iscol_local);CHKERRQ(ierr);
1850   }
1851   PetscFunctionReturn(0);
1852 }
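
/*
    Reuse sketch (illustrative): the "ISAllGather" object composed above is what
  lets a second call skip regathering the column index set:

      Mat sub;
      ierr = MatGetSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);
      ierr = MatGetSubMatrix(mat,isrow,iscol,MAT_REUSE_MATRIX,&sub);CHKERRQ(ierr);

  The reuse call requires index sets describing the same sizes and layout as the
  initial call.
*/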
1853 
1854 #undef __FUNCT__
1855 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ_Private"
1856 /*
1857     Not great since it makes two copies of the submatrix: first a local SeqBAIJ
1858   matrix, and then the final result built by concatenating the local matrices.
1859   Writing it directly would be much like MatGetSubMatrices_MPIBAIJ().
1860 */
1861 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
1862 {
1863   PetscErrorCode ierr;
1864   PetscMPIInt    rank,size;
1865   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs;
1866   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
1867   Mat            *local,M,Mreuse;
1868   MatScalar      *vwork,*aa;
1869   MPI_Comm       comm = ((PetscObject)mat)->comm;
1870   Mat_SeqBAIJ    *aij;
1871 
1872 
1873   PetscFunctionBegin;
1874   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1875   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1876 
1877   if (call ==  MAT_REUSE_MATRIX) {
1878     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
1879     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
1880     local = &Mreuse;
1881     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
1882   } else {
1883     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
1884     Mreuse = *local;
1885     ierr   = PetscFree(local);CHKERRQ(ierr);
1886   }
1887 
1888   /*
1889       m - number of local rows
1890       n - number of columns (same on all processors)
1891       rstart - first row in new global matrix generated
1892   */
1893   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
1894   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
1895   m    = m/bs;
1896   n    = n/bs;
1897 
1898   if (call == MAT_INITIAL_MATRIX) {
1899     aij = (Mat_SeqBAIJ*)(Mreuse)->data;
1900     ii  = aij->i;
1901     jj  = aij->j;
1902 
1903     /*
1904         Determine the number of non-zeros in the diagonal and off-diagonal
1905         portions of the matrix in order to do correct preallocation
1906     */
1907 
1908     /* first get start and end of "diagonal" columns */
1909     if (csize == PETSC_DECIDE) {
1910       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
1911       if (mglobal == n*bs) { /* square matrix */
1912         nlocal = m;
1913       } else {
1914         nlocal = n/size + ((n % size) > rank);
1915       }
1916     } else {
1917       nlocal = csize/bs;
1918     }
1919     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
1920     rstart = rend - nlocal;
1921     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
1922 
1923     /* next, compute all the lengths */
1924     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
1925     olens = dlens + m;
1926     for (i=0; i<m; i++) {
1927       jend = ii[i+1] - ii[i];
1928       olen = 0;
1929       dlen = 0;
1930       for (j=0; j<jend; j++) {
1931         if (*jj < rstart || *jj >= rend) olen++;
1932         else dlen++;
1933         jj++;
1934       }
1935       olens[i] = olen;
1936       dlens[i] = dlen;
1937     }
1938     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
1939     ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr);
1940     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
1941     ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
1942     ierr = PetscFree(dlens);CHKERRQ(ierr);
1943   } else {
1944     PetscInt ml,nl;
1945 
1946     M = *newmat;
1947     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
1948     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
1949     ierr = MatZeroEntries(M);CHKERRQ(ierr);
1950     /*
1951          The next two lines are needed so we may call MatSetValuesBlocked_MPIBAIJ()
1952        below directly, rather than the slower MatSetValues().
1953     */
1954     M->was_assembled = PETSC_TRUE;
1955     M->assembled     = PETSC_FALSE;
1956   }
1957   ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
1958   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
1959   aij = (Mat_SeqBAIJ*)(Mreuse)->data;
1960   ii  = aij->i;
1961   jj  = aij->j;
1962   aa  = aij->a;
1963   for (i=0; i<m; i++) {
1964     row   = rstart/bs + i;
1965     nz    = ii[i+1] - ii[i];
1966     cwork = jj;     jj += nz;
1967     vwork = aa;     aa += nz;
1968     ierr = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
1969   }
1970 
1971   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1972   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1973   *newmat = M;
1974 
1975   /* save submatrix used in processor for next request */
1976   if (call ==  MAT_INITIAL_MATRIX) {
1977     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
1978     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
1979   }
1980 
1981   PetscFunctionReturn(0);
1982 }
1983 
1984 #undef __FUNCT__
1985 #define __FUNCT__ "MatPermute_MPIBAIJ"
1986 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B)
1987 {
1988   MPI_Comm       comm,pcomm;
1989   PetscInt       first,local_size,nrows;
1990   const PetscInt *rows;
1991   PetscMPIInt    size;
1992   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1993   PetscErrorCode ierr;
1994 
1995   PetscFunctionBegin;
1996   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1997   /* make a collective version of 'rowp' */
1998   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
1999   if (pcomm==comm) {
2000     crowp = rowp;
2001   } else {
2002     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
2003     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
2004     ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr);
2005     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
2006   }
2007   /* collect the global row permutation and invert it */
2008   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
2009   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
2010   if (pcomm!=comm) {
2011     ierr = ISDestroy(crowp);CHKERRQ(ierr);
2012   }
2013   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
2014   /* get the local target indices */
2015   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
2016   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
2017   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
2018   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr);
2019   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
2020   ierr = ISDestroy(irowp);CHKERRQ(ierr);
2021   /* the column permutation is so much easier;
2022      make a local version of 'colp' and invert it */
2023   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
2024   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
2025   if (size==1) {
2026     lcolp = colp;
2027   } else {
2028     ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
2029     ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
2030     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr);
2031   }
2032   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
2033   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
2034   ierr = ISSetPermutation(icolp);CHKERRQ(ierr);
2035   if (size>1) {
2036     ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
2037     ierr = ISDestroy(lcolp);CHKERRQ(ierr);
2038   }
2039   /* now we just get the submatrix */
2040   ierr = MatGetSubMatrix_MPIBAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
2041   /* clean up */
2042   ierr = ISDestroy(lrowp);CHKERRQ(ierr);
2043   ierr = ISDestroy(icolp);CHKERRQ(ierr);
2044   PetscFunctionReturn(0);
2045 }
2046 
2047 #undef __FUNCT__
2048 #define __FUNCT__ "MatGetGhosts_MPIBAIJ"
2049 PetscErrorCode PETSCMAT_DLLEXPORT MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
2050 {
2051   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*) mat->data;
2052   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
2053 
2054   PetscFunctionBegin;
2055   if (nghosts) { *nghosts = B->nbs;}
2056   if (ghosts) {*ghosts = baij->garray;}
2057   PetscFunctionReturn(0);
2058 }
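
/*
    Query sketch (illustrative): the ghosts returned above are the global block
  column indices of the off-process part, i.e. baij->garray, returned by
  reference rather than copied:

      PetscInt       nghosts;
      const PetscInt *ghosts;
      ierr = MatGetGhosts(mat,&nghosts,&ghosts);CHKERRQ(ierr);
*/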
2059 
2060 EXTERN PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat);
2061 
2062 #undef __FUNCT__
2063 #define __FUNCT__ "MatFDColoringCreate_MPIBAIJ"
2064 /*
2065     This routine is almost identical to MatFDColoringCreate_MPIAIJ()!
2066 */
2067 PetscErrorCode MatFDColoringCreate_MPIBAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c)
2068 {
2069   Mat_MPIBAIJ            *baij = (Mat_MPIBAIJ*)mat->data;
2070   PetscErrorCode        ierr;
2071   PetscMPIInt           size,*ncolsonproc,*disp,nn;
2072   PetscInt              bs,i,n,nrows,j,k,m,*rows = 0,*A_ci,*A_cj,ncols,col;
2073   const PetscInt        *is;
2074   PetscInt              nis = iscoloring->n,nctot,*cols,*B_ci,*B_cj;
2075   PetscInt              *rowhit,M,cstart,cend,colb;
2076   PetscInt              *columnsforrow,l;
2077   IS                    *isa;
2078   PetscTruth             done,flg;
2079   ISLocalToGlobalMapping map = mat->bmapping;
2080   PetscInt               *ltog = (map ? map->indices : (PetscInt*) PETSC_NULL),ctype=c->ctype;
2081 
2082   PetscFunctionBegin;
2083   if (!mat->assembled) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled first; MatAssemblyBegin/End();");
2084   if (ctype == IS_COLORING_GHOSTED && !map) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_INCOMP,"When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMappingBlock");
2085 
2086   ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr);
2087   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2088   M                = mat->rmap->n/bs;
2089   cstart           = mat->cmap->rstart/bs;
2090   cend             = mat->cmap->rend/bs;
2091   c->M             = mat->rmap->N/bs;  /* set the global rows and columns and local rows */
2092   c->N             = mat->cmap->N/bs;
2093   c->m             = mat->rmap->n/bs;
2094   c->rstart        = mat->rmap->rstart/bs;
2095 
2096   c->ncolors       = nis;
2097   ierr             = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr);
2098   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr);
2099   ierr             = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr);
2100   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr);
2101   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr);
2102   ierr = PetscLogObjectMemory(c,5*nis*sizeof(PetscInt));CHKERRQ(ierr);
2103 
2104   /* Allow access to data structures of local part of matrix */
2105   if (!baij->colmap) {
2106     ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
2107   }
2108   ierr = MatGetColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr);
2109   ierr = MatGetColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr);
2110 
2111   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr);
2112   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr);
2113 
2114   for (i=0; i<nis; i++) {
2115     ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr);
2116     ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr);
2117     c->ncolumns[i] = n;
2118     if (n) {
2119       ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr);
2120       ierr = PetscLogObjectMemory(c,n*sizeof(PetscInt));CHKERRQ(ierr);
2121       ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr);
2122     } else {
2123       c->columns[i]  = 0;
2124     }
2125 
2126     if (ctype == IS_COLORING_GLOBAL){
2127       /* Determine the total (parallel) number of columns of this color */
2128       ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
2129       ierr = PetscMalloc2(size,PetscMPIInt,&ncolsonproc,size,PetscMPIInt,&disp);CHKERRQ(ierr);
2130 
2131       nn   = PetscMPIIntCast(n);
2132       ierr = MPI_Allgather(&nn,1,MPI_INT,ncolsonproc,1,MPI_INT,((PetscObject)mat)->comm);CHKERRQ(ierr);
2133       nctot = 0; for (j=0; j<size; j++) {nctot += ncolsonproc[j];}
2134       if (!nctot) {
2135         ierr = PetscInfo(mat,"Coloring of matrix has some unneeded colors with no corresponding rows\n");CHKERRQ(ierr);
2136       }
2137 
2138       disp[0] = 0;
2139       for (j=1; j<size; j++) {
2140         disp[j] = disp[j-1] + ncolsonproc[j-1];
2141       }
2142 
2143       /* Get complete list of columns for color on each processor */
2144       ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2145       ierr = MPI_Allgatherv((void*)is,n,MPIU_INT,cols,ncolsonproc,disp,MPIU_INT,((PetscObject)mat)->comm);CHKERRQ(ierr);
2146       ierr = PetscFree2(ncolsonproc,disp);CHKERRQ(ierr);
2147     } else if (ctype == IS_COLORING_GHOSTED){
2148       /* Determine local number of columns of this color on this process, including ghost points */
2149       nctot = n;
2150       ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2151       ierr = PetscMemcpy(cols,is,n*sizeof(PetscInt));CHKERRQ(ierr);
2152     } else {
2153       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not provided for this MatFDColoring type");
2154     }
2155 
2156     /*
2157        Mark all rows affected by these columns
2158     */
2159     /* Temporary option to allow for debugging/testing */
2160     flg  = PETSC_FALSE;
2161     ierr = PetscOptionsGetTruth(PETSC_NULL,"-matfdcoloring_slow",&flg,PETSC_NULL);CHKERRQ(ierr);
2162     if (!flg) {/*-----------------------------------------------------------------------------*/
2163       /* crude, fast version */
2164       ierr = PetscMemzero(rowhit,M*sizeof(PetscInt));CHKERRQ(ierr);
2165       /* loop over columns*/
2166       for (j=0; j<nctot; j++) {
2167         if (ctype == IS_COLORING_GHOSTED) {
2168           col = ltog[cols[j]];
2169         } else {
2170           col  = cols[j];
2171         }
2172         if (col >= cstart && col < cend) {
2173           /* column is in diagonal block of matrix */
2174           rows = A_cj + A_ci[col-cstart];
2175           m    = A_ci[col-cstart+1] - A_ci[col-cstart];
2176         } else {
2177 #if defined (PETSC_USE_CTABLE)
2178           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2179           colb--;
2180 #else
2181           colb = baij->colmap[col] - 1;
2182 #endif
2183           if (colb == -1) {
2184             m = 0;
2185           } else {
2186             colb = colb/bs;
2187             rows = B_cj + B_ci[colb];
2188             m    = B_ci[colb+1] - B_ci[colb];
2189           }
2190         }
2191         /* loop over columns marking them in rowhit */
2192         for (k=0; k<m; k++) {
2193           rowhit[*rows++] = col + 1;
2194         }
2195       }
2196 
2197       /* count the number of hits */
2198       nrows = 0;
2199       for (j=0; j<M; j++) {
2200         if (rowhit[j]) nrows++;
2201       }
2202       c->nrows[i]         = nrows;
2203       ierr                = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
2204       ierr                = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
2205       ierr = PetscLogObjectMemory(c,2*(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr);
2206       nrows = 0;
2207       for (j=0; j<M; j++) {
2208         if (rowhit[j]) {
2209           c->rows[i][nrows]           = j;
2210           c->columnsforrow[i][nrows] = rowhit[j] - 1;
2211           nrows++;
2212         }
2213       }
2214     } else {/*-------------------------------------------------------------------------------*/
2215       /* slow version, using rowhit as a linked list */
2216       PetscInt currentcol,fm,mfm;
2217       rowhit[M] = M;
2218       nrows     = 0;
2219       /* loop over columns*/
2220       for (j=0; j<nctot; j++) {
2221         if (ctype == IS_COLORING_GHOSTED) {
2222           col = ltog[cols[j]];
2223         } else {
2224           col  = cols[j];
2225         }
2226         if (col >= cstart && col < cend) {
2227           /* column is in diagonal block of matrix */
2228           rows = A_cj + A_ci[col-cstart];
2229           m    = A_ci[col-cstart+1] - A_ci[col-cstart];
2230         } else {
2231 #if defined (PETSC_USE_CTABLE)
2232           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2233           colb--;
2234 #else
2235           colb = baij->colmap[col] - 1;
2236 #endif
2237           if (colb == -1) {
2238             m = 0;
2239           } else {
2240             colb = colb/bs;
2241             rows = B_cj + B_ci[colb];
2242             m    = B_ci[colb+1] - B_ci[colb];
2243           }
2244         }
2245 
2246         /* loop over columns marking them in rowhit */
2247         fm    = M; /* fm points to first entry in linked list */
2248         for (k=0; k<m; k++) {
2249           currentcol = *rows++;
2250           /* is it already in the list? */
2251           do {
2252             mfm  = fm;
2253             fm   = rowhit[fm];
2254           } while (fm < currentcol);
2255           /* not in list so add it */
2256           if (fm != currentcol) {
2257             nrows++;
2258             columnsforrow[currentcol] = col;
2259             /* next three lines insert new entry into linked list */
2260             rowhit[mfm]               = currentcol;
2261             rowhit[currentcol]        = fm;
2262             fm                        = currentcol;
2263             /* fm points to present position in list since we know the columns are sorted */
2264           } else {
2265             SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid coloring of matrix detected");
2266           }
2267         }
2268       }
2269       c->nrows[i]         = nrows;
2270       ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
2271       ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
2272       ierr = PetscLogObjectMemory(c,(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr);
2273       /* now store the linked list of rows into c->rows[i] */
2274       nrows = 0;
2275       fm    = rowhit[M];
2276       do {
2277         c->rows[i][nrows]            = fm;
2278         c->columnsforrow[i][nrows++] = columnsforrow[fm];
2279         fm                           = rowhit[fm];
2280       } while (fm < M);
2281     } /* ---------------------------------------------------------------------------------------*/
2282     ierr = PetscFree(cols);CHKERRQ(ierr);
2283   }
2284 
2285   /* Optimize by adding the vscale and vscaleforrow[][] fields */
2286   /*
2287        vscale will contain the on-process ("diagonal" block) scalings followed by the off-process ones
2288   */
2289   if (ctype == IS_COLORING_GLOBAL) {
2290     PetscInt *garray;
2291     ierr = PetscMalloc(baij->B->cmap->n*sizeof(PetscInt),&garray);CHKERRQ(ierr);
2292     for (i=0; i<baij->B->cmap->n/bs; i++) {
2293       for (j=0; j<bs; j++) {
2294         garray[i*bs+j] = bs*baij->garray[i]+j;
2295       }
2296     }
2297     ierr = VecCreateGhost(((PetscObject)mat)->comm,baij->A->rmap->n,PETSC_DETERMINE,baij->B->cmap->n,garray,&c->vscale);CHKERRQ(ierr);
2298     ierr = PetscFree(garray);CHKERRQ(ierr);
2299     CHKMEMQ;
2300     ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
2301     for (k=0; k<c->ncolors; k++) {
2302       ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
2303       for (l=0; l<c->nrows[k]; l++) {
2304         col = c->columnsforrow[k][l];
2305         if (col >= cstart && col < cend) {
2306           /* column is in diagonal block of matrix */
2307           colb = col - cstart;
2308         } else {
2309           /* column  is in "off-processor" part */
2310 #if defined (PETSC_USE_CTABLE)
2311           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2312           colb--;
2313 #else
2314           colb = baij->colmap[col] - 1;
2315 #endif
2316           colb = colb/bs;
2317           colb += cend - cstart;
2318         }
2319         c->vscaleforrow[k][l] = colb;
2320       }
2321     }
2322   } else if (ctype == IS_COLORING_GHOSTED) {
2323     /* Get gtol mapping */
2324     PetscInt N = mat->cmap->N, *gtol;
2325     ierr = PetscMalloc((N+1)*sizeof(PetscInt),&gtol);CHKERRQ(ierr);
2326     for (i=0; i<N; i++) gtol[i] = -1;
2327     for (i=0; i<map->n; i++) gtol[ltog[i]] = i;
2328 
2329     c->vscale = 0; /* will be created in MatFDColoringApply() */
2330     ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
2331     for (k=0; k<c->ncolors; k++) {
2332       ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
2333       for (l=0; l<c->nrows[k]; l++) {
2334         col = c->columnsforrow[k][l];      /* global column index */
2335         c->vscaleforrow[k][l] = gtol[col]; /* local column index */
2336       }
2337     }
2338     ierr = PetscFree(gtol);CHKERRQ(ierr);
2339   }
2340   ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr);
2341 
2342   ierr = PetscFree(rowhit);CHKERRQ(ierr);
2343   ierr = PetscFree(columnsforrow);CHKERRQ(ierr);
2344   ierr = MatRestoreColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr);
2345   ierr = MatRestoreColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr);
2346   CHKMEMQ;
2347   PetscFunctionReturn(0);
2348 }
2349 
2350 #undef __FUNCT__
2351 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIBAIJ"
2352 PetscErrorCode MatGetSeqNonzerostructure_MPIBAIJ(Mat A,Mat *newmat)
2353 {
2354   Mat            B;
2355   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ *)A->data;
2356   Mat_SeqBAIJ    *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data;
2357   Mat_SeqAIJ     *b;
2358   PetscErrorCode ierr;
2359   PetscMPIInt    size,rank,*recvcounts = 0,*displs = 0;
2360   PetscInt       sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs;
2361   PetscInt       m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf;
2362 
2363   PetscFunctionBegin;
2364   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
2365   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
2366 
2367   /* ----------------------------------------------------------------
2368      Tell every processor the number of nonzeros per block row
2369   */
2370   ierr = PetscMalloc((A->rmap->N/bs)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
2371   for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) {
2372     lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs];
2373   }
2374   sendcount = A->rmap->rend/bs - A->rmap->rstart/bs;
2375   ierr = PetscMalloc(2*size*sizeof(PetscMPIInt),&recvcounts);CHKERRQ(ierr);
2376   displs     = recvcounts + size;
2377   for (i=0; i<size; i++) {
2378     recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs;
2379     displs[i]     = A->rmap->range[i]/bs;
2380   }
2381 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2382   ierr  = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2383 #else
2384   ierr  = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2385 #endif
2386   /* ---------------------------------------------------------------
2387      Create a sequential AIJ matrix that will hold the gathered block nonzero structure
2388   */
2389   ierr  = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
2390   ierr  = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2391   ierr  = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2392   ierr  = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr);
2393   b = (Mat_SeqAIJ *)B->data;
2394 
2395   /*--------------------------------------------------------------------
2396     Copy my part of matrix column indices over
2397   */
2398   sendcount  = ad->nz + bd->nz;
2399   jsendbuf   = b->j + b->i[rstarts[rank]/bs];
2400   a_jsendbuf = ad->j;
2401   b_jsendbuf = bd->j;
2402   n          = A->rmap->rend/bs - A->rmap->rstart/bs;
2403   cnt        = 0;
2404   for (i=0; i<n; i++) {
2405 
2406     /* put in lower diagonal portion */
2407     m = bd->i[i+1] - bd->i[i];
2408     while (m > 0) {
2409       /* stop once this column (mapped from bd's compressed numbering) passes the diagonal */
2410       if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break;
2411       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2412       m--;
2413     }
2414 
2415     /* put in diagonal portion */
2416     for (j=ad->i[i]; j<ad->i[i+1]; j++) {
2417       jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++;
2418     }
2419 
2420     /* put in upper diagonal portion */
2421     while (m-- > 0) {
2422       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2423     }
2424   }
2425   if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt);
2426 
2427   /*--------------------------------------------------------------------
2428     Gather all column indices to all processors
2429   */
2430   for (i=0; i<size; i++) {
2431     recvcounts[i] = 0;
2432     for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) {
2433       recvcounts[i] += lens[j];
2434     }
2435   }
2436   displs[0]  = 0;
2437   for (i=1; i<size; i++) {
2438     displs[i] = displs[i-1] + recvcounts[i-1];
2439   }
2440 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2441   ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2442 #else
2443   ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2444 #endif
2445   /*--------------------------------------------------------------------
2446     Assemble the matrix into usable form (note the numerical values are not yet set)
2447   */
2448   /* set the b->ilen (length of each row) values */
2449   ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr);
2450   /* set the b->i indices */
2451   b->i[0] = 0;
2452   for (i=1; i<=A->rmap->N/bs; i++) {
2453     b->i[i] = b->i[i-1] + lens[i-1];
2454   }
2455   ierr = PetscFree(lens);CHKERRQ(ierr);
2456   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2457   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2458   ierr = PetscFree(recvcounts);CHKERRQ(ierr);
2459 
2460   if (A->symmetric){
2461     ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2462   } else if (A->hermitian) {
2463     ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr);
2464   } else if (A->structurally_symmetric) {
2465     ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2466   }
2467   *newmat = B;
2468   PetscFunctionReturn(0);
2469 }
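
/*
    Note on the routine above: every process receives the full (N/bs) x (N/bs)
  block nonzero pattern of A as a SEQAIJ matrix; only the structure is gathered,
  and the numerical values of the result are never filled in.
*/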
2470 
2471 #undef __FUNCT__
2472 #define __FUNCT__ "MatSOR_MPIBAIJ"
2473 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2474 {
2475   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
2476   PetscErrorCode ierr;
2477   Vec            bb1 = 0;
2478 
2479   PetscFunctionBegin;
2480   if (its > 1 || !(flag & SOR_ZERO_INITIAL_GUESS)) {
2481     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2482   }
2483 
2484   if (flag == SOR_APPLY_UPPER) {
2485     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2486     PetscFunctionReturn(0);
2487   }
2488 
2489   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2490     if (flag & SOR_ZERO_INITIAL_GUESS) {
2491       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2492       its--;
2493     }
2494 
2495     while (its--) {
2496       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2497       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2498 
2499       /* update rhs: bb1 = bb - B*x */
2500       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2501       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2502 
2503       /* local sweep */
2504       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2505     }
2506   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
2507     if (flag & SOR_ZERO_INITIAL_GUESS) {
2508       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2509       its--;
2510     }
2511     while (its--) {
2512       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2513       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2514 
2515       /* update rhs: bb1 = bb - B*x */
2516       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2517       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2518 
2519       /* local sweep */
2520       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2521     }
2522   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
2523     if (flag & SOR_ZERO_INITIAL_GUESS) {
2524       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2525       its--;
2526     }
2527     while (its--) {
2528       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2529       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2530 
2531       /* update rhs: bb1 = bb - B*x */
2532       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2533       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2534 
2535       /* local sweep */
2536       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2537     }
2538   } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Requested parallel SOR variant is not supported");
2539 
2540   if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);}
2541   PetscFunctionReturn(0);
2542 }
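
/*
    Each outer iteration above is a block-Jacobi style update: with A the
  diagonal block, B the off-diagonal block and x_ghost the scattered ghost
  values of xx,

        bb1 = bb - B * x_ghost
        xx  = local SOR sweep on (A, bb1)

  which is why only the SOR_LOCAL_* variants (plus SOR_APPLY_UPPER) are
  supported in parallel.
*/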
2543 
2544 extern PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_BAIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2545 
2546 
2547 /* -------------------------------------------------------------------*/
2548 static struct _MatOps MatOps_Values = {
2549        MatSetValues_MPIBAIJ,
2550        MatGetRow_MPIBAIJ,
2551        MatRestoreRow_MPIBAIJ,
2552        MatMult_MPIBAIJ,
2553 /* 4*/ MatMultAdd_MPIBAIJ,
2554        MatMultTranspose_MPIBAIJ,
2555        MatMultTransposeAdd_MPIBAIJ,
2556        0,
2557        0,
2558        0,
2559 /*10*/ 0,
2560        0,
2561        0,
2562        MatSOR_MPIBAIJ,
2563        MatTranspose_MPIBAIJ,
2564 /*15*/ MatGetInfo_MPIBAIJ,
2565        MatEqual_MPIBAIJ,
2566        MatGetDiagonal_MPIBAIJ,
2567        MatDiagonalScale_MPIBAIJ,
2568        MatNorm_MPIBAIJ,
2569 /*20*/ MatAssemblyBegin_MPIBAIJ,
2570        MatAssemblyEnd_MPIBAIJ,
2571        MatSetOption_MPIBAIJ,
2572        MatZeroEntries_MPIBAIJ,
2573 /*24*/ MatZeroRows_MPIBAIJ,
2574        0,
2575        0,
2576        0,
2577        0,
2578 /*29*/ MatSetUpPreallocation_MPIBAIJ,
2579        0,
2580        0,
2581        0,
2582        0,
2583 /*34*/ MatDuplicate_MPIBAIJ,
2584        0,
2585        0,
2586        0,
2587        0,
2588 /*39*/ MatAXPY_MPIBAIJ,
2589        MatGetSubMatrices_MPIBAIJ,
2590        MatIncreaseOverlap_MPIBAIJ,
2591        MatGetValues_MPIBAIJ,
2592        MatCopy_MPIBAIJ,
2593 /*44*/ 0,
2594        MatScale_MPIBAIJ,
2595        0,
2596        0,
2597        0,
2598 /*49*/ MatSetBlockSize_MPIBAIJ,
2599        0,
2600        0,
2601        0,
2602        0,
2603 /*54*/ MatFDColoringCreate_MPIBAIJ,
2604        0,
2605        MatSetUnfactored_MPIBAIJ,
2606        MatPermute_MPIBAIJ,
2607        MatSetValuesBlocked_MPIBAIJ,
2608 /*59*/ MatGetSubMatrix_MPIBAIJ,
2609        MatDestroy_MPIBAIJ,
2610        MatView_MPIBAIJ,
2611        0,
2612        0,
2613 /*64*/ 0,
2614        0,
2615        0,
2616        0,
2617        0,
2618 /*69*/ MatGetRowMaxAbs_MPIBAIJ,
2619        0,
2620        0,
2621        0,
2622        0,
2623 /*74*/ 0,
2624        MatFDColoringApply_BAIJ,
2625        0,
2626        0,
2627        0,
2628 /*79*/ 0,
2629        0,
2630        0,
2631        0,
2632        MatLoad_MPIBAIJ,
2633 /*84*/ 0,
2634        0,
2635        0,
2636        0,
2637        0,
2638 /*89*/ 0,
2639        0,
2640        0,
2641        0,
2642        0,
2643 /*94*/ 0,
2644        0,
2645        0,
2646        0,
2647        0,
2648 /*99*/ 0,
2649        0,
2650        0,
2651        0,
2652        0,
2653 /*104*/0,
2654        MatRealPart_MPIBAIJ,
2655        MatImaginaryPart_MPIBAIJ,
2656        0,
2657        0,
2658 /*109*/0,
2659        0,
2660        0,
2661        0,
2662        0,
2663 /*114*/MatGetSeqNonzerostructure_MPIBAIJ,
2664        0,
2665        MatGetGhosts_MPIBAIJ,
2666        0,
2667        0,
2668 /*119*/0,
2669        0,
2670        0,
2671        0
2672 };
2673 
2674 EXTERN_C_BEGIN
2675 #undef __FUNCT__
2676 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
2677 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
2678 {
2679   PetscFunctionBegin;
2680   *a      = ((Mat_MPIBAIJ *)A->data)->A;
2681   *iscopy = PETSC_FALSE;
2682   PetscFunctionReturn(0);
2683 }
2684 EXTERN_C_END
2685 
2686 EXTERN_C_BEGIN
2687 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*);
2688 EXTERN_C_END
2689 
2690 EXTERN_C_BEGIN
2691 #undef __FUNCT__
2692 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ"
2693 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2694 {
2695   PetscInt       m,rstart,cstart,cend;
2696   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
2697   const PetscInt *JJ=0;
2698   PetscScalar    *values=0;
2699   PetscErrorCode ierr;
2700 
2701   PetscFunctionBegin;
2702 
2703   if (bs < 1) SETERRQ1(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2704   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2705   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2706   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2707   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2708   m      = B->rmap->n/bs;
2709   rstart = B->rmap->rstart/bs;
2710   cstart = B->cmap->rstart/bs;
2711   cend   = B->cmap->rend/bs;
2712 
2713   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
2714   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
2715   for (i=0; i<m; i++) {
2716     nz = ii[i+1] - ii[i];
2717     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
2718     nz_max = PetscMax(nz_max,nz);
2719     JJ  = jj + ii[i];
2720     for (j=0; j<nz; j++) {
2721       if (*JJ >= cstart) break;
2722       JJ++;
2723     }
2724     d = 0;
2725     for (; j<nz; j++) {
2726       if (*JJ++ >= cend) break;
2727       d++;
2728     }
2729     d_nnz[i] = d;
2730     o_nnz[i] = nz - d;
2731   }
2732   ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2733   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
2734 
2735   values = (PetscScalar*)V;
2736   if (!values) {
2737     ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2738     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2739   }
2740   for (i=0; i<m; i++) {
2741     PetscInt          row    = i + rstart;
2742     PetscInt          ncols  = ii[i+1] - ii[i];
2743     const PetscInt    *icols = jj + ii[i];
2744     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2745     ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2746   }
2747 
2748   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2749   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2750   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2751 
2752   PetscFunctionReturn(0);
2753 }
2754 EXTERN_C_END
2755 
2756 #undef __FUNCT__
2757 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR"
2758 /*@C
2759    MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format
2760    (block compressed row), from block CSR input arrays.
2761 
2762    Collective on MPI_Comm
2763 
2764    Input Parameters:
2765 +  A - the matrix
2766 .  i - the indices into j for the start of each local row (starts with zero)
2767 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2768 -  v - optional values in the matrix
2769 
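   Notes:
   A minimal usage sketch: for one process owning 3 block rows with bs = 2 and
   block column structure {0,1}, {1}, {0,2} (the indices here are purely
   illustrative), one could pass
.vb
   PetscInt    i[] = {0,2,3,5};      /* i[0] must be 0; i[k+1]-i[k] = blocks in block row k */
   PetscInt    j[] = {0,1, 1, 0,2};  /* block column indices, sorted within each row */
   PetscScalar v[5*4];               /* one bs*bs block of values per block nonzero; may be PETSC_NULL */
   ierr = MatMPIBAIJSetPreallocationCSR(B,2,i,j,v);CHKERRQ(ierr);
.ve
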
2770    Level: developer
2771 
2772 .keywords: matrix, aij, compressed row, sparse, parallel
2773 
.seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIBAIJ(), MPIBAIJ
2775 @*/
2776 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2777 {
2778   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]);
2779 
2780   PetscFunctionBegin;
2781   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
2782   if (f) {
2783     ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr);
2784   }
2785   PetscFunctionReturn(0);
2786 }
2787 
2788 EXTERN_C_BEGIN
2789 #undef __FUNCT__
2790 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
2791 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
2792 {
2793   Mat_MPIBAIJ    *b;
2794   PetscErrorCode ierr;
2795   PetscInt       i, newbs = PetscAbs(bs);
2796 
2797   PetscFunctionBegin;
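  /* a negative bs means the block size may still be overridden from the options database below */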
2798   if (bs < 0) {
2799     ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPIBAIJ matrix","Mat");CHKERRQ(ierr);
2800       ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr);
2801     ierr = PetscOptionsEnd();CHKERRQ(ierr);
2802     bs   = PetscAbs(bs);
2803   }
2804   if ((d_nnz || o_nnz) && newbs != bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting d_nnz or o_nnz");
2805   bs = newbs;
2806 
2807 
2808   if (bs < 1) SETERRQ(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
2809   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2810   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2811   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2812   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2813 
2814   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2815   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2816   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2817   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2818 
2819   if (d_nnz) {
2820     for (i=0; i<B->rmap->n/bs; i++) {
      if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
2822     }
2823   }
2824   if (o_nnz) {
2825     for (i=0; i<B->rmap->n/bs; i++) {
      if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
2827     }
2828   }
2829 
2830   b = (Mat_MPIBAIJ*)B->data;
2831   b->bs2 = bs*bs;
2832   b->mbs = B->rmap->n/bs;
2833   b->nbs = B->cmap->n/bs;
2834   b->Mbs = B->rmap->N/bs;
2835   b->Nbs = B->cmap->N/bs;
2836 
2837   for (i=0; i<=b->size; i++) {
2838     b->rangebs[i] = B->rmap->range[i]/bs;
2839   }
2840   b->rstartbs = B->rmap->rstart/bs;
2841   b->rendbs   = B->rmap->rend/bs;
2842   b->cstartbs = B->cmap->rstart/bs;
2843   b->cendbs   = B->cmap->rend/bs;
2844 
2845   if (!B->preallocated) {
2846     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2847     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2848     ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
2849     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
2850     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2851     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2852     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
2853     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
2854     ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr);
2855   }
2856 
2857   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2858   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
2859   B->preallocated = PETSC_TRUE;
2860   PetscFunctionReturn(0);
2861 }
2862 EXTERN_C_END
2863 
2864 EXTERN_C_BEGIN
2865 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
2866 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
2867 EXTERN_C_END
2868 
2869 
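/*
    Converts an MPIBAIJ matrix to MPIAdj format: builds the block adjacency
  graph, one vertex per block row, merging the diagonal (A) and off-diagonal
  (B) parts in global block column order and omitting each block row's
  diagonal entry.
*/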
2870 EXTERN_C_BEGIN
2871 #undef __FUNCT__
2872 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj"
2873 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPIAdj(Mat B, const MatType newtype,MatReuse reuse,Mat *adj)
2874 {
2875   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
2876   PetscErrorCode ierr;
2877   Mat_SeqBAIJ    *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data;
2878   PetscInt       M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs;
2879   const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;
2880 
2881   PetscFunctionBegin;
2882   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&ii);CHKERRQ(ierr);
2883   ii[0] = 0;
2884   CHKMEMQ;
2885   for (i=0; i<M; i++) {
2886     if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]);
2887     if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]);
2888     ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i];
    /* remove one from count if matrix has diagonal */
2890     for (j=id[i]; j<id[i+1]; j++) {
2891       if (jd[j] == i) {ii[i+1]--;break;}
2892     }
    CHKMEMQ;
2894   }
2895   ierr = PetscMalloc(ii[M]*sizeof(PetscInt),&jj);CHKERRQ(ierr);
2896   cnt = 0;
2897   for (i=0; i<M; i++) {
2898     for (j=io[i]; j<io[i+1]; j++) {
2899       if (garray[jo[j]] > rstart) break;
2900       jj[cnt++] = garray[jo[j]];
      CHKMEMQ;
2902     }
2903     for (k=id[i]; k<id[i+1]; k++) {
2904       if (jd[k] != i) {
2905         jj[cnt++] = rstart + jd[k];
        CHKMEMQ;
2907       }
2908     }
2909     for (;j<io[i+1]; j++) {
2910       jj[cnt++] = garray[jo[j]];
      CHKMEMQ;
2912     }
2913   }
2914   ierr = MatCreateMPIAdj(((PetscObject)B)->comm,M,B->cmap->N/B->rmap->bs,ii,jj,PETSC_NULL,adj);CHKERRQ(ierr);
2915   PetscFunctionReturn(0);
2916 }
2917 EXTERN_C_END
2918 
2919 EXTERN_C_BEGIN
2920 #if defined(PETSC_HAVE_MUMPS)
2921 extern PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*);
2922 #endif
2923 EXTERN_C_END
2924 
2925 /*MC
2926    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
2927 
2928    Options Database Keys:
2929 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
2930 . -mat_block_size <bs> - set the blocksize used to store the matrix
- -mat_use_hash_table <fact> - use a hash table during matrix assembly, with table size factor <fact>
2932 
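   Example (a sketch of the typical creation sequence):
.vb
   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
   ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
   ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
   ierr = MatMPIBAIJSetPreallocation(A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
.ve
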
2933   Level: beginner
2934 
.seealso: MatCreateMPIBAIJ()
2936 M*/
2937 
2938 EXTERN_C_BEGIN
2939 #undef __FUNCT__
2940 #define __FUNCT__ "MatCreate_MPIBAIJ"
2941 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIBAIJ(Mat B)
2942 {
2943   Mat_MPIBAIJ    *b;
2944   PetscErrorCode ierr;
2945   PetscTruth     flg;
2946 
2947   PetscFunctionBegin;
2948   ierr = PetscNewLog(B,Mat_MPIBAIJ,&b);CHKERRQ(ierr);
2949   B->data = (void*)b;
2950 
2951 
2952   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2953   B->mapping    = 0;
2954   B->assembled  = PETSC_FALSE;
2955 
2956   B->insertmode = NOT_SET_VALUES;
2957   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
2958   ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr);
2959 
2960   /* build local table of row and column ownerships */
2961   ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);
2962 
2963   /* build cache for off array entries formed */
2964   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
2965   b->donotstash  = PETSC_FALSE;
2966   b->colmap      = PETSC_NULL;
2967   b->garray      = PETSC_NULL;
2968   b->roworiented = PETSC_TRUE;
2969 
2970   /* stuff used in block assembly */
2971   b->barray       = 0;
2972 
2973   /* stuff used for matrix vector multiply */
2974   b->lvec         = 0;
2975   b->Mvctx        = 0;
2976 
2977   /* stuff for MatGetRow() */
2978   b->rowindices   = 0;
2979   b->rowvalues    = 0;
2980   b->getrowactive = PETSC_FALSE;
2981 
2982   /* hash table stuff */
2983   b->ht           = 0;
2984   b->hd           = 0;
2985   b->ht_size      = 0;
2986   b->ht_flag      = PETSC_FALSE;
2987   b->ht_fact      = 0;
2988   b->ht_total_ct  = 0;
2989   b->ht_insert_ct = 0;
2990 
  ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for MPIBAIJ matrix","Mat");CHKERRQ(ierr);
2992     ierr = PetscOptionsTruth("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
2993     if (flg) {
2994       PetscReal fact = 1.39;
2995       ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
2996       ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr);
2997       if (fact <= 1.0) fact = 1.39;
2998       ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
2999       ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
3000     }
3001   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3002 
3003 #if defined(PETSC_HAVE_MUMPS)
3004   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps",MatGetFactor_baij_mumps);CHKERRQ(ierr);
3005 #endif
3006   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",
3007                                      "MatConvert_MPIBAIJ_MPIAdj",
3008                                       MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr);
3009   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3010                                      "MatStoreValues_MPIBAIJ",
3011                                      MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
3012   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3013                                      "MatRetrieveValues_MPIBAIJ",
3014                                      MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
3015   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
3016                                      "MatGetDiagonalBlock_MPIBAIJ",
3017                                      MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
3018   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C",
3019                                      "MatMPIBAIJSetPreallocation_MPIBAIJ",
3020                                      MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
3021   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",
                                     "MatMPIBAIJSetPreallocationCSR_MPIBAIJ",
                                     MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
3024   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
3025                                      "MatDiagonalScaleLocal_MPIBAIJ",
3026                                      MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
3027   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C",
3028                                      "MatSetHashTableFactor_MPIBAIJ",
3029                                      MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
3030   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);
3031   PetscFunctionReturn(0);
3032 }
3033 EXTERN_C_END
3034 
3035 /*MC
3036    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
3037 
3038    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
3039    and MATMPIBAIJ otherwise.
3040 
3041    Options Database Keys:
3042 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
3043 
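   Example (a sketch; the concrete type is chosen at runtime by -mat_type baij):
.vb
   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
   ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
   ierr = MatSetFromOptions(A);CHKERRQ(ierr);
.ve
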
3044   Level: beginner
3045 
.seealso: MatCreateMPIBAIJ(), MATSEQBAIJ, MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3047 M*/
3048 
3049 EXTERN_C_BEGIN
3050 #undef __FUNCT__
3051 #define __FUNCT__ "MatCreate_BAIJ"
3052 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_BAIJ(Mat A)
3053 {
3054   PetscErrorCode ierr;
3055   PetscMPIInt    size;
3056 
3057   PetscFunctionBegin;
3058   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
3059   if (size == 1) {
3060     ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr);
3061   } else {
3062     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
3063   }
3064   PetscFunctionReturn(0);
3065 }
3066 EXTERN_C_END
3067 
3068 #undef __FUNCT__
3069 #define __FUNCT__ "MatMPIBAIJSetPreallocation"
3070 /*@C
3071    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
3072    (block compressed row).  For good matrix assembly performance
3073    the user should preallocate the matrix storage by setting the parameters
3074    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3075    performance can be increased by more than a factor of 50.
3076 
3077    Collective on Mat
3078 
3079    Input Parameters:
3080 +  A - the matrix
.  bs   - size of block
3082 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
3083            submatrix  (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           of the diagonal portion of the local submatrix (possibly different for each block
           row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
3087 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
3088            submatrix (same for all local rows).
-  o_nnz - array containing the number of block nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or PETSC_NULL.
3092 
3093    If the *_nnz parameter is given then the *_nz parameter is ignored
3094 
3095    Options Database Keys:
3096 +   -mat_block_size - size of the blocks to use
-   -mat_use_hash_table <fact> - use a hash table during matrix assembly, with table size factor <fact>
3098 
3099    Notes:
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
3102 
3103    Storage Information:
3104    For a square global matrix we define each processor's diagonal portion
3105    to be its local rows and the corresponding columns (a square submatrix);
3106    each processor's off-diagonal portion encompasses the remainder of the
3107    local matrix (a rectangular submatrix).
3108 
3109    The user can specify preallocated storage for the diagonal part of
3110    the local submatrix with either d_nz or d_nnz (not both).  Set
3111    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
3112    memory allocation.  Likewise, specify preallocated storage for the
3113    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3114 
3115    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3116    the figure below we depict these three local rows and all columns (0-11).
3117 
3118 .vb
3119            0 1 2 3 4 5 6 7 8 9 10 11
3120           -------------------
3121    row 3  |  o o o d d d o o o o o o
3122    row 4  |  o o o d d d o o o o o o
3123    row 5  |  o o o d d d o o o o o o
3124           -------------------
3125 .ve
3126 
3127    Thus, any entries in the d locations are stored in the d (diagonal)
3128    submatrix, and any entries in the o locations are stored in the
3129    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3130    stored simply in the MATSEQBAIJ format for compressed row storage.
3131 
3132    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3133    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3134    In general, for PDE problems in which most nonzeros are near the diagonal,
3135    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3136    or you will get TERRIBLE performance; see the users' manual chapter on
3137    matrices.
3138 
3139    You can call MatGetInfo() to get information on how effective the preallocation was;
   for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
3141    You can also run with the option -info and look for messages with the string
3142    malloc in them to see if additional memory allocation was needed.
3143 
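   Example:
   A minimal sketch matching the figure above (bs = 1: three d blocks and nine
   o blocks in every local row):
.vb
   ierr = MatMPIBAIJSetPreallocation(A,1,3,PETSC_NULL,9,PETSC_NULL);CHKERRQ(ierr);
.ve
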
3144    Level: intermediate
3145 
3146 .keywords: matrix, block, aij, compressed row, sparse, parallel
3147 
3148 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR()
3149 @*/
3150 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3151 {
3152   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
3153 
3154   PetscFunctionBegin;
3155   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3156   if (f) {
3157     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3158   }
3159   PetscFunctionReturn(0);
3160 }
3161 
3162 #undef __FUNCT__
3163 #define __FUNCT__ "MatCreateMPIBAIJ"
3164 /*@C
3165    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
3166    (block compressed row).  For good matrix assembly performance
3167    the user should preallocate the matrix storage by setting the parameters
3168    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3169    performance can be increased by more than a factor of 50.
3170 
3171    Collective on MPI_Comm
3172 
3173    Input Parameters:
3174 +  comm - MPI communicator
.  bs   - size of block
3176 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3177            This value should be the same as the local size used in creating the
3178            y vector for the matrix-vector product y = Ax.
3179 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
3180            This value should be the same as the local size used in creating the
3181            x vector for the matrix-vector product y = Ax.
3182 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3183 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3184 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
3185            submatrix  (same for all local rows)
.  d_nnz - array containing the number of nonzero blocks in the various block rows
           of the diagonal portion of the local submatrix (possibly different for each block
           row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
3189 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3190            submatrix (same for all local rows).
3191 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
3192            off-diagonal portion of the local submatrix (possibly different for
3193            each block row) or PETSC_NULL.
3194 
3195    Output Parameter:
3196 .  A - the matrix
3197 
3198    Options Database Keys:
3199 +   -mat_block_size - size of the blocks to use
-   -mat_use_hash_table <fact> - use a hash table during matrix assembly, with table size factor <fact>
3201 
   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqBAIJSetPreallocation()]
3205 
3206    Notes:
3207    If the *_nnz parameter is given then the *_nz parameter is ignored
3208 
   A nonzero block is any block that has 1 or more nonzeros in it
3210 
3211    The user MUST specify either the local or global matrix dimensions
3212    (possibly both).
3213 
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
3216 
3217    Storage Information:
3218    For a square global matrix we define each processor's diagonal portion
3219    to be its local rows and the corresponding columns (a square submatrix);
3220    each processor's off-diagonal portion encompasses the remainder of the
3221    local matrix (a rectangular submatrix).
3222 
3223    The user can specify preallocated storage for the diagonal part of
3224    the local submatrix with either d_nz or d_nnz (not both).  Set
3225    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
3226    memory allocation.  Likewise, specify preallocated storage for the
3227    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3228 
3229    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3230    the figure below we depict these three local rows and all columns (0-11).
3231 
3232 .vb
3233            0 1 2 3 4 5 6 7 8 9 10 11
3234           -------------------
3235    row 3  |  o o o d d d o o o o o o
3236    row 4  |  o o o d d d o o o o o o
3237    row 5  |  o o o d d d o o o o o o
3238           -------------------
3239 .ve
3240 
3241    Thus, any entries in the d locations are stored in the d (diagonal)
3242    submatrix, and any entries in the o locations are stored in the
3243    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3244    stored simply in the MATSEQBAIJ format for compressed row storage.
3245 
3246    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3247    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3248    In general, for PDE problems in which most nonzeros are near the diagonal,
3249    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3250    or you will get TERRIBLE performance; see the users' manual chapter on
3251    matrices.
3252 
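   Example:
   A sketch creating a square matrix with bs = 2 and 10 block rows (20 point rows)
   per process, letting PETSc determine the global sizes:
.vb
   ierr = MatCreateMPIBAIJ(comm,2,20,20,PETSC_DETERMINE,PETSC_DETERMINE,5,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
.ve
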
3253    Level: intermediate
3254 
3255 .keywords: matrix, block, aij, compressed row, sparse, parallel
3256 
3257 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3258 @*/
3259 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3260 {
3261   PetscErrorCode ierr;
3262   PetscMPIInt    size;
3263 
3264   PetscFunctionBegin;
3265   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3266   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3267   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3268   if (size > 1) {
3269     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
3270     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3271   } else {
3272     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3273     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3274   }
3275   PetscFunctionReturn(0);
3276 }
3277 
3278 #undef __FUNCT__
3279 #define __FUNCT__ "MatDuplicate_MPIBAIJ"
3280 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3281 {
3282   Mat            mat;
3283   Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
3284   PetscErrorCode ierr;
3285   PetscInt       len=0;
3286 
3287   PetscFunctionBegin;
3288   *newmat       = 0;
3289   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
3290   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3291   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3292   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3293 
3294   mat->factortype   = matin->factortype;
3295   mat->preallocated = PETSC_TRUE;
3296   mat->assembled    = PETSC_TRUE;
3297   mat->insertmode   = NOT_SET_VALUES;
3298 
3299   a      = (Mat_MPIBAIJ*)mat->data;
3300   mat->rmap->bs  = matin->rmap->bs;
3301   a->bs2   = oldmat->bs2;
3302   a->mbs   = oldmat->mbs;
3303   a->nbs   = oldmat->nbs;
3304   a->Mbs   = oldmat->Mbs;
3305   a->Nbs   = oldmat->Nbs;
3306 
3307   ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3308   ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3309 
3310   a->size         = oldmat->size;
3311   a->rank         = oldmat->rank;
3312   a->donotstash   = oldmat->donotstash;
3313   a->roworiented  = oldmat->roworiented;
3314   a->rowindices   = 0;
3315   a->rowvalues    = 0;
3316   a->getrowactive = PETSC_FALSE;
3317   a->barray       = 0;
3318   a->rstartbs     = oldmat->rstartbs;
3319   a->rendbs       = oldmat->rendbs;
3320   a->cstartbs     = oldmat->cstartbs;
3321   a->cendbs       = oldmat->cendbs;
3322 
3323   /* hash table stuff */
3324   a->ht           = 0;
3325   a->hd           = 0;
3326   a->ht_size      = 0;
3327   a->ht_flag      = oldmat->ht_flag;
3328   a->ht_fact      = oldmat->ht_fact;
3329   a->ht_total_ct  = 0;
3330   a->ht_insert_ct = 0;
3331 
3332   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
3333   if (oldmat->colmap) {
3334 #if defined (PETSC_USE_CTABLE)
3335   ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3336 #else
3337   ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3338   ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3339   ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3340 #endif
3341   } else a->colmap = 0;
3342 
3343   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
3344     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3345     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3346     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
3347   } else a->garray = 0;
3348 
3349   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
3350   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3351   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3352   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3353   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3354 
3355   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3356   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3357   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3358   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3359   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3360   *newmat = mat;
3361 
3362   PetscFunctionReturn(0);
3363 }
3364 
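/*
    Loads a matrix stored in PETSc binary format: a header (MAT_FILE_CLASSID,
  M, N, total number of nonzeros), then the row lengths, then the column
  indices, and finally the numerical values.  Process 0 reads the file and
  ships every other process its piece; if the row count is not divisible by
  the block size, extra rows with a single 1.0 on the diagonal are appended.
*/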
3365 #undef __FUNCT__
3366 #define __FUNCT__ "MatLoad_MPIBAIJ"
3367 PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer)
3368 {
3369   PetscErrorCode ierr;
3370   int            fd;
3371   PetscInt       i,nz,j,rstart,rend;
3372   PetscScalar    *vals,*buf;
3373   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3374   MPI_Status     status;
3375   PetscMPIInt    rank,size,maxnz;
3376   PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
3377   PetscInt       *locrowlens = PETSC_NULL,*procsnz = PETSC_NULL,*browners = PETSC_NULL;
3378   PetscInt       jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax;
3379   PetscMPIInt    tag = ((PetscObject)viewer)->tag;
3380   PetscInt       *dlens = PETSC_NULL,*odlens = PETSC_NULL,*mask = PETSC_NULL,*masked1 = PETSC_NULL,*masked2 = PETSC_NULL,rowcount,odcount;
3381   PetscInt       dcount,kmax,k,nzcount,tmp,mend,sizesset=1,grows,gcols;
3382 
3383   PetscFunctionBegin;
    ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIBAIJ matrix","Mat");CHKERRQ(ierr);
3385     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
3386   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3387 
3388   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3389   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3390   if (!rank) {
3391     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3392     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
3394   }
3395 
3396   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
3397 
3398   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3399   M = header[1]; N = header[2];
3400 
3401   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3402   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
3403   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
3404 
3405   /* If global sizes are set, check if they are consistent with that given in the file */
3406   if (sizesset) {
3407     ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
3408   }
  if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",M,grows);
  if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",N,gcols);
3411 
3412   if (M != N) SETERRQ(((PetscObject)viewer)->comm,PETSC_ERR_SUP,"Can only do square matrices");
3413 
3414   /*
3415      This code adds extra rows to make sure the number of rows is
3416      divisible by the blocksize
3417   */
3418   Mbs        = M/bs;
3419   extra_rows = bs - M + bs*Mbs;
3420   if (extra_rows == bs) extra_rows = 0;
3421   else                  Mbs++;
3422   if (extra_rows && !rank) {
3423     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3424   }
3425 
3426   /* determine ownership of all rows */
3427   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
3428     mbs        = Mbs/size + ((Mbs % size) > rank);
3429     m          = mbs*bs;
3430   } else { /* User set */
3431     m          = newmat->rmap->n;
3432     mbs        = m/bs;
3433   }
3434   ierr       = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr);
3435   ierr       = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3436 
3437   /* process 0 needs enough room for process with most rows */
3438   if (!rank) {
3439     mmax = rowners[1];
    for (i=2; i<=size; i++) {  /* include the count from the last process */
      mmax = PetscMax(mmax,rowners[i]);
    }
    mmax *= bs;
3444   } else mmax = m;
3445 
3446   rowners[0] = 0;
3447   for (i=2; i<=size; i++)  rowners[i] += rowners[i-1];
3448   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
3449   rstart = rowners[rank];
3450   rend   = rowners[rank+1];
3451 
3452   /* distribute row lengths to all processors */
3453   ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr);
3454   if (!rank) {
3455     mend = m;
3456     if (size == 1) mend = mend - extra_rows;
3457     ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr);
3458     for (j=mend; j<m; j++) locrowlens[j] = 1;
3459     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3460     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3461     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3462     for (j=0; j<m; j++) {
3463       procsnz[0] += locrowlens[j];
3464     }
3465     for (i=1; i<size; i++) {
3466       mend = browners[i+1] - browners[i];
3467       if (i == size-1) mend = mend - extra_rows;
3468       ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr);
3469       for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
3470       /* calculate the number of nonzeros on each processor */
3471       for (j=0; j<browners[i+1]-browners[i]; j++) {
3472         procsnz[i] += rowlengths[j];
3473       }
3474       ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3475     }
3476     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3477   } else {
3478     ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3479   }
3480 
3481   if (!rank) {
3482     /* determine max buffer needed and allocate it */
3483     maxnz = procsnz[0];
3484     for (i=1; i<size; i++) {
3485       maxnz = PetscMax(maxnz,procsnz[i]);
3486     }
3487     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3488 
3489     /* read in my part of the matrix column indices  */
3490     nz     = procsnz[0];
3491     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
3492     mycols = ibuf;
3493     if (size == 1)  nz -= extra_rows;
3494     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3495     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
3496 
3497     /* read in every ones (except the last) and ship off */
    /* read in everyone else's part (except the last proc's) and ship it off */
3499       nz   = procsnz[i];
3500       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3501       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3502     }
3503     /* read in the stuff for the last proc */
3504     if (size != 1) {
3505       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
3506       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3507       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
3508       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
3509     }
3510     ierr = PetscFree(cols);CHKERRQ(ierr);
3511   } else {
3512     /* determine buffer space needed for message */
3513     nz = 0;
3514     for (i=0; i<m; i++) {
3515       nz += locrowlens[i];
3516     }
3517     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
3518     mycols = ibuf;
3519     /* receive message of column indices*/
3520     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3521     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
3522     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
3523   }
3524 
3525   /* loop over local rows, determining number of off diagonal entries */
3526   ierr     = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
3527   ierr     = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
3528   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3529   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3530   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3531   rowcount = 0; nzcount = 0;
3532   for (i=0; i<mbs; i++) {
3533     dcount  = 0;
3534     odcount = 0;
3535     for (j=0; j<bs; j++) {
3536       kmax = locrowlens[rowcount];
3537       for (k=0; k<kmax; k++) {
3538         tmp = mycols[nzcount++]/bs;
3539         if (!mask[tmp]) {
3540           mask[tmp] = 1;
3541           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
3542           else masked1[dcount++] = tmp;
3543         }
3544       }
3545       rowcount++;
3546     }
3547 
3548     dlens[i]  = dcount;
3549     odlens[i] = odcount;
3550 
3551     /* zero out the mask elements we set */
3552     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
3553     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
3554   }
3555 
3556 
3557   if (!sizesset) {
3558     ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3559   }
3560   ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
3561 
3562   if (!rank) {
3563     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
3564     /* read in my part of the matrix numerical values  */
3565     nz = procsnz[0];
3566     vals = buf;
3567     mycols = ibuf;
3568     if (size == 1)  nz -= extra_rows;
3569     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3570     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
3571 
3572     /* insert into matrix */
3573     jj      = rstart*bs;
3574     for (i=0; i<m; i++) {
3575       ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3576       mycols += locrowlens[i];
3577       vals   += locrowlens[i];
3578       jj++;
3579     }
3580     /* read in other processors (except the last one) and ship out */
3581     for (i=1; i<size-1; i++) {
3582       nz   = procsnz[i];
3583       vals = buf;
3584       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3585       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3586     }
3587     /* the last proc */
    if (size != 1) {
      nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
3590       vals = buf;
3591       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3592       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
3593       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3594     }
3595     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3596   } else {
3597     /* receive numeric values */
3598     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
3599 
3600     /* receive message of values*/
3601     vals   = buf;
3602     mycols = ibuf;
3603     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
3604     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
3605     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
3606 
3607     /* insert into matrix */
3608     jj      = rstart*bs;
3609     for (i=0; i<m; i++) {
3610       ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3611       mycols += locrowlens[i];
3612       vals   += locrowlens[i];
3613       jj++;
3614     }
3615   }
3616   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
3617   ierr = PetscFree(buf);CHKERRQ(ierr);
3618   ierr = PetscFree(ibuf);CHKERRQ(ierr);
3619   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
3620   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
3621   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
3622   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3623   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3624 
3625   PetscFunctionReturn(0);
3626 }
3627 
3628 #undef __FUNCT__
3629 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
3630 /*@
   MatMPIBAIJSetHashTableFactor - Sets the factor used to compute the size of the hash table.

   Not Collective, each process can use a different factor

   Input Parameters:
+  mat  - the matrix
-  fact - the factor
3638 
3639    Level: advanced
3640 
3641   Notes:
3642    This can also be set by the command line option: -mat_use_hash_table <fact>
3643 
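   Example (a sketch):
.vb
   ierr = MatSetOption(mat,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
   ierr = MatMPIBAIJSetHashTableFactor(mat,1.6);CHKERRQ(ierr);
.ve
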
3644 .keywords: matrix, hashtable, factor, HT
3645 
3646 .seealso: MatSetOption()
3647 @*/
3648 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
3649 {
3650   PetscErrorCode ierr,(*f)(Mat,PetscReal);
3651 
3652   PetscFunctionBegin;
3653   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);CHKERRQ(ierr);
3654   if (f) {
3655     ierr = (*f)(mat,fact);CHKERRQ(ierr);
3656   }
3657   PetscFunctionReturn(0);
3658 }
3659 
3660 EXTERN_C_BEGIN
3661 #undef __FUNCT__
3662 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
3663 PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
3664 {
3665   Mat_MPIBAIJ *baij;
3666 
3667   PetscFunctionBegin;
3668   baij = (Mat_MPIBAIJ*)mat->data;
3669   baij->ht_fact = fact;
3670   PetscFunctionReturn(0);
3671 }
3672 EXTERN_C_END
3673 
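/*
    MatMPIBAIJGetSeqBAIJ - gives access to the local diagonal block Ad, the
  local off-diagonal block Ao, and the garray map from Ao's local block
  column numbers to global block column numbers.  No copies are made; the
  caller must not destroy the returned objects.
*/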
3674 #undef __FUNCT__
3675 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
3676 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3677 {
3678   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
3679   PetscFunctionBegin;
3680   *Ad     = a->A;
3681   *Ao     = a->B;
3682   *colmap = a->garray;
3683   PetscFunctionReturn(0);
3684 }
3685 
3686 /*
    Special version for direct calls from Fortran (to eliminate two function call overheads)
3688 */
3689 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3690 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3691 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3692 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3693 #endif
3694 
3695 #undef __FUNCT__
#define __FUNCT__ "matmpibaijsetvaluesblocked_"
3697 /*@C
3698   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked()
3699 
3700   Collective on Mat
3701 
3702   Input Parameters:
3703 + mat - the matrix
3704 . min - number of input rows
3705 . im - input rows
3706 . nin - number of input columns
3707 . in - input columns
3708 . v - numerical values input
3709 - addvin - INSERT_VALUES or ADD_VALUES
3710 
3711   Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse.
3712 
3713   Level: advanced
3714 
3715 .seealso:   MatSetValuesBlocked()
3716 @*/
3717 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
3718 {
3719   /* convert input arguments to C version */
3720   Mat             mat = *matin;
3721   PetscInt        m = *min, n = *nin;
3722   InsertMode      addv = *addvin;
3723 
3724   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
3725   const MatScalar *value;
3726   MatScalar       *barray=baij->barray;
3727   PetscTruth      roworiented = baij->roworiented;
3728   PetscErrorCode  ierr;
3729   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
3730   PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
3731   PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
3732 
3733   PetscFunctionBegin;
3734   /* tasks normally handled by MatSetValuesBlocked() */
3735   if (mat->insertmode == NOT_SET_VALUES) {
3736     mat->insertmode = addv;
3737   }
3738 #if defined(PETSC_USE_DEBUG)
3739   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
3740   if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
3741 #endif
3742   if (mat->assembled) {
3743     mat->was_assembled = PETSC_TRUE;
3744     mat->assembled     = PETSC_FALSE;
3745   }
3746   ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
3747 
3748 
  if (!barray) {
3750     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
3751     baij->barray = barray;
3752   }
3753 
3754   if (roworiented) {
3755     stepval = (n-1)*bs;
3756   } else {
3757     stepval = (m-1)*bs;
3758   }
3759   for (i=0; i<m; i++) {
3760     if (im[i] < 0) continue;
3761 #if defined(PETSC_USE_DEBUG)
3762     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
3763 #endif
3764     if (im[i] >= rstart && im[i] < rend) {
3765       row = im[i] - rstart;
3766       for (j=0; j<n; j++) {
3767         /* If NumCol = 1 then a copy is not required */
3768         if ((roworiented) && (n == 1)) {
3769           barray = (MatScalar*)v + i*bs2;
3770         } else if((!roworiented) && (m == 1)) {
3771           barray = (MatScalar*)v + j*bs2;
3772         } else { /* Here a copy is required */
3773           if (roworiented) {
3774             value = v + i*(stepval+bs)*bs + j*bs;
3775           } else {
3776             value = v + j*(stepval+bs)*bs + i*bs;
3777           }
3778           for (ii=0; ii<bs; ii++,value+=stepval) {
3779             for (jj=0; jj<bs; jj++) {
3780               *barray++  = *value++;
3781             }
3782           }
3783           barray -=bs2;
3784         }
3785 
3786         if (in[j] >= cstart && in[j] < cend){
3787           col  = in[j] - cstart;
3788           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
3789         }
3790         else if (in[j] < 0) continue;
3791 #if defined(PETSC_USE_DEBUG)
3792         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
3793 #endif
3794         else {
3795           if (mat->was_assembled) {
3796             if (!baij->colmap) {
3797               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
3798             }
3799 
3800 #if defined(PETSC_USE_DEBUG)
3801 #if defined (PETSC_USE_CTABLE)
3802             { PetscInt data;
3803               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
3804               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
3805             }
3806 #else
3807             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
3808 #endif
3809 #endif
3810 #if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
3812             col  = (col - 1)/bs;
3813 #else
3814             col = (baij->colmap[in[j]] - 1)/bs;
3815 #endif
3816             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
3817               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
3818               col =  in[j];
3819             }
3820           }
3821           else col = in[j];
3822           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
3823         }
3824       }
3825     } else {
3826       if (!baij->donotstash) {
3827         if (roworiented) {
3828           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
3829         } else {
3830           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
3831         }
3832       }
3833     }
3834   }
3835 
3836   /* task normally handled by MatSetValuesBlocked() */
3837   ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
3838   PetscFunctionReturn(0);
3839 }
3840