xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 42eaa7f3b58bc3af4b9ef28b03ccaa8b4c1282ac)
1 #define PETSCMAT_DLL
2 
3 #include "../src/mat/impls/baij/mpi/mpibaij.h"   /*I  "petscmat.h"  I*/
4 #include "petscblaslapack.h"
5 
6 EXTERN PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
7 EXTERN PetscErrorCode DisAssemble_MPIBAIJ(Mat);
8 EXTERN PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat,PetscInt,IS[],PetscInt);
9 EXTERN PetscErrorCode MatGetSubMatrices_MPIBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat *[]);
10 EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []);
11 EXTERN PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
12 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
13 EXTERN PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
14 EXTERN PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
15 EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar);
16 
17 #undef __FUNCT__
18 #define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ"
19 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
20 {
21   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
22   PetscErrorCode ierr;
23   PetscInt       i,*idxb = 0;
24   PetscScalar    *va,*vb;
25   Vec            vtmp;
26 
27   PetscFunctionBegin;
28   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
29   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
30   if (idx) {
31     for (i=0; i<A->rmap->n; i++) {if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;}
32   }
33 
34   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
35   if (idx) {ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);}
36   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
37   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
38 
39   for (i=0; i<A->rmap->n; i++){
40     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {va[i] = vb[i]; if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);}
41   }
42 
43   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
44   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
45   if (idxb) {ierr = PetscFree(idxb);CHKERRQ(ierr);}
46   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
47   PetscFunctionReturn(0);
48 }
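
/*
   Informal note (not part of the library code): MatGetRowMaxAbs above first
   takes the per-row maxima of the diagonal block a->A (shifting the returned
   column indices to global numbering), then compares them with the maxima of
   the off-diagonal block a->B, whose block-column indices are translated back
   to global point columns through a->garray before being stored in idx[].
*/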
49 
50 EXTERN_C_BEGIN
51 #undef __FUNCT__
52 #define __FUNCT__ "MatStoreValues_MPIBAIJ"
53 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIBAIJ(Mat mat)
54 {
55   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
56   PetscErrorCode ierr;
57 
58   PetscFunctionBegin;
59   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
60   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
61   PetscFunctionReturn(0);
62 }
63 EXTERN_C_END
64 
65 EXTERN_C_BEGIN
66 #undef __FUNCT__
67 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
68 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIBAIJ(Mat mat)
69 {
70   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
71   PetscErrorCode ierr;
72 
73   PetscFunctionBegin;
74   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
75   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
76   PetscFunctionReturn(0);
77 }
78 EXTERN_C_END
79 
80 /*
81      Local utility routine that creates a mapping from the global column
82    number to the local number in the off-diagonal part of the local
83    storage of the matrix.  This is done in a non-scalable way since the
84    length of colmap equals the number of block columns of the global matrix.
85 */
86 #undef __FUNCT__
87 #define __FUNCT__ "CreateColmap_MPIBAIJ_Private"
88 PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat)
89 {
90   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
91   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
92   PetscErrorCode ierr;
93   PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;
94 
95   PetscFunctionBegin;
96 #if defined (PETSC_USE_CTABLE)
97   ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr);
98   for (i=0; i<nbs; i++){
99     ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
100   }
101 #else
102   ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr);
103   ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
104   ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
105   for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
106 #endif
107   PetscFunctionReturn(0);
108 }
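
/*
   Illustration (not part of the library code): with block size bs = 2 and an
   off-diagonal part B whose local block columns correspond to global block
   columns garray[] = {3,7,12}, the loop above produces (array variant)

       colmap[3]  = 0*bs + 1 = 1
       colmap[7]  = 1*bs + 1 = 3
       colmap[12] = 2*bs + 1 = 5

   i.e. colmap[g] holds 1 + the first local point column of global block
   column g, and 0 means "that block column is not present locally".  The
   PETSC_USE_CTABLE variant stores the same values keyed by g+1, so that a key
   of 0 is never used.
*/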
109 
110 #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
111 { \
112  \
113     brow = row/bs;  \
114     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
115     rmax = aimax[brow]; nrow = ailen[brow]; \
116       bcol = col/bs; \
117       ridx = row % bs; cidx = col % bs; \
118       low = 0; high = nrow; \
119       while (high-low > 3) { \
120         t = (low+high)/2; \
121         if (rp[t] > bcol) high = t; \
122         else              low  = t; \
123       } \
124       for (_i=low; _i<high; _i++) { \
125         if (rp[_i] > bcol) break; \
126         if (rp[_i] == bcol) { \
127           bap  = ap +  bs2*_i + bs*cidx + ridx; \
128           if (addv == ADD_VALUES) *bap += value;  \
129           else                    *bap  = value;  \
130           goto a_noinsert; \
131         } \
132       } \
133       if (a->nonew == 1) goto a_noinsert; \
134       if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
135       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
136       N = nrow++ - 1;  \
137       /* shift up all the later entries in this row */ \
138       for (ii=N; ii>=_i; ii--) { \
139         rp[ii+1] = rp[ii]; \
140         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
141       } \
142       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
143       rp[_i]                      = bcol;  \
144       ap[bs2*_i + bs*cidx + ridx] = value;  \
145       a_noinsert:; \
146     ailen[brow] = nrow; \
147 }
148 
149 #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
150 { \
151     brow = row/bs;  \
152     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
153     rmax = bimax[brow]; nrow = bilen[brow]; \
154       bcol = col/bs; \
155       ridx = row % bs; cidx = col % bs; \
156       low = 0; high = nrow; \
157       while (high-low > 3) { \
158         t = (low+high)/2; \
159         if (rp[t] > bcol) high = t; \
160         else              low  = t; \
161       } \
162       for (_i=low; _i<high; _i++) { \
163         if (rp[_i] > bcol) break; \
164         if (rp[_i] == bcol) { \
165           bap  = ap +  bs2*_i + bs*cidx + ridx; \
166           if (addv == ADD_VALUES) *bap += value;  \
167           else                    *bap  = value;  \
168           goto b_noinsert; \
169         } \
170       } \
171       if (b->nonew == 1) goto b_noinsert; \
172       if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
173       MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
174       CHKMEMQ;\
175       N = nrow++ - 1;  \
176       /* shift up all the later entries in this row */ \
177       for (ii=N; ii>=_i; ii--) { \
178         rp[ii+1] = rp[ii]; \
179         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
180       } \
181       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
182       rp[_i]                      = bcol;  \
183       ap[bs2*_i + bs*cidx + ridx] = value;  \
184       b_noinsert:; \
185     bilen[brow] = nrow; \
186 }
187 
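/*
   Illustrative note (an informal sketch, not part of the library code): both
   macros above store each bs x bs block in column-major order, so entry
   (ridx,cidx) of block _i lives at ap[bs2*_i + bs*cidx + ridx].  For bs = 2,
   a block [a b; c d] occupies four consecutive MatScalars a,c,b,d:

       ap[bs2*_i + 0] = a    (ridx=0, cidx=0)
       ap[bs2*_i + 1] = c    (ridx=1, cidx=0)
       ap[bs2*_i + 2] = b    (ridx=0, cidx=1)
       ap[bs2*_i + 3] = d    (ridx=1, cidx=1)

   The while loop is a short bisection that narrows [low,high) before the final
   linear scan over the sorted block-column indices rp[]; if the block column
   is absent and insertion is allowed, later blocks in the row are shifted up
   one slot to make room.
*/
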
188 #undef __FUNCT__
189 #define __FUNCT__ "MatSetValues_MPIBAIJ"
190 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
191 {
192   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
193   MatScalar      value;
194   PetscTruth     roworiented = baij->roworiented;
195   PetscErrorCode ierr;
196   PetscInt       i,j,row,col;
197   PetscInt       rstart_orig=mat->rmap->rstart;
198   PetscInt       rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart;
199   PetscInt       cend_orig=mat->cmap->rend,bs=mat->rmap->bs;
200 
201   /* Some variables required by the macros below */
202   Mat            A = baij->A;
203   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)(A)->data;
204   PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
205   MatScalar      *aa=a->a;
206 
207   Mat            B = baij->B;
208   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(B)->data;
209   PetscInt       *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
210   MatScalar      *ba=b->a;
211 
212   PetscInt       *rp,ii,nrow,_i,rmax,N,brow,bcol;
213   PetscInt       low,high,t,ridx,cidx,bs2=a->bs2;
214   MatScalar      *ap,*bap;
215 
216   PetscFunctionBegin;
217   if (v) PetscValidScalarPointer(v,6);
218   for (i=0; i<m; i++) {
219     if (im[i] < 0) continue;
220 #if defined(PETSC_USE_DEBUG)
221     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
222 #endif
223     if (im[i] >= rstart_orig && im[i] < rend_orig) {
224       row = im[i] - rstart_orig;
225       for (j=0; j<n; j++) {
226         if (in[j] >= cstart_orig && in[j] < cend_orig){
227           col = in[j] - cstart_orig;
228           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
229           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
230           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
231         } else if (in[j] < 0) continue;
232 #if defined(PETSC_USE_DEBUG)
233         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
234 #endif
235         else {
236           if (mat->was_assembled) {
237             if (!baij->colmap) {
238               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
239             }
240 #if defined (PETSC_USE_CTABLE)
241             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
242             col  = col - 1;
243 #else
244             col = baij->colmap[in[j]/bs] - 1;
245 #endif
246             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
247               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
248               col =  in[j];
249               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
250               B = baij->B;
251               b = (Mat_SeqBAIJ*)(B)->data;
252               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
253               ba=b->a;
254             } else col += in[j]%bs;
255           } else col = in[j];
256           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
257           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
258           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
259         }
260       }
261     } else {
262       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
263       if (!baij->donotstash) {
264         if (roworiented) {
265           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
266         } else {
267           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
268         }
269       }
270     }
271   }
272   PetscFunctionReturn(0);
273 }
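
/*
   Informal usage sketch (not part of the library code; assumes mat was created
   and preallocated as MATMPIBAIJ with bs = 2).  Callers normally go through
   the public interface, which dispatches to the routine above:

       PetscInt    rows[2] = {0,1},cols[2] = {0,1};
       PetscScalar vals[4] = {1.0,2.0,3.0,4.0};     a row-oriented 2x2 dense piece
       ierr = MatSetValues(mat,2,rows,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr);
       ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
       ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

   Entries whose global row is owned by this process go into the diagonal block
   baij->A or, via colmap, the off-diagonal block baij->B; entries for rows
   owned by other processes are placed in mat->stash and communicated during
   assembly.
*/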
274 
275 #undef __FUNCT__
276 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
277 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
278 {
279   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
280   const PetscScalar *value;
281   MatScalar         *barray=baij->barray;
282   PetscTruth        roworiented = baij->roworiented;
283   PetscErrorCode    ierr;
284   PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
285   PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
286   PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
287 
288   PetscFunctionBegin;
289   if(!barray) {
290     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
291     baij->barray = barray;
292   }
293 
294   if (roworiented) {
295     stepval = (n-1)*bs;
296   } else {
297     stepval = (m-1)*bs;
298   }
299   for (i=0; i<m; i++) {
300     if (im[i] < 0) continue;
301 #if defined(PETSC_USE_DEBUG)
302     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
303 #endif
304     if (im[i] >= rstart && im[i] < rend) {
305       row = im[i] - rstart;
306       for (j=0; j<n; j++) {
307         /* If n == 1 (row-oriented) or m == 1 (column-oriented) then a copy is not required */
308         if ((roworiented) && (n == 1)) {
309           barray = (MatScalar*)v + i*bs2;
310         } else if((!roworiented) && (m == 1)) {
311           barray = (MatScalar*)v + j*bs2;
312         } else { /* Here a copy is required */
313           if (roworiented) {
314             value = v + (i*(stepval+bs) + j)*bs;
315           } else {
316 	    value = v + (j*(stepval+bs) + i)*bs;
317           }
318           for (ii=0; ii<bs; ii++,value+=bs+stepval) {
319             for (jj=0; jj<bs; jj++) {
320               barray[jj]  = value[jj];
321             }
322             barray += bs;
323           }
324           barray -= bs2;
325         }
326 
327         if (in[j] >= cstart && in[j] < cend){
328           col  = in[j] - cstart;
329           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
330         }
331         else if (in[j] < 0) continue;
332 #if defined(PETSC_USE_DEBUG)
333         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
334 #endif
335         else {
336           if (mat->was_assembled) {
337             if (!baij->colmap) {
338               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
339             }
340 
341 #if defined(PETSC_USE_DEBUG)
342 #if defined (PETSC_USE_CTABLE)
343             { PetscInt data;
344               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
345               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
346             }
347 #else
348             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
349 #endif
350 #endif
351 #if defined (PETSC_USE_CTABLE)
352 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
353             col  = (col - 1)/bs;
354 #else
355             col = (baij->colmap[in[j]] - 1)/bs;
356 #endif
357             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
358               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
359               col =  in[j];
360             }
361           }
362           else col = in[j];
363           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
364         }
365       }
366     } else {
367       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
368       if (!baij->donotstash) {
369         if (roworiented) {
370           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
371         } else {
372           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
373         }
374       }
375     }
376   }
377   PetscFunctionReturn(0);
378 }
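
/*
   Illustrative note (informal, not part of the library code): when several
   blocks are passed at once, the bs x bs block destined for (im[i],in[j]) is
   strided inside v, so it is first gathered into the contiguous work array
   barray.  For the row-oriented case with m = 1, n = 2, bs = 2, v holds the
   two blocks interleaved row by row,

       v = [ a00 a01 b00 b01,
             a10 a11 b10 b11 ]        stepval = (n-1)*bs = 2

   and the copy loop collects a00 a01 a10 a11 into barray for the first block
   (skipping stepval entries between block rows), then b00 b01 b10 b11 for the
   second.  The single-block cases (n == 1 row-oriented, m == 1 column-oriented)
   point barray directly into v and skip the copy.
*/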
379 
380 #define HASH_KEY 0.6180339887
381 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
382 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
383 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
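
/*
   Informal sketch of the multiplicative hash above (not part of the library
   code): the key is multiplied by the golden-ratio constant, the integer part
   is dropped, and the remaining fraction is scaled by the table size, e.g.

       key = 10, size = 13:
       tmp  = 10*0.6180339887 = 6.180339887
       HASH = (PetscInt)(13*(6.180339887 - 6)) = 2

   Collisions are resolved by the linear probing performed in the routines
   below and in MatCreateHashTable_MPIBAIJ_Private().
*/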
384 #undef __FUNCT__
385 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
386 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
387 {
388   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
389   PetscTruth     roworiented = baij->roworiented;
390   PetscErrorCode ierr;
391   PetscInt       i,j,row,col;
392   PetscInt       rstart_orig=mat->rmap->rstart;
393   PetscInt       rend_orig=mat->rmap->rend,Nbs=baij->Nbs;
394   PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
395   PetscReal      tmp;
396   MatScalar      **HD = baij->hd,value;
397 #if defined(PETSC_USE_DEBUG)
398   PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
399 #endif
400 
401   PetscFunctionBegin;
402   if (v) PetscValidScalarPointer(v,6);
403   for (i=0; i<m; i++) {
404 #if defined(PETSC_USE_DEBUG)
405     if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
406     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
407 #endif
408     row = im[i];
409     if (row >= rstart_orig && row < rend_orig) {
410       for (j=0; j<n; j++) {
411         col = in[j];
412         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
413         /* Look up the entry in the hash table */
414         key = (row/bs)*Nbs+(col/bs)+1;
415         h1  = HASH(size,key,tmp);
416 
417 
418         idx = h1;
419 #if defined(PETSC_USE_DEBUG)
420         insert_ct++;
421         total_ct++;
422         if (HT[idx] != key) {
423           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
424           if (idx == size) {
425             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
426             if (idx == h1) {
427               SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
428             }
429           }
430         }
431 #else
432         if (HT[idx] != key) {
433           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
434           if (idx == size) {
435             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
436             if (idx == h1) {
437               SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
438             }
439           }
440         }
441 #endif
442         /* A HASH table entry is found, so insert the values at the correct address */
443         if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
444         else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
445       }
446     } else {
447       if (!baij->donotstash) {
448         if (roworiented) {
449           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
450         } else {
451           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
452         }
453       }
454     }
455   }
456 #if defined(PETSC_USE_DEBUG)
457   baij->ht_total_ct = total_ct;
458   baij->ht_insert_ct = insert_ct;
459 #endif
460   PetscFunctionReturn(0);
461 }
462 
463 #undef __FUNCT__
464 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
465 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
466 {
467   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
468   PetscTruth        roworiented = baij->roworiented;
469   PetscErrorCode    ierr;
470   PetscInt          i,j,ii,jj,row,col;
471   PetscInt          rstart=baij->rstartbs;
472   PetscInt          rend=mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
473   PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
474   PetscReal         tmp;
475   MatScalar         **HD = baij->hd,*baij_a;
476   const PetscScalar *v_t,*value;
477 #if defined(PETSC_USE_DEBUG)
478   PetscInt          total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
479 #endif
480 
481   PetscFunctionBegin;
482 
483   if (roworiented) {
484     stepval = (n-1)*bs;
485   } else {
486     stepval = (m-1)*bs;
487   }
488   for (i=0; i<m; i++) {
489 #if defined(PETSC_USE_DEBUG)
490     if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
491     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
492 #endif
493     row   = im[i];
494     v_t   = v + i*nbs2;
495     if (row >= rstart && row < rend) {
496       for (j=0; j<n; j++) {
497         col = in[j];
498 
499         /* Look up the entry in the hash table */
500         key = row*Nbs+col+1;
501         h1  = HASH(size,key,tmp);
502 
503         idx = h1;
504 #if defined(PETSC_USE_DEBUG)
505         total_ct++;
506         insert_ct++;
507        if (HT[idx] != key) {
508           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
509           if (idx == size) {
510             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
511             if (idx == h1) {
512               SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
513             }
514           }
515         }
516 #else
517         if (HT[idx] != key) {
518           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
519           if (idx == size) {
520             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
521             if (idx == h1) {
522               SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
523             }
524           }
525         }
526 #endif
527         baij_a = HD[idx];
528         if (roworiented) {
529           /*value = v + i*(stepval+bs)*bs + j*bs;*/
530           /* value = v + (i*(stepval+bs)+j)*bs; */
531           value = v_t;
532           v_t  += bs;
533           if (addv == ADD_VALUES) {
534             for (ii=0; ii<bs; ii++,value+=stepval) {
535               for (jj=ii; jj<bs2; jj+=bs) {
536                 baij_a[jj]  += *value++;
537               }
538             }
539           } else {
540             for (ii=0; ii<bs; ii++,value+=stepval) {
541               for (jj=ii; jj<bs2; jj+=bs) {
542                 baij_a[jj]  = *value++;
543               }
544             }
545           }
546         } else {
547           value = v + j*(stepval+bs)*bs + i*bs;
548           if (addv == ADD_VALUES) {
549             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
550               for (jj=0; jj<bs; jj++) {
551                 baij_a[jj]  += *value++;
552               }
553             }
554           } else {
555             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
556               for (jj=0; jj<bs; jj++) {
557                 baij_a[jj]  = *value++;
558               }
559             }
560           }
561         }
562       }
563     } else {
564       if (!baij->donotstash) {
565         if (roworiented) {
566           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
567         } else {
568           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
569         }
570       }
571     }
572   }
573 #if defined(PETSC_USE_DEBUG)
574   baij->ht_total_ct = total_ct;
575   baij->ht_insert_ct = insert_ct;
576 #endif
577   PetscFunctionReturn(0);
578 }
579 
580 #undef __FUNCT__
581 #define __FUNCT__ "MatGetValues_MPIBAIJ"
582 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
583 {
584   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
585   PetscErrorCode ierr;
586   PetscInt       bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
587   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
588 
589   PetscFunctionBegin;
590   for (i=0; i<m; i++) {
591     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
592     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
593     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
594       row = idxm[i] - bsrstart;
595       for (j=0; j<n; j++) {
596         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
597         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
598         if (idxn[j] >= bscstart && idxn[j] < bscend){
599           col = idxn[j] - bscstart;
600           ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
601         } else {
602           if (!baij->colmap) {
603             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
604           }
605 #if defined (PETSC_USE_CTABLE)
606           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
607           data --;
608 #else
609           data = baij->colmap[idxn[j]/bs]-1;
610 #endif
611           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
612           else {
613             col  = data + idxn[j]%bs;
614             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
615           }
616         }
617       }
618     } else {
619       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
620     }
621   }
622   PetscFunctionReturn(0);
623 }
624 
625 #undef __FUNCT__
626 #define __FUNCT__ "MatNorm_MPIBAIJ"
627 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
628 {
629   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
630   Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
631   PetscErrorCode ierr;
632   PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
633   PetscReal      sum = 0.0;
634   MatScalar      *v;
635 
636   PetscFunctionBegin;
637   if (baij->size == 1) {
638     ierr =  MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
639   } else {
640     if (type == NORM_FROBENIUS) {
641       v = amat->a;
642       nz = amat->nz*bs2;
643       for (i=0; i<nz; i++) {
644 #if defined(PETSC_USE_COMPLEX)
645         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
646 #else
647         sum += (*v)*(*v); v++;
648 #endif
649       }
650       v = bmat->a;
651       nz = bmat->nz*bs2;
652       for (i=0; i<nz; i++) {
653 #if defined(PETSC_USE_COMPLEX)
654         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
655 #else
656         sum += (*v)*(*v); v++;
657 #endif
658       }
659       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
660       *nrm = sqrt(*nrm);
661     } else if (type == NORM_1) { /* max column sum */
662       PetscReal *tmp,*tmp2;
663       PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
664       ierr = PetscMalloc2(mat->cmap->N,PetscReal,&tmp,mat->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr);
665       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
666       v = amat->a; jj = amat->j;
667       for (i=0; i<amat->nz; i++) {
668         for (j=0; j<bs; j++){
669           col = bs*(cstart + *jj) + j; /* column index */
670           for (row=0; row<bs; row++){
671             tmp[col] += PetscAbsScalar(*v);  v++;
672           }
673         }
674         jj++;
675       }
676       v = bmat->a; jj = bmat->j;
677       for (i=0; i<bmat->nz; i++) {
678         for (j=0; j<bs; j++){
679           col = bs*garray[*jj] + j;
680           for (row=0; row<bs; row++){
681             tmp[col] += PetscAbsScalar(*v); v++;
682           }
683         }
684         jj++;
685       }
686       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
687       *nrm = 0.0;
688       for (j=0; j<mat->cmap->N; j++) {
689         if (tmp2[j] > *nrm) *nrm = tmp2[j];
690       }
691       ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
692     } else if (type == NORM_INFINITY) { /* max row sum */
693       PetscReal *sums;
694       ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr);
695       sum = 0.0;
696       for (j=0; j<amat->mbs; j++) {
697         for (row=0; row<bs; row++) sums[row] = 0.0;
698         v = amat->a + bs2*amat->i[j];
699         nz = amat->i[j+1]-amat->i[j];
700         for (i=0; i<nz; i++) {
701           for (col=0; col<bs; col++){
702             for (row=0; row<bs; row++){
703               sums[row] += PetscAbsScalar(*v); v++;
704             }
705           }
706         }
707         v = bmat->a + bs2*bmat->i[j];
708         nz = bmat->i[j+1]-bmat->i[j];
709         for (i=0; i<nz; i++) {
710           for (col=0; col<bs; col++){
711             for (row=0; row<bs; row++){
712               sums[row] += PetscAbsScalar(*v); v++;
713             }
714           }
715         }
716         for (row=0; row<bs; row++){
717           if (sums[row] > sum) sum = sums[row];
718         }
719       }
720       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
721       ierr = PetscFree(sums);CHKERRQ(ierr);
722     } else SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for this norm yet");
723   }
724   PetscFunctionReturn(0);
725 }
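
/*
   For reference (an informal summary, not part of the library code), the norms
   computed above from the local diagonal block A and off-diagonal block B,
   followed by an MPI reduction, are

       NORM_FROBENIUS:  sqrt( sum_ij |a_ij|^2 )              (MPI_SUM, then sqrt)
       NORM_1:          max_j sum_i |a_ij|    (column sums;   MPI_SUM, then max)
       NORM_INFINITY:   max_i sum_j |a_ij|    (row sums;      MPI_MAX)
*/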
726 
727 /*
728   Creates and populates the hash table.
729   This table is created only once.
730   If new entries need to be added to the matrix
731   then the hash table has to be destroyed and
732   recreated.
733 */
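
/*
   Informal note (not part of the library code): each occupied slot k of the
   table built below satisfies HT[k] = (block row)*Nbs + (block col) + 1 -- the
   +1 keeps the value 0 free to mean "empty slot" -- while HD[k] points at the
   first MatScalar of the corresponding bs*bs block inside a->a or b->a, so the
   _HT variants of MatSetValues can write straight into the block without
   searching the row.
*/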
734 #undef __FUNCT__
735 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
736 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
737 {
738   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
739   Mat            A = baij->A,B=baij->B;
740   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
741   PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
742   PetscErrorCode ierr;
743   PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
744   PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
745   PetscInt       *HT,key;
746   MatScalar      **HD;
747   PetscReal      tmp;
748 #if defined(PETSC_USE_INFO)
749   PetscInt       ct=0,max=0;
750 #endif
751 
752   PetscFunctionBegin;
753   if (baij->ht) PetscFunctionReturn(0);
754 
755   baij->ht_size = (PetscInt)(factor*nz);
756   ht_size       = baij->ht_size;
757 
758   /* Allocate Memory for Hash Table */
759   ierr = PetscMalloc2(ht_size,MatScalar*,&baij->hd,ht_size,PetscInt,&baij->ht);CHKERRQ(ierr);
760   ierr = PetscMemzero(baij->hd,ht_size*sizeof(MatScalar*));CHKERRQ(ierr);
761   ierr = PetscMemzero(baij->ht,ht_size*sizeof(PetscInt));CHKERRQ(ierr);
762   HD   = baij->hd;
763   HT   = baij->ht;
764 
765   /* Loop Over A */
766   for (i=0; i<a->mbs; i++) {
767     for (j=ai[i]; j<ai[i+1]; j++) {
768       row = i+rstart;
769       col = aj[j]+cstart;
770 
771       key = row*Nbs + col + 1;
772       h1  = HASH(ht_size,key,tmp);
773       for (k=0; k<ht_size; k++){
774         if (!HT[(h1+k)%ht_size]) {
775           HT[(h1+k)%ht_size] = key;
776           HD[(h1+k)%ht_size] = a->a + j*bs2;
777           break;
778 #if defined(PETSC_USE_INFO)
779         } else {
780           ct++;
781 #endif
782         }
783       }
784 #if defined(PETSC_USE_INFO)
785       if (k> max) max = k;
786 #endif
787     }
788   }
789   /* Loop Over B */
790   for (i=0; i<b->mbs; i++) {
791     for (j=bi[i]; j<bi[i+1]; j++) {
792       row = i+rstart;
793       col = garray[bj[j]];
794       key = row*Nbs + col + 1;
795       h1  = HASH(ht_size,key,tmp);
796       for (k=0; k<ht_size; k++){
797         if (!HT[(h1+k)%ht_size]) {
798           HT[(h1+k)%ht_size] = key;
799           HD[(h1+k)%ht_size] = b->a + j*bs2;
800           break;
801 #if defined(PETSC_USE_INFO)
802         } else {
803           ct++;
804 #endif
805         }
806       }
807 #if defined(PETSC_USE_INFO)
808       if (k> max) max = k;
809 #endif
810     }
811   }
812 
813   /* Print Summary */
814 #if defined(PETSC_USE_INFO)
815   for (i=0,j=0; i<ht_size; i++) {
816     if (HT[i]) {j++;}
817   }
818   ierr = PetscInfo2(mat,"Average Search = %5.2f, max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
819 #endif
820   PetscFunctionReturn(0);
821 }
822 
823 #undef __FUNCT__
824 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
825 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
826 {
827   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
828   PetscErrorCode ierr;
829   PetscInt       nstash,reallocs;
830   InsertMode     addv;
831 
832   PetscFunctionBegin;
833   if (baij->donotstash || mat->nooffprocentries) {
834     PetscFunctionReturn(0);
835   }
836 
837   /* make sure all processors are either in INSERTMODE or ADDMODE */
838   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
839   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
840   mat->insertmode = addv; /* in case this processor had no cache */
841 
842   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
843   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
844   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
845   ierr = PetscInfo2(mat,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
846   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
847   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
848   PetscFunctionReturn(0);
849 }
850 
851 #undef __FUNCT__
852 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
853 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
854 {
855   Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
856   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)baij->A->data;
857   PetscErrorCode ierr;
858   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
859   PetscInt       *row,*col;
860   PetscTruth     r1,r2,r3,other_disassembled;
861   MatScalar      *val;
862   InsertMode     addv = mat->insertmode;
863   PetscMPIInt    n;
864 
865   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
866   PetscFunctionBegin;
867   if (!baij->donotstash && !mat->nooffprocentries) {
868     while (1) {
869       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
870       if (!flg) break;
871 
872       for (i=0; i<n;) {
873         /* Now identify the consecutive vals belonging to the same row */
874         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
875         if (j < n) ncols = j-i;
876         else       ncols = n-i;
877         /* Now assemble all these values with a single function call */
878         ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
879         i = j;
880       }
881     }
882     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
883     /* Now process the block-stash. Since the values are stashed column-oriented,
884        temporarily clear the roworiented flags, and after MatSetValues()
885        restore the original flags */
886     r1 = baij->roworiented;
887     r2 = a->roworiented;
888     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
889     baij->roworiented = PETSC_FALSE;
890     a->roworiented    = PETSC_FALSE;
891     (((Mat_SeqBAIJ*)baij->B->data))->roworiented    = PETSC_FALSE; /* b->roworiented */
892     while (1) {
893       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
894       if (!flg) break;
895 
896       for (i=0; i<n;) {
897         /* Now identify the consecutive vals belonging to the same row */
898         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
899         if (j < n) ncols = j-i;
900         else       ncols = n-i;
901         ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
902         i = j;
903       }
904     }
905     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
906     baij->roworiented = r1;
907     a->roworiented    = r2;
908     ((Mat_SeqBAIJ*)baij->B->data)->roworiented    = r3; /* b->roworiented */
909   }
910 
911   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
912   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
913 
914   /* determine if any processor has disassembled, if so we must
915      also disassemble ourselves, in order that we may reassemble. */
916   /*
917      if nonzero structure of submatrix B cannot change then we know that
918      no processor disassembled thus we can skip this stuff
919   */
920   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
921     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
922     if (mat->was_assembled && !other_disassembled) {
923       ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
924     }
925   }
926 
927   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
928     ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
929   }
930   ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
931   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
932   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
933 
934 #if defined(PETSC_USE_INFO)
935   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
936     ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);
937     baij->ht_total_ct  = 0;
938     baij->ht_insert_ct = 0;
939   }
940 #endif
941   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
942     ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
943     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
944     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
945   }
946 
947   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
948   baij->rowvalues = 0;
949   PetscFunctionReturn(0);
950 }
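
/*
   Informal sketch of the assembly flow (not part of the library code): values
   set for rows owned by other processes are held in mat->stash / mat->bstash
   until assembly, when MatAssemblyBegin() starts the scatter and
   MatAssemblyEnd() drains it and re-inserts the received values locally, e.g.

       ierr = MatSetValues(mat,m,rows,n,cols,vals,ADD_VALUES);CHKERRQ(ierr);
       ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
       ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

   If no process ever sets off-process entries, calling
   MatSetOption(mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) beforehand skips the
   stash communication entirely, as checked at the top of
   MatAssemblyBegin_MPIBAIJ().
*/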
951 
952 #undef __FUNCT__
953 #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
954 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
955 {
956   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
957   PetscErrorCode    ierr;
958   PetscMPIInt       size = baij->size,rank = baij->rank;
959   PetscInt          bs = mat->rmap->bs;
960   PetscTruth        iascii,isdraw;
961   PetscViewer       sviewer;
962   PetscViewerFormat format;
963 
964   PetscFunctionBegin;
965   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
966   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
967   if (iascii) {
968     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
969     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
970       MatInfo info;
971       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
972       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
973       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
974              rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
975       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
976       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
977       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
978       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
979       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
980       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
981       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
982       PetscFunctionReturn(0);
983     } else if (format == PETSC_VIEWER_ASCII_INFO) {
984       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
985       PetscFunctionReturn(0);
986     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
987       PetscFunctionReturn(0);
988     }
989   }
990 
991   if (isdraw) {
992     PetscDraw       draw;
993     PetscTruth isnull;
994     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
995     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
996   }
997 
998   if (size == 1) {
999     ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1000     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
1001   } else {
1002     /* assemble the entire matrix onto first processor. */
1003     Mat         A;
1004     Mat_SeqBAIJ *Aloc;
1005     PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
1006     MatScalar   *a;
1007 
1008     /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
1009     /* Perhaps this should be the type of mat? */
1010     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1011     if (!rank) {
1012       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1013     } else {
1014       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1015     }
1016     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
1017     ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1018     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1019 
1020     /* copy over the A part */
1021     Aloc = (Mat_SeqBAIJ*)baij->A->data;
1022     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1023     ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
1024 
1025     for (i=0; i<mbs; i++) {
1026       rvals[0] = bs*(baij->rstartbs + i);
1027       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1028       for (j=ai[i]; j<ai[i+1]; j++) {
1029         col = (baij->cstartbs+aj[j])*bs;
1030         for (k=0; k<bs; k++) {
1031           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1032           col++; a += bs;
1033         }
1034       }
1035     }
1036     /* copy over the B part */
1037     Aloc = (Mat_SeqBAIJ*)baij->B->data;
1038     ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1039     for (i=0; i<mbs; i++) {
1040       rvals[0] = bs*(baij->rstartbs + i);
1041       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1042       for (j=ai[i]; j<ai[i+1]; j++) {
1043         col = baij->garray[aj[j]]*bs;
1044         for (k=0; k<bs; k++) {
1045           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1046           col++; a += bs;
1047         }
1048       }
1049     }
1050     ierr = PetscFree(rvals);CHKERRQ(ierr);
1051     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1052     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1053     /*
1054        Everyone has to call to draw the matrix since the graphics waits are
1055        synchronized across all processors that share the PetscDraw object
1056     */
1057     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1058     if (!rank) {
1059       ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1060       ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1061     }
1062     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1063     ierr = MatDestroy(A);CHKERRQ(ierr);
1064   }
1065   PetscFunctionReturn(0);
1066 }
1067 
1068 #undef __FUNCT__
1069 #define __FUNCT__ "MatView_MPIBAIJ_Binary"
1070 static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
1071 {
1072   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)mat->data;
1073   Mat_SeqBAIJ*   A = (Mat_SeqBAIJ*)a->A->data;
1074   Mat_SeqBAIJ*   B = (Mat_SeqBAIJ*)a->B->data;
1075   PetscErrorCode ierr;
1076   PetscInt       i,*row_lens,*crow_lens,bs = mat->rmap->bs,count,j,k,bs2=a->bs2,header[4],nz,rlen;
1077   PetscInt       *range,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll;
1078   int            fd;
1079   PetscScalar    *column_values;
1080   FILE           *file;
1081   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1082 
1083   PetscFunctionBegin;
1084   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1085   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
1086   nz   = bs2*(A->nz + B->nz);
1087   rlen = mat->rmap->n;
1088   if (!rank) {
1089     header[0] = MAT_FILE_CLASSID;
1090     header[1] = mat->rmap->N;
1091     header[2] = mat->cmap->N;
1092     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1093     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1094     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1095     /* get largest number of rows any processor has */
1096     range = mat->rmap->range;
1097     for (i=1; i<size; i++) {
1098       rlen = PetscMax(rlen,range[i+1] - range[i]);
1099     }
1100   } else {
1101     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1102   }
1103 
1104   ierr  = PetscMalloc((rlen/bs)*sizeof(PetscInt),&crow_lens);CHKERRQ(ierr);
1105   /* compute lengths of each row  */
1106   count = 0;
1107   for (i=0; i<a->mbs; i++) {
1108     crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1109   }
1110   /* store the row lengths to the file */
1111   if (!rank) {
1112     MPI_Status status;
1113     ierr  = PetscMalloc(rlen*sizeof(PetscInt),&row_lens);CHKERRQ(ierr);
1114     rlen  = (range[1] - range[0])/bs;
1115     for (i=0; i<rlen; i++) {
1116       for (j=0; j<bs; j++) {
1117         row_lens[i*bs+j] = bs*crow_lens[i];
1118       }
1119     }
1120     ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1121     for (i=1; i<size; i++) {
1122       rlen = (range[i+1] - range[i])/bs;
1123       ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1124       for (k=0; k<rlen; k++) {
1125 	for (j=0; j<bs; j++) {
1126 	  row_lens[k*bs+j] = bs*crow_lens[k];
1127 	}
1128       }
1129       ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1130     }
1131     ierr = PetscFree(row_lens);CHKERRQ(ierr);
1132   } else {
1133     ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1134   }
1135   ierr = PetscFree(crow_lens);CHKERRQ(ierr);
1136 
1137   /* load up the local column indices. Include them for every row, not just once per block row, since process 0 does not have the
1138      information needed to expand a block row into its individual rows. This does require more communication, but still no more than
1139      the communication needed for the nonzero values  */
1140   nzmax = nz; /* the most space any one process needs */
1141   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1142   ierr = PetscMalloc(nzmax*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
1143   cnt  = 0;
1144   for (i=0; i<a->mbs; i++) {
1145     pcnt = cnt;
1146     for (j=B->i[i]; j<B->i[i+1]; j++) {
1147       if ( (col = garray[B->j[j]]) > cstart) break;
1148       for (l=0; l<bs; l++) {
1149 	column_indices[cnt++] = bs*col+l;
1150       }
1151     }
1152     for (k=A->i[i]; k<A->i[i+1]; k++) {
1153       for (l=0; l<bs; l++) {
1154         column_indices[cnt++] = bs*(A->j[k] + cstart)+l;
1155       }
1156     }
1157     for (; j<B->i[i+1]; j++) {
1158       for (l=0; l<bs; l++) {
1159         column_indices[cnt++] = bs*garray[B->j[j]]+l;
1160       }
1161     }
1162     len = cnt - pcnt;
1163     for (k=1; k<bs; k++) {
1164       ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr);
1165       cnt += len;
1166     }
1167   }
1168   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1169 
1170   /* store the columns to the file */
1171   if (!rank) {
1172     MPI_Status status;
1173     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1174     for (i=1; i<size; i++) {
1175       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1176       ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1177       ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1178     }
1179   } else {
1180     ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1181     ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1182   }
1183   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1184 
1185   /* load up the numerical values */
1186   ierr = PetscMalloc(nzmax*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1187   cnt = 0;
1188   for (i=0; i<a->mbs; i++) {
1189     rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]);
1190     for (j=B->i[i]; j<B->i[i+1]; j++) {
1191       if ( garray[B->j[j]] > cstart) break;
1192       for (l=0; l<bs; l++) {
1193         for (ll=0; ll<bs; ll++) {
1194 	  column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1195         }
1196       }
1197       cnt += bs;
1198     }
1199     for (k=A->i[i]; k<A->i[i+1]; k++) {
1200       for (l=0; l<bs; l++) {
1201         for (ll=0; ll<bs; ll++) {
1202           column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll];
1203         }
1204       }
1205       cnt += bs;
1206     }
1207     for (; j<B->i[i+1]; j++) {
1208       for (l=0; l<bs; l++) {
1209         for (ll=0; ll<bs; ll++) {
1210 	  column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1211         }
1212       }
1213       cnt += bs;
1214     }
1215     cnt += (bs-1)*rlen;
1216   }
1217   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1218 
1219   /* store the column values to the file */
1220   if (!rank) {
1221     MPI_Status status;
1222     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1223     for (i=1; i<size; i++) {
1224       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1225       ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1226       ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1227     }
1228   } else {
1229     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1230     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1231   }
1232   ierr = PetscFree(column_values);CHKERRQ(ierr);
1233 
1234   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1235   if (file) {
1236     fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
1237   }
1238   PetscFunctionReturn(0);
1239 }
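
/*
   Informal summary of the binary layout written above (not part of the library
   code; derived from the PetscBinaryWrite() calls in MatView_MPIBAIJ_Binary()):

       header[4]        MAT_FILE_CLASSID, global rows M, global columns N,
                        total number of nonzeros summed over all processes
       row lengths      one PetscInt per point row
       column indices   nz PetscInts, row by row in increasing column order
       values           nz PetscScalars in the same ordering

   The viewer's companion info file additionally records -matload_block_size so
   that a later MatLoad() can pick up the block size.
*/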
1240 
1241 #undef __FUNCT__
1242 #define __FUNCT__ "MatView_MPIBAIJ"
1243 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
1244 {
1245   PetscErrorCode ierr;
1246   PetscTruth     iascii,isdraw,issocket,isbinary;
1247 
1248   PetscFunctionBegin;
1249   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1250   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1251   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1252   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1253   if (iascii || isdraw || issocket) {
1254     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1255   } else if (isbinary) {
1256     ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1257   } else {
1258     SETERRQ1(((PetscObject)mat)->comm,PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
1259   }
1260   PetscFunctionReturn(0);
1261 }
1262 
1263 #undef __FUNCT__
1264 #define __FUNCT__ "MatDestroy_MPIBAIJ"
1265 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
1266 {
1267   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1268   PetscErrorCode ierr;
1269 
1270   PetscFunctionBegin;
1271 #if defined(PETSC_USE_LOG)
1272   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
1273 #endif
1274   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1275   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
1276   if (baij->A){ierr = MatDestroy(baij->A);CHKERRQ(ierr);}
1277   if (baij->B){ierr = MatDestroy(baij->B);CHKERRQ(ierr);}
1278 #if defined (PETSC_USE_CTABLE)
1279   if (baij->colmap) {ierr = PetscTableDestroy(baij->colmap);CHKERRQ(ierr);}
1280 #else
1281   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
1282 #endif
1283   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
1284   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
1285   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
1286   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
1287   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
1288   ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);
1289   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
1290   ierr = PetscFree(baij);CHKERRQ(ierr);
1291 
1292   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1293   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1294   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1295   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
1296   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1297   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1298   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
1299   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr);
1300   PetscFunctionReturn(0);
1301 }
1302 
1303 #undef __FUNCT__
1304 #define __FUNCT__ "MatMult_MPIBAIJ"
1305 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1306 {
1307   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1308   PetscErrorCode ierr;
1309   PetscInt       nt;
1310 
1311   PetscFunctionBegin;
1312   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1313   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1314   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1315   if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
1316   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1317   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1318   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1319   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1320   PetscFunctionReturn(0);
1321 }
1322 
1323 #undef __FUNCT__
1324 #define __FUNCT__ "MatMultAdd_MPIBAIJ"
1325 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1326 {
1327   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1328   PetscErrorCode ierr;
1329 
1330   PetscFunctionBegin;
1331   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1332   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1333   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1334   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1335   PetscFunctionReturn(0);
1336 }
1337 
1338 #undef __FUNCT__
1339 #define __FUNCT__ "MatMultTranspose_MPIBAIJ"
1340 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1341 {
1342   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1343   PetscErrorCode ierr;
1344   PetscTruth     merged;
1345 
1346   PetscFunctionBegin;
1347   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
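  /* When the scatter is "merged" the communication completes inside VecScatterBegin(),
     so it cannot be overlapped with the local product; the two branches below do the
     same work and differ only in how the scatter and the local multiply are interleaved */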
1348   /* do nondiagonal part */
1349   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1350   if (!merged) {
1351     /* send it on its way */
1352     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1353     /* do local part */
1354     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1355     /* receive remote parts: note this assumes the values are not actually */
1356     /* inserted in yy until the next line */
1357     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1358   } else {
1359     /* do local part */
1360     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1361     /* send it on its way */
1362     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1363     /* values were actually received in the Begin() but we still need to call this no-op */
1364     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1365   }
1366   PetscFunctionReturn(0);
1367 }
1368 
1369 #undef __FUNCT__
1370 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
1371 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1372 {
1373   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1374   PetscErrorCode ierr;
1375 
1376   PetscFunctionBegin;
1377   /* do nondiagonal part */
1378   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1379   /* send it on its way */
1380   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1381   /* do local part */
1382   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1383   /* receive remote parts: note this assumes the values are not actually */
1384   /* inserted in zz until the next line, which is true for this implementation */
1385   /* but is not necessarily always true. */
1386   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1387   PetscFunctionReturn(0);
1388 }
1389 
1390 /*
1391   This only works correctly for square matrices where the subblock A->A is the
1392    diagonal block
1393 */
1394 #undef __FUNCT__
1395 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
1396 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1397 {
1398   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1399   PetscErrorCode ierr;
1400 
1401   PetscFunctionBegin;
1402   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1403   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1404   PetscFunctionReturn(0);
1405 }
1406 
1407 #undef __FUNCT__
1408 #define __FUNCT__ "MatScale_MPIBAIJ"
1409 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
1410 {
1411   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1412   PetscErrorCode ierr;
1413 
1414   PetscFunctionBegin;
1415   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1416   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1417   PetscFunctionReturn(0);
1418 }
1419 
1420 #undef __FUNCT__
1421 #define __FUNCT__ "MatGetRow_MPIBAIJ"
1422 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1423 {
1424   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
1425   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1426   PetscErrorCode ierr;
1427   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1428   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
1429   PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;
1430 
1431   PetscFunctionBegin;
1432   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1433   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1434   mat->getrowactive = PETSC_TRUE;
1435 
1436   if (!mat->rowvalues && (idx || v)) {
1437     /*
1438         allocate enough space to hold information from the longest row.
1439     */
1440     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1441     PetscInt     max = 1,mbs = mat->mbs,tmp;
1442     for (i=0; i<mbs; i++) {
1443       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1444       if (max < tmp) { max = tmp; }
1445     }
1446     ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1447   }
1448   lrow = row - brstart;
1449 
1450   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1451   if (!v)   {pvA = 0; pvB = 0;}
1452   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1453   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1454   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1455   nztot = nzA + nzB;
1456 
1457   cmap  = mat->garray;
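  /* garray maps a column of the off-diagonal block B (compressed block numbering) back
     to its global block column; entries of B that fall before the diagonal block are
     emitted first, then the diagonal block A, then the remaining entries of B, so the
     merged row comes out with globally increasing column indices */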
1458   if (v  || idx) {
1459     if (nztot) {
1460       /* Sort by increasing column numbers, assuming A and B already sorted */
1461       PetscInt imark = -1;
1462       if (v) {
1463         *v = v_p = mat->rowvalues;
1464         for (i=0; i<nzB; i++) {
1465           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1466           else break;
1467         }
1468         imark = i;
1469         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1470         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1471       }
1472       if (idx) {
1473         *idx = idx_p = mat->rowindices;
1474         if (imark > -1) {
1475           for (i=0; i<imark; i++) {
1476             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1477           }
1478         } else {
1479           for (i=0; i<nzB; i++) {
1480             if (cmap[cworkB[i]/bs] < cstart)
1481               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1482             else break;
1483           }
1484           imark = i;
1485         }
1486         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1487         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1488       }
1489     } else {
1490       if (idx) *idx = 0;
1491       if (v)   *v   = 0;
1492     }
1493   }
1494   *nz = nztot;
1495   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1496   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1497   PetscFunctionReturn(0);
1498 }
1499 
1500 #undef __FUNCT__
1501 #define __FUNCT__ "MatRestoreRow_MPIBAIJ"
1502 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1503 {
1504   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1505 
1506   PetscFunctionBegin;
1507   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1508   baij->getrowactive = PETSC_FALSE;
1509   PetscFunctionReturn(0);
1510 }
1511 
1512 #undef __FUNCT__
1513 #define __FUNCT__ "MatZeroEntries_MPIBAIJ"
1514 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1515 {
1516   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1517   PetscErrorCode ierr;
1518 
1519   PetscFunctionBegin;
1520   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1521   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1522   PetscFunctionReturn(0);
1523 }
1524 
1525 #undef __FUNCT__
1526 #define __FUNCT__ "MatGetInfo_MPIBAIJ"
1527 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1528 {
1529   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
1530   Mat            A = a->A,B = a->B;
1531   PetscErrorCode ierr;
1532   PetscReal      isend[5],irecv[5];
1533 
1534   PetscFunctionBegin;
1535   info->block_size     = (PetscReal)matin->rmap->bs;
1536   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1537   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1538   isend[3] = info->memory;  isend[4] = info->mallocs;
1539   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1540   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1541   isend[3] += info->memory;  isend[4] += info->mallocs;
1542   if (flag == MAT_LOCAL) {
1543     info->nz_used      = isend[0];
1544     info->nz_allocated = isend[1];
1545     info->nz_unneeded  = isend[2];
1546     info->memory       = isend[3];
1547     info->mallocs      = isend[4];
1548   } else if (flag == MAT_GLOBAL_MAX) {
1549     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1550     info->nz_used      = irecv[0];
1551     info->nz_allocated = irecv[1];
1552     info->nz_unneeded  = irecv[2];
1553     info->memory       = irecv[3];
1554     info->mallocs      = irecv[4];
1555   } else if (flag == MAT_GLOBAL_SUM) {
1556     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1557     info->nz_used      = irecv[0];
1558     info->nz_allocated = irecv[1];
1559     info->nz_unneeded  = irecv[2];
1560     info->memory       = irecv[3];
1561     info->mallocs      = irecv[4];
1562   } else {
1563     SETERRQ1(((PetscObject)matin)->comm,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1564   }
1565   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1566   info->fill_ratio_needed = 0;
1567   info->factor_mallocs    = 0;
1568   PetscFunctionReturn(0);
1569 }
1570 
1571 #undef __FUNCT__
1572 #define __FUNCT__ "MatSetOption_MPIBAIJ"
1573 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscTruth flg)
1574 {
1575   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1576   PetscErrorCode ierr;
1577 
1578   PetscFunctionBegin;
1579   switch (op) {
1580   case MAT_NEW_NONZERO_LOCATIONS:
1581   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1582   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1583   case MAT_KEEP_NONZERO_PATTERN:
1584   case MAT_NEW_NONZERO_LOCATION_ERR:
1585     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1586     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1587     break;
1588   case MAT_ROW_ORIENTED:
1589     a->roworiented = flg;
1590     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1591     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1592     break;
1593   case MAT_NEW_DIAGONALS:
1594     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1595     break;
1596   case MAT_IGNORE_OFF_PROC_ENTRIES:
1597     a->donotstash = flg;
1598     break;
1599   case MAT_USE_HASH_TABLE:
1600     a->ht_flag = flg;
1601     break;
1602   case MAT_SYMMETRIC:
1603   case MAT_STRUCTURALLY_SYMMETRIC:
1604   case MAT_HERMITIAN:
1605   case MAT_SYMMETRY_ETERNAL:
1606     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1607     break;
1608   default:
1609     SETERRQ1(((PetscObject)A)->comm,PETSC_ERR_SUP,"unknown option %d",op);
1610   }
1611   PetscFunctionReturn(0);
1612 }
1613 
1614 #undef __FUNCT__
1615 #define __FUNCT__ "MatTranspose_MPIBAIJ"
1616 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
1617 {
1618   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
1619   Mat_SeqBAIJ    *Aloc;
1620   Mat            B;
1621   PetscErrorCode ierr;
1622   PetscInt       M=A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
1623   PetscInt       bs=A->rmap->bs,mbs=baij->mbs;
1624   MatScalar      *a;
1625 
1626   PetscFunctionBegin;
1627   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1628   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1629     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
1630     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1631     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1632     ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1633   } else {
1634     B = *matout;
1635   }
1636 
1637   /* copy over the A part */
1638   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1639   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1640   ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
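  /* rvals holds the bs global row numbers of block row i of A; each group of bs values
     taken from a block is inserted as one row of the transpose, with the original rows
     becoming the columns */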
1641 
1642   for (i=0; i<mbs; i++) {
1643     rvals[0] = bs*(baij->rstartbs + i);
1644     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1645     for (j=ai[i]; j<ai[i+1]; j++) {
1646       col = (baij->cstartbs+aj[j])*bs;
1647       for (k=0; k<bs; k++) {
1648         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1649         col++; a += bs;
1650       }
1651     }
1652   }
1653   /* copy over the B part */
1654   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1655   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1656   for (i=0; i<mbs; i++) {
1657     rvals[0] = bs*(baij->rstartbs + i);
1658     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1659     for (j=ai[i]; j<ai[i+1]; j++) {
1660       col = baij->garray[aj[j]]*bs;
1661       for (k=0; k<bs; k++) {
1662         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1663         col++; a += bs;
1664       }
1665     }
1666   }
1667   ierr = PetscFree(rvals);CHKERRQ(ierr);
1668   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1669   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1670 
1671   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1672     *matout = B;
1673   } else {
1674     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
1675   }
1676   PetscFunctionReturn(0);
1677 }
1678 
1679 #undef __FUNCT__
1680 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
1681 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1682 {
1683   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1684   Mat            a = baij->A,b = baij->B;
1685   PetscErrorCode ierr;
1686   PetscInt       s1,s2,s3;
1687 
1688   PetscFunctionBegin;
1689   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1690   if (rr) {
1691     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1692     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1693     /* Overlap communication with computation. */
1694     ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1695   }
1696   if (ll) {
1697     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1698     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1699     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1700   }
1701   /* scale the diagonal block */
1702   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1703 
1704   if (rr) {
1705     /* Do a scatter end and then right scale the off-diagonal block */
1706     ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1707     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1708   }
1709 
1710   PetscFunctionReturn(0);
1711 }
1712 
1713 #undef __FUNCT__
1714 #define __FUNCT__ "MatZeroRows_MPIBAIJ"
1715 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
1716 {
1717   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1718   PetscErrorCode ierr;
1719   PetscMPIInt    imdex,size = l->size,n,rank = l->rank;
1720   PetscInt       i,*owners = A->rmap->range;
1721   PetscInt       *nprocs,j,idx,nsends,row;
1722   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
1723   PetscInt       *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source,lastidx = -1;
1724   PetscInt       *lens,*lrows,*values,rstart_bs=A->rmap->rstart;
1725   MPI_Comm       comm = ((PetscObject)A)->comm;
1726   MPI_Request    *send_waits,*recv_waits;
1727   MPI_Status     recv_status,*send_status;
1728 #if defined(PETSC_DEBUG)
1729   PetscTruth     found = PETSC_FALSE;
1730 #endif
1731 
1732   PetscFunctionBegin;
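  /*
     Communication pattern: each process determines the owner of every row index it was
     given, sends off-process row indices to their owners with nonblocking sends, receives
     the row indices it owns from the other processes, and then zeros those local rows
     (optionally setting the diagonal) below.
  */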
1733   /*  first count number of contributors to each processor */
1734   ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
1735   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
1736   ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
1737   j = 0;
1738   for (i=0; i<N; i++) {
1739     if (lastidx > (idx = rows[i])) j = 0;
1740     lastidx = idx;
1741     for (; j<size; j++) {
1742       if (idx >= owners[j] && idx < owners[j+1]) {
1743         nprocs[2*j]++;
1744         nprocs[2*j+1] = 1;
1745         owner[i] = j;
1746 #if defined(PETSC_DEBUG)
1747         found = PETSC_TRUE;
1748 #endif
1749         break;
1750       }
1751     }
1752 #if defined(PETSC_DEBUG)
1753     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
1754     found = PETSC_FALSE;
1755 #endif
1756   }
1757   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
1758 
1759   if (A->nooffproczerorows) {
1760     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
1761     nrecvs = nsends;
1762     nmax   = N;
1763   } else {
1764     /* inform other processors of number of messages and max length*/
1765     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
1766   }
1767 
1768   /* post receives:   */
1769   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
1770   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
1771   for (i=0; i<nrecvs; i++) {
1772     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
1773   }
1774 
1775   /* do sends:
1776      1) starts[i] gives the starting index in svalues for stuff going to
1777      the ith processor
1778   */
1779   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
1780   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
1781   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
1782   starts[0]  = 0;
1783   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1784   for (i=0; i<N; i++) {
1785     svalues[starts[owner[i]]++] = rows[i];
1786   }
1787 
1788   starts[0] = 0;
1789   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1790   count = 0;
1791   for (i=0; i<size; i++) {
1792     if (nprocs[2*i+1]) {
1793       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
1794     }
1795   }
1796   ierr = PetscFree(starts);CHKERRQ(ierr);
1797 
1798   base = owners[rank];
1799 
1800   /*  wait on receives */
1801   ierr   = PetscMalloc2(nrecvs+1,PetscInt,&lens,nrecvs+1,PetscInt,&source);CHKERRQ(ierr);
1802   count  = nrecvs;
1803   slen = 0;
1804   while (count) {
1805     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
1806     /* unpack receives into our local space */
1807     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
1808     source[imdex]  = recv_status.MPI_SOURCE;
1809     lens[imdex]    = n;
1810     slen          += n;
1811     count--;
1812   }
1813   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1814 
1815   /* move the data into the send scatter */
1816   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
1817   count = 0;
1818   for (i=0; i<nrecvs; i++) {
1819     values = rvalues + i*nmax;
1820     for (j=0; j<lens[i]; j++) {
1821       lrows[count++] = values[j] - base;
1822     }
1823   }
1824   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1825   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
1826   ierr = PetscFree(owner);CHKERRQ(ierr);
1827   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1828 
1829   /* actually zap the local rows */
1830   /*
1831         Zero the required rows. If the "diagonal block" of the matrix
1832      is square and the user wishes to set the diagonal, we use separate
1833      code so that MatSetValues() is not called for each diagonal entry,
1834      which would allocate new memory, causing many mallocs and slowing things down.
1835 
1836   */
1837   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1838   ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0);CHKERRQ(ierr);
1839   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
1840     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag);CHKERRQ(ierr);
1841   } else if (diag != 0.0) {
1842     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr);
1843     if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1844        MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1845     for (i=0; i<slen; i++) {
1846       row  = lrows[i] + rstart_bs;
1847       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
1848     }
1849     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1850     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1851   } else {
1852     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr);
1853   }
1854 
1855   ierr = PetscFree(lrows);CHKERRQ(ierr);
1856 
1857   /* wait on sends */
1858   if (nsends) {
1859     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1860     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1861     ierr = PetscFree(send_status);CHKERRQ(ierr);
1862   }
1863   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1864   ierr = PetscFree(svalues);CHKERRQ(ierr);
1865 
1866   PetscFunctionReturn(0);
1867 }
1868 
1869 #undef __FUNCT__
1870 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
1871 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1872 {
1873   Mat_MPIBAIJ    *a   = (Mat_MPIBAIJ*)A->data;
1874   PetscErrorCode ierr;
1875 
1876   PetscFunctionBegin;
1877   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1878   PetscFunctionReturn(0);
1879 }
1880 
1881 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
1882 
1883 #undef __FUNCT__
1884 #define __FUNCT__ "MatEqual_MPIBAIJ"
1885 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag)
1886 {
1887   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1888   Mat            a,b,c,d;
1889   PetscTruth     flg;
1890   PetscErrorCode ierr;
1891 
1892   PetscFunctionBegin;
1893   a = matA->A; b = matA->B;
1894   c = matB->A; d = matB->B;
1895 
1896   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1897   if (flg) {
1898     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1899   }
1900   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1901   PetscFunctionReturn(0);
1902 }
1903 
1904 #undef __FUNCT__
1905 #define __FUNCT__ "MatCopy_MPIBAIJ"
1906 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
1907 {
1908   PetscErrorCode ierr;
1909   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ *)A->data;
1910   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ *)B->data;
1911 
1912   PetscFunctionBegin;
1913   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1914   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1915     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1916   } else {
1917     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1918     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1919   }
1920   PetscFunctionReturn(0);
1921 }
1922 
1923 #undef __FUNCT__
1924 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ"
1925 PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A)
1926 {
1927   PetscErrorCode ierr;
1928 
1929   PetscFunctionBegin;
1930   ierr =  MatMPIBAIJSetPreallocation(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1931   PetscFunctionReturn(0);
1932 }
1933 
1934 #undef __FUNCT__
1935 #define __FUNCT__ "MatAXPY_MPIBAIJ"
1936 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1937 {
1938   PetscErrorCode ierr;
1939   Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data;
1940   PetscBLASInt   bnz,one=1;
1941   Mat_SeqBAIJ    *x,*y;
1942 
1943   PetscFunctionBegin;
1944   if (str == SAME_NONZERO_PATTERN) {
1945     PetscScalar alpha = a;
1946     x = (Mat_SeqBAIJ *)xx->A->data;
1947     y = (Mat_SeqBAIJ *)yy->A->data;
1948     bnz = PetscBLASIntCast(x->nz);
1949     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1950     x = (Mat_SeqBAIJ *)xx->B->data;
1951     y = (Mat_SeqBAIJ *)yy->B->data;
1952     bnz = PetscBLASIntCast(x->nz);
1953     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1954   } else {
1955     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1956   }
1957   PetscFunctionReturn(0);
1958 }
1959 
1960 #undef __FUNCT__
1961 #define __FUNCT__ "MatSetBlockSize_MPIBAIJ"
1962 PetscErrorCode MatSetBlockSize_MPIBAIJ(Mat A,PetscInt bs)
1963 {
1964   Mat_MPIBAIJ    *a   = (Mat_MPIBAIJ*)A->data;
1965   PetscInt rbs,cbs;
1966   PetscErrorCode ierr;
1967 
1968   PetscFunctionBegin;
1969   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1970   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1971   ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr);
1972   ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr);
1973   if (rbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,rbs);
1974   if (cbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,cbs);
1975   PetscFunctionReturn(0);
1976 }
1977 
1978 #undef __FUNCT__
1979 #define __FUNCT__ "MatRealPart_MPIBAIJ"
1980 PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
1981 {
1982   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ*)A->data;
1983   PetscErrorCode ierr;
1984 
1985   PetscFunctionBegin;
1986   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1987   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1988   PetscFunctionReturn(0);
1989 }
1990 
1991 #undef __FUNCT__
1992 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ"
1993 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
1994 {
1995   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ*)A->data;
1996   PetscErrorCode ierr;
1997 
1998   PetscFunctionBegin;
1999   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2000   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2001   PetscFunctionReturn(0);
2002 }
2003 
2004 #undef __FUNCT__
2005 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ"
2006 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
2007 {
2008   PetscErrorCode ierr;
2009   IS             iscol_local;
2010   PetscInt       csize;
2011 
2012   PetscFunctionBegin;
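  /* The column IS is gathered onto every process and, for MAT_INITIAL_MATRIX, composed
     with the new matrix under the key "ISAllGather" so a later MAT_REUSE_MATRIX call can
     retrieve it instead of gathering it again */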
2013   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
2014   if (call == MAT_REUSE_MATRIX) {
2015     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
2016     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2017   } else {
2018     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
2019   }
2020   ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
2021   if (call == MAT_INITIAL_MATRIX) {
2022     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
2023     ierr = ISDestroy(iscol_local);CHKERRQ(ierr);
2024   }
2025   PetscFunctionReturn(0);
2026 }
2027 
2028 #undef __FUNCT__
2029 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ_Private"
2030 /*
2031     Not great since it makes two copies of the submatrix: first a SeqBAIJ
2032   on each process, and then the final result by concatenating the local matrices.
2033   Writing it directly would be much like MatGetSubMatrices_MPIBAIJ().
2034 */
2035 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2036 {
2037   PetscErrorCode ierr;
2038   PetscMPIInt    rank,size;
2039   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs;
2040   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2041   Mat            *local,M,Mreuse;
2042   MatScalar      *vwork,*aa;
2043   MPI_Comm       comm = ((PetscObject)mat)->comm;
2044   Mat_SeqBAIJ    *aij;
2045 
2046 
2047   PetscFunctionBegin;
2048   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2049   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2050 
2051   if (call ==  MAT_REUSE_MATRIX) {
2052     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2053     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2054     local = &Mreuse;
2055     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2056   } else {
2057     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2058     Mreuse = *local;
2059     ierr   = PetscFree(local);CHKERRQ(ierr);
2060   }
2061 
2062   /*
2063       m - number of local rows
2064       n - number of columns (same on all processors)
2065       rstart - first row in new global matrix generated
2066   */
2067   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2068   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2069   m    = m/bs;
2070   n    = n/bs;
2071 
2072   if (call == MAT_INITIAL_MATRIX) {
2073     aij = (Mat_SeqBAIJ*)(Mreuse)->data;
2074     ii  = aij->i;
2075     jj  = aij->j;
2076 
2077     /*
2078         Determine the number of non-zeros in the diagonal and off-diagonal
2079         portions of the matrix in order to do correct preallocation
2080     */
2081 
2082     /* first get start and end of "diagonal" columns */
2083     if (csize == PETSC_DECIDE) {
2084       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2085       if (mglobal == n*bs) { /* square matrix */
2086         nlocal = m;
2087       } else {
2088         nlocal = n/size + ((n % size) > rank);
2089       }
2090     } else {
2091       nlocal = csize/bs;
2092     }
2093     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2094     rstart = rend - nlocal;
2095     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2096 
2097     /* next, compute all the lengths */
2098     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2099     olens = dlens + m;
2100     for (i=0; i<m; i++) {
2101       jend = ii[i+1] - ii[i];
2102       olen = 0;
2103       dlen = 0;
2104       for (j=0; j<jend; j++) {
2105         if (*jj < rstart || *jj >= rend) olen++;
2106         else dlen++;
2107         jj++;
2108       }
2109       olens[i] = olen;
2110       dlens[i] = dlen;
2111     }
2112     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
2113     ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr);
2114     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
2115     ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
2116     ierr = PetscFree(dlens);CHKERRQ(ierr);
2117   } else {
2118     PetscInt ml,nl;
2119 
2120     M = *newmat;
2121     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2122     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2123     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2124     /*
2125          The next two lines are needed so we may call MatSetValuesBlocked_MPIBAIJ() below directly,
2126        rather than the slower MatSetValues().
2127     */
2128     M->was_assembled = PETSC_TRUE;
2129     M->assembled     = PETSC_FALSE;
2130   }
2131   ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2132   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2133   aij = (Mat_SeqBAIJ*)(Mreuse)->data;
2134   ii  = aij->i;
2135   jj  = aij->j;
2136   aa  = aij->a;
2137   for (i=0; i<m; i++) {
2138     row   = rstart/bs + i;
2139     nz    = ii[i+1] - ii[i];
2140     cwork = jj;     jj += nz;
2141     vwork = aa;     aa += nz;
2142     ierr = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2143   }
2144 
2145   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2146   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2147   *newmat = M;
2148 
2149   /* save submatrix used in processor for next request */
2150   if (call ==  MAT_INITIAL_MATRIX) {
2151     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2152     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2153   }
2154 
2155   PetscFunctionReturn(0);
2156 }
2157 
2158 #undef __FUNCT__
2159 #define __FUNCT__ "MatPermute_MPIBAIJ"
2160 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B)
2161 {
2162   MPI_Comm       comm,pcomm;
2163   PetscInt       first,local_size,nrows;
2164   const PetscInt *rows;
2165   PetscMPIInt    size;
2166   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
2167   PetscErrorCode ierr;
2168 
2169   PetscFunctionBegin;
2170   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2171   /* make a collective version of 'rowp' */
2172   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
2173   if (pcomm==comm) {
2174     crowp = rowp;
2175   } else {
2176     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
2177     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
2178     ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr);
2179     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
2180   }
2181   /* collect the global row permutation and invert it */
2182   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
2183   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
2184   if (pcomm!=comm) {
2185     ierr = ISDestroy(crowp);CHKERRQ(ierr);
2186   }
2187   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
2188   /* get the local target indices */
2189   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
2190   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
2191   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
2192   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr);
2193   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
2194   ierr = ISDestroy(irowp);CHKERRQ(ierr);
2195   /* the column permutation is so much easier;
2196      make a local version of 'colp' and invert it */
2197   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
2198   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
2199   if (size==1) {
2200     lcolp = colp;
2201   } else {
2202     ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
2203     ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
2204     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr);
2205   }
2206   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
2207   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
2208   ierr = ISSetPermutation(icolp);CHKERRQ(ierr);
2209   if (size>1) {
2210     ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
2211     ierr = ISDestroy(lcolp);CHKERRQ(ierr);
2212   }
2213   /* now we just get the submatrix */
2214   ierr = MatGetSubMatrix_MPIBAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
2215   /* clean up */
2216   ierr = ISDestroy(lrowp);CHKERRQ(ierr);
2217   ierr = ISDestroy(icolp);CHKERRQ(ierr);
2218   PetscFunctionReturn(0);
2219 }
2220 
2221 #undef __FUNCT__
2222 #define __FUNCT__ "MatGetGhosts_MPIBAIJ"
2223 PetscErrorCode PETSCMAT_DLLEXPORT MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
2224 {
2225   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*) mat->data;
2226   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
2227 
2228   PetscFunctionBegin;
2229   if (nghosts) { *nghosts = B->nbs;}
2230   if (ghosts) {*ghosts = baij->garray;}
2231   PetscFunctionReturn(0);
2232 }
2233 
2234 EXTERN PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat);
2235 
2236 #undef __FUNCT__
2237 #define __FUNCT__ "MatFDColoringCreate_MPIBAIJ"
2238 /*
2239     This routine is almost identical to MatFDColoringCreate_MPIAIJ()!
2240 */
2241 PetscErrorCode MatFDColoringCreate_MPIBAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c)
2242 {
2243   Mat_MPIBAIJ            *baij = (Mat_MPIBAIJ*)mat->data;
2244   PetscErrorCode        ierr;
2245   PetscMPIInt           size,*ncolsonproc,*disp,nn;
2246   PetscInt              bs,i,n,nrows,j,k,m,*rows = 0,*A_ci,*A_cj,ncols,col;
2247   const PetscInt        *is;
2248   PetscInt              nis = iscoloring->n,nctot,*cols,*B_ci,*B_cj;
2249   PetscInt              *rowhit,M,cstart,cend,colb;
2250   PetscInt              *columnsforrow,l;
2251   IS                    *isa;
2252   PetscTruth             done,flg;
2253   ISLocalToGlobalMapping map = mat->bmapping;
2254   PetscInt               *ltog = (map ? map->indices : (PetscInt*) PETSC_NULL) ,ctype=c->ctype;
2255 
2256   PetscFunctionBegin;
2257   if (!mat->assembled) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled first; MatAssemblyBegin/End();");
2258   if (ctype == IS_COLORING_GHOSTED && !map) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_INCOMP,"When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMappingBlock");
2259 
2260   ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr);
2261   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2262   M                = mat->rmap->n/bs;
2263   cstart           = mat->cmap->rstart/bs;
2264   cend             = mat->cmap->rend/bs;
2265   c->M             = mat->rmap->N/bs;  /* set the global rows and columns and local rows */
2266   c->N             = mat->cmap->N/bs;
2267   c->m             = mat->rmap->n/bs;
2268   c->rstart        = mat->rmap->rstart/bs;
2269 
2270   c->ncolors       = nis;
2271   ierr             = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr);
2272   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr);
2273   ierr             = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr);
2274   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr);
2275   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr);
2276   ierr = PetscLogObjectMemory(c,5*nis*sizeof(PetscInt));CHKERRQ(ierr);
2277 
2278   /* Allow access to data structures of local part of matrix */
2279   if (!baij->colmap) {
2280     ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
2281   }
2282   ierr = MatGetColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr);
2283   ierr = MatGetColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr);
2284 
2285   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr);
2286   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr);
2287 
2288   for (i=0; i<nis; i++) {
2289     ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr);
2290     ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr);
2291     c->ncolumns[i] = n;
2292     if (n) {
2293       ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr);
2294       ierr = PetscLogObjectMemory(c,n*sizeof(PetscInt));CHKERRQ(ierr);
2295       ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr);
2296     } else {
2297       c->columns[i]  = 0;
2298     }
2299 
2300     if (ctype == IS_COLORING_GLOBAL){
2301       /* Determine the total (parallel) number of columns of this color */
2302       ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
2303       ierr = PetscMalloc2(size,PetscMPIInt,&ncolsonproc,size,PetscMPIInt,&disp);CHKERRQ(ierr);
2304 
2305       nn   = PetscMPIIntCast(n);
2306       ierr = MPI_Allgather(&nn,1,MPI_INT,ncolsonproc,1,MPI_INT,((PetscObject)mat)->comm);CHKERRQ(ierr);
2307       nctot = 0; for (j=0; j<size; j++) {nctot += ncolsonproc[j];}
2308       if (!nctot) {
2309         ierr = PetscInfo(mat,"Coloring of matrix has some unneeded colors with no corresponding rows\n");CHKERRQ(ierr);
2310       }
2311 
2312       disp[0] = 0;
2313       for (j=1; j<size; j++) {
2314         disp[j] = disp[j-1] + ncolsonproc[j-1];
2315       }
2316 
2317       /* Get complete list of columns for color on each processor */
2318       ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2319       ierr = MPI_Allgatherv((void*)is,n,MPIU_INT,cols,ncolsonproc,disp,MPIU_INT,((PetscObject)mat)->comm);CHKERRQ(ierr);
2320       ierr = PetscFree2(ncolsonproc,disp);CHKERRQ(ierr);
2321     } else if (ctype == IS_COLORING_GHOSTED){
2322       /* Determine local number of columns of this color on this process, including ghost points */
2323       nctot = n;
2324       ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2325       ierr = PetscMemcpy(cols,is,n*sizeof(PetscInt));CHKERRQ(ierr);
2326     } else {
2327       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not provided for this MatFDColoring type");
2328     }
2329 
2330     /*
2331        Mark all rows affected by these columns
2332     */
2333     /* Temporary option to allow for debugging/testing */
2334     flg  = PETSC_FALSE;
2335     ierr = PetscOptionsGetTruth(PETSC_NULL,"-matfdcoloring_slow",&flg,PETSC_NULL);CHKERRQ(ierr);
2336     if (!flg) {/*-----------------------------------------------------------------------------*/
2337       /* crude, fast version */
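      /* rowhit[r] records col+1 for the column of this color that touches row r,
         so a value of 0 means row r is not affected by any column of this color */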
2338       ierr = PetscMemzero(rowhit,M*sizeof(PetscInt));CHKERRQ(ierr);
2339       /* loop over columns*/
2340       for (j=0; j<nctot; j++) {
2341         if (ctype == IS_COLORING_GHOSTED) {
2342           col = ltog[cols[j]];
2343         } else {
2344           col  = cols[j];
2345         }
2346         if (col >= cstart && col < cend) {
2347           /* column is in diagonal block of matrix */
2348           rows = A_cj + A_ci[col-cstart];
2349           m    = A_ci[col-cstart+1] - A_ci[col-cstart];
2350         } else {
2351 #if defined (PETSC_USE_CTABLE)
2352           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2353           colb --;
2354 #else
2355           colb = baij->colmap[col] - 1;
2356 #endif
2357           if (colb == -1) {
2358             m = 0;
2359           } else {
2360             colb = colb/bs;
2361             rows = B_cj + B_ci[colb];
2362             m    = B_ci[colb+1] - B_ci[colb];
2363           }
2364         }
2365         /* loop over the rows of this column, marking them in rowhit */
2366         for (k=0; k<m; k++) {
2367           rowhit[*rows++] = col + 1;
2368         }
2369       }
2370 
2371       /* count the number of hits */
2372       nrows = 0;
2373       for (j=0; j<M; j++) {
2374         if (rowhit[j]) nrows++;
2375       }
2376       c->nrows[i]         = nrows;
2377       ierr                = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
2378       ierr                = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
2379       ierr = PetscLogObjectMemory(c,2*(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr);
2380       nrows = 0;
2381       for (j=0; j<M; j++) {
2382         if (rowhit[j]) {
2383           c->rows[i][nrows]           = j;
2384           c->columnsforrow[i][nrows] = rowhit[j] - 1;
2385           nrows++;
2386         }
2387       }
2388     } else {/*-------------------------------------------------------------------------------*/
2389       /* slow version, using rowhit as a linked list */
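      /* rowhit[] is used as a sorted singly linked list of the rows hit so far:
         rowhit[M] = M is the head/end sentinel, rowhit[r] points to the next hit row,
         and columnsforrow[r] remembers which column of this color hit row r */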
2390       PetscInt currentcol,fm,mfm;
2391       rowhit[M] = M;
2392       nrows     = 0;
2393       /* loop over columns*/
2394       for (j=0; j<nctot; j++) {
2395         if (ctype == IS_COLORING_GHOSTED) {
2396           col = ltog[cols[j]];
2397         } else {
2398           col  = cols[j];
2399         }
2400         if (col >= cstart && col < cend) {
2401           /* column is in diagonal block of matrix */
2402           rows = A_cj + A_ci[col-cstart];
2403           m    = A_ci[col-cstart+1] - A_ci[col-cstart];
2404         } else {
2405 #if defined (PETSC_USE_CTABLE)
2406           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2407           colb --;
2408 #else
2409           colb = baij->colmap[col] - 1;
2410 #endif
2411           if (colb == -1) {
2412             m = 0;
2413           } else {
2414             colb = colb/bs;
2415             rows = B_cj + B_ci[colb];
2416             m    = B_ci[colb+1] - B_ci[colb];
2417           }
2418         }
2419 
2420         /* loop over the rows of this column, marking them in rowhit */
2421         fm    = M; /* fm points to first entry in linked list */
2422         for (k=0; k<m; k++) {
2423           currentcol = *rows++;
2424           /* is it already in the list? */
2425           do {
2426             mfm  = fm;
2427             fm   = rowhit[fm];
2428           } while (fm < currentcol);
2429           /* not in list so add it */
2430           if (fm != currentcol) {
2431             nrows++;
2432             columnsforrow[currentcol] = col;
2433             /* next three lines insert new entry into linked list */
2434             rowhit[mfm]               = currentcol;
2435             rowhit[currentcol]        = fm;
2436             fm                        = currentcol;
2437             /* fm points to the present position in the list since we know the row indices within each column are sorted */
2438           } else {
2439             SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid coloring of matrix detected");
2440           }
2441         }
2442       }
2443       c->nrows[i]         = nrows;
2444       ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
2445       ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
2446       ierr = PetscLogObjectMemory(c,(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr);
2447       /* now store the linked list of rows into c->rows[i] */
2448       nrows = 0;
2449       fm    = rowhit[M];
2450       do {
2451         c->rows[i][nrows]            = fm;
2452         c->columnsforrow[i][nrows++] = columnsforrow[fm];
2453         fm                           = rowhit[fm];
2454       } while (fm < M);
2455     } /* ---------------------------------------------------------------------------------------*/
2456     ierr = PetscFree(cols);CHKERRQ(ierr);
2457   }
2458 
2459   /* Optimize by adding the vscale, and scaleforrow[][] fields */
2460   /*
2461        vscale will contain the "diagonal" on-process scalings followed by the off-process ones
2462   */
2463   if (ctype == IS_COLORING_GLOBAL) {
2464     PetscInt *garray;
2465     ierr = PetscMalloc(baij->B->cmap->n*sizeof(PetscInt),&garray);CHKERRQ(ierr);
2466     for (i=0; i<baij->B->cmap->n/bs; i++) {
2467       for (j=0; j<bs; j++) {
2468         garray[i*bs+j] = bs*baij->garray[i]+j;
2469       }
2470     }
2471     ierr = VecCreateGhost(((PetscObject)mat)->comm,baij->A->rmap->n,PETSC_DETERMINE,baij->B->cmap->n,garray,&c->vscale);CHKERRQ(ierr);
2472     ierr = PetscFree(garray);CHKERRQ(ierr);
2473     CHKMEMQ;
2474     ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
2475     for (k=0; k<c->ncolors; k++) {
2476       ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
2477       for (l=0; l<c->nrows[k]; l++) {
2478         col = c->columnsforrow[k][l];
2479         if (col >= cstart && col < cend) {
2480           /* column is in diagonal block of matrix */
2481           colb = col - cstart;
2482         } else {
2483           /* column  is in "off-processor" part */
2484 #if defined (PETSC_USE_CTABLE)
2485           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2486           colb --;
2487 #else
2488           colb = baij->colmap[col] - 1;
2489 #endif
2490           colb = colb/bs;
2491           colb += cend - cstart;
2492         }
2493         c->vscaleforrow[k][l] = colb;
2494       }
2495     }
2496   } else if (ctype == IS_COLORING_GHOSTED) {
2497     /* Get gtol mapping */
2498     PetscInt N = mat->cmap->N, *gtol;
2499     ierr = PetscMalloc((N+1)*sizeof(PetscInt),&gtol);CHKERRQ(ierr);
2500     for (i=0; i<N; i++) gtol[i] = -1;
2501     for (i=0; i<map->n; i++) gtol[ltog[i]] = i;
2502 
2503     c->vscale = 0; /* will be created in MatFDColoringApply() */
2504     ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
2505     for (k=0; k<c->ncolors; k++) {
2506       ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
2507       for (l=0; l<c->nrows[k]; l++) {
2508         col = c->columnsforrow[k][l];      /* global column index */
2509         c->vscaleforrow[k][l] = gtol[col]; /* local column index */
2510       }
2511     }
2512     ierr = PetscFree(gtol);CHKERRQ(ierr);
2513   }
2514   ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr);
2515 
2516   ierr = PetscFree(rowhit);CHKERRQ(ierr);
2517   ierr = PetscFree(columnsforrow);CHKERRQ(ierr);
2518   ierr = MatRestoreColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr);
2519   ierr = MatRestoreColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr);
2520   CHKMEMQ;
2521   PetscFunctionReturn(0);
2522 }
2523 
2524 #undef __FUNCT__
2525 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIBAIJ"
2526 PetscErrorCode MatGetSeqNonzerostructure_MPIBAIJ(Mat A,Mat *newmat)
2527 {
2528   Mat            B;
2529   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ *)A->data;
2530   Mat_SeqBAIJ    *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data;
2531   Mat_SeqAIJ     *b;
2532   PetscErrorCode ierr;
2533   PetscMPIInt    size,rank,*recvcounts = 0,*displs = 0;
2534   PetscInt       sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs;
2535   PetscInt       m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf;
2536 
2537   PetscFunctionBegin;
2538   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
2539   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
2540 
2541   /* ----------------------------------------------------------------
2542      Tell every processor the number of nonzeros per row
2543   */
2544   ierr = PetscMalloc((A->rmap->N/bs)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
2545   for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) {
2546     lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs];
2547   }
2548   sendcount = A->rmap->rend/bs - A->rmap->rstart/bs;
2549   ierr = PetscMalloc(2*size*sizeof(PetscMPIInt),&recvcounts);CHKERRQ(ierr);
2550   displs     = recvcounts + size;
2551   for (i=0; i<size; i++) {
2552     recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs;
2553     displs[i]     = A->rmap->range[i]/bs;
2554   }
2555 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2556   ierr  = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2557 #else
2558   ierr  = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2559 #endif
2560   /* ---------------------------------------------------------------
2561      Create the sequential matrix of the same type as the local block diagonal
2562   */
2563   ierr  = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
2564   ierr  = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2565   ierr  = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2566   ierr  = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr);
2567   b = (Mat_SeqAIJ *)B->data;
2568 
2569   /*--------------------------------------------------------------------
2570     Copy my part of matrix column indices over
2571   */
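  /* For each local block row, the off-diagonal (bd) column indices are translated to
     global block columns through garray and merged with the diagonal (ad) columns,
     offset by the start of the owned range, so they come out in increasing global order */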
2572   sendcount  = ad->nz + bd->nz;
2573   jsendbuf   = b->j + b->i[rstarts[rank]/bs];
2574   a_jsendbuf = ad->j;
2575   b_jsendbuf = bd->j;
2576   n          = A->rmap->rend/bs - A->rmap->rstart/bs;
2577   cnt        = 0;
2578   for (i=0; i<n; i++) {
2579 
2580     /* put in lower diagonal portion */
2581     m = bd->i[i+1] - bd->i[i];
2582     while (m > 0) {
2583       /* is it above the diagonal (after mapping the bd (compressed) index through garray to global block numbering)? */
2584       if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break;
2585       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2586       m--;
2587     }
2588 
2589     /* put in diagonal portion */
2590     for (j=ad->i[i]; j<ad->i[i+1]; j++) {
2591       jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++;
2592     }
2593 
2594     /* put in upper diagonal portion */
2595     while (m-- > 0) {
2596       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2597     }
2598   }
2599   if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt);
2600 
2601   /*--------------------------------------------------------------------
2602     Gather all column indices to all processors
2603   */
2604   for (i=0; i<size; i++) {
2605     recvcounts[i] = 0;
2606     for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) {
2607       recvcounts[i] += lens[j];
2608     }
2609   }
2610   displs[0]  = 0;
2611   for (i=1; i<size; i++) {
2612     displs[i] = displs[i-1] + recvcounts[i-1];
2613   }
2614 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2615   ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2616 #else
2617   ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2618 #endif
2619   /*--------------------------------------------------------------------
2620     Assemble the matrix into usable form (note numerical values not yet set)
2621   */
2622   /* set the b->ilen (length of each row) values */
2623   ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr);
2624   /* set the b->i indices */
2625   b->i[0] = 0;
2626   for (i=1; i<=A->rmap->N/bs; i++) {
2627     b->i[i] = b->i[i-1] + lens[i-1];
2628   }
2629   ierr = PetscFree(lens);CHKERRQ(ierr);
2630   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2631   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2632   ierr = PetscFree(recvcounts);CHKERRQ(ierr);
2633 
2634   if (A->symmetric){
2635     ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2636   } else if (A->hermitian) {
2637     ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr);
2638   } else if (A->structurally_symmetric) {
2639     ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2640   }
2641   *newmat = B;
2642   PetscFunctionReturn(0);
2643 }
2644 
2645 #undef __FUNCT__
2646 #define __FUNCT__ "MatSOR_MPIBAIJ"
2647 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2648 {
2649   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
2650   PetscErrorCode ierr;
2651   Vec            bb1 = 0;
2652 
2653   PetscFunctionBegin;
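  /* Apart from SOR_APPLY_UPPER, only the local sweep variants are supported: each outer
     iteration scatters the current solution, forms bb1 = bb - B*x from the off-diagonal
     block, and then applies the requested sequential SOR sweep to the diagonal block;
     a true parallel SOR is not provided */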
2654   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) {
2655     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2656   }
2657 
2658   if (flag == SOR_APPLY_UPPER) {
2659     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2660     PetscFunctionReturn(0);
2661   }
2662 
2663   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2664     if (flag & SOR_ZERO_INITIAL_GUESS) {
2665       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2666       its--;
2667     }
2668 
2669     while (its--) {
2670       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2671       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2672 
2673       /* update rhs: bb1 = bb - B*x */
2674       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2675       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2676 
2677       /* local sweep */
2678       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2679     }
2680   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
2681     if (flag & SOR_ZERO_INITIAL_GUESS) {
2682       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2683       its--;
2684     }
2685     while (its--) {
2686       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2687       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2688 
2689       /* update rhs: bb1 = bb - B*x */
2690       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2691       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2692 
2693       /* local sweep */
2694       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2695     }
2696   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
2697     if (flag & SOR_ZERO_INITIAL_GUESS) {
2698       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2699       its--;
2700     }
2701     while (its--) {
2702       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2703       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2704 
2705       /* update rhs: bb1 = bb - B*x */
2706       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2707       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2708 
2709       /* local sweep */
2710       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2711     }
2712   } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel SOR type requested is not supported");
2713 
2714   if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);}
2715   PetscFunctionReturn(0);
2716 }
2717 
2718 extern PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_BAIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2719 
2720 
2721 /* -------------------------------------------------------------------*/
2722 static struct _MatOps MatOps_Values = {
2723        MatSetValues_MPIBAIJ,
2724        MatGetRow_MPIBAIJ,
2725        MatRestoreRow_MPIBAIJ,
2726        MatMult_MPIBAIJ,
2727 /* 4*/ MatMultAdd_MPIBAIJ,
2728        MatMultTranspose_MPIBAIJ,
2729        MatMultTransposeAdd_MPIBAIJ,
2730        0,
2731        0,
2732        0,
2733 /*10*/ 0,
2734        0,
2735        0,
2736        MatSOR_MPIBAIJ,
2737        MatTranspose_MPIBAIJ,
2738 /*15*/ MatGetInfo_MPIBAIJ,
2739        MatEqual_MPIBAIJ,
2740        MatGetDiagonal_MPIBAIJ,
2741        MatDiagonalScale_MPIBAIJ,
2742        MatNorm_MPIBAIJ,
2743 /*20*/ MatAssemblyBegin_MPIBAIJ,
2744        MatAssemblyEnd_MPIBAIJ,
2745        MatSetOption_MPIBAIJ,
2746        MatZeroEntries_MPIBAIJ,
2747 /*24*/ MatZeroRows_MPIBAIJ,
2748        0,
2749        0,
2750        0,
2751        0,
2752 /*29*/ MatSetUpPreallocation_MPIBAIJ,
2753        0,
2754        0,
2755        0,
2756        0,
2757 /*34*/ MatDuplicate_MPIBAIJ,
2758        0,
2759        0,
2760        0,
2761        0,
2762 /*39*/ MatAXPY_MPIBAIJ,
2763        MatGetSubMatrices_MPIBAIJ,
2764        MatIncreaseOverlap_MPIBAIJ,
2765        MatGetValues_MPIBAIJ,
2766        MatCopy_MPIBAIJ,
2767 /*44*/ 0,
2768        MatScale_MPIBAIJ,
2769        0,
2770        0,
2771        0,
2772 /*49*/ MatSetBlockSize_MPIBAIJ,
2773        0,
2774        0,
2775        0,
2776        0,
2777 /*54*/ MatFDColoringCreate_MPIBAIJ,
2778        0,
2779        MatSetUnfactored_MPIBAIJ,
2780        MatPermute_MPIBAIJ,
2781        MatSetValuesBlocked_MPIBAIJ,
2782 /*59*/ MatGetSubMatrix_MPIBAIJ,
2783        MatDestroy_MPIBAIJ,
2784        MatView_MPIBAIJ,
2785        0,
2786        0,
2787 /*64*/ 0,
2788        0,
2789        0,
2790        0,
2791        0,
2792 /*69*/ MatGetRowMaxAbs_MPIBAIJ,
2793        0,
2794        0,
2795        0,
2796        0,
2797 /*74*/ 0,
2798        MatFDColoringApply_BAIJ,
2799        0,
2800        0,
2801        0,
2802 /*79*/ 0,
2803        0,
2804        0,
2805        0,
2806        MatLoad_MPIBAIJ,
2807 /*84*/ 0,
2808        0,
2809        0,
2810        0,
2811        0,
2812 /*89*/ 0,
2813        0,
2814        0,
2815        0,
2816        0,
2817 /*94*/ 0,
2818        0,
2819        0,
2820        0,
2821        0,
2822 /*99*/ 0,
2823        0,
2824        0,
2825        0,
2826        0,
2827 /*104*/0,
2828        MatRealPart_MPIBAIJ,
2829        MatImaginaryPart_MPIBAIJ,
2830        0,
2831        0,
2832 /*109*/0,
2833        0,
2834        0,
2835        0,
2836        0,
2837 /*114*/MatGetSeqNonzerostructure_MPIBAIJ,
2838        0,
2839        MatGetGhosts_MPIBAIJ,
2840        0,
2841        0,
2842 /*119*/0,
2843        0,
2844        0,
2845        0
2846 };
2847 
2848 EXTERN_C_BEGIN
2849 #undef __FUNCT__
2850 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
2851 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
2852 {
2853   PetscFunctionBegin;
2854   *a      = ((Mat_MPIBAIJ *)A->data)->A;
2855   *iscopy = PETSC_FALSE;
2856   PetscFunctionReturn(0);
2857 }
2858 EXTERN_C_END
2859 
2860 EXTERN_C_BEGIN
2861 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*);
2862 EXTERN_C_END
2863 
2864 EXTERN_C_BEGIN
2865 #undef __FUNCT__
2866 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ"
2867 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2868 {
2869   PetscInt       m,rstart,cstart,cend;
2870   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
2871   const PetscInt *JJ=0;
2872   PetscScalar    *values=0;
2873   PetscErrorCode ierr;
2874 
2875   PetscFunctionBegin;
2876 
2877   if (bs < 1) SETERRQ1(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2878   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2879   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2880   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2881   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2882   m      = B->rmap->n/bs;
2883   rstart = B->rmap->rstart/bs;
2884   cstart = B->cmap->rstart/bs;
2885   cend   = B->cmap->rend/bs;
2886 
2887   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
2888   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
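  /* Single pass over each block row: jj is assumed sorted within the row, so the block columns in
     [cstart,cend) form a contiguous run; those are counted as diagonal-block nonzeros (d_nnz) and
     the remainder as off-diagonal nonzeros (o_nnz). */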
2889   for (i=0; i<m; i++) {
2890     nz = ii[i+1] - ii[i];
2891     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
2892     nz_max = PetscMax(nz_max,nz);
2893     JJ  = jj + ii[i];
2894     for (j=0; j<nz; j++) {
2895       if (*JJ >= cstart) break;
2896       JJ++;
2897     }
2898     d = 0;
2899     for (; j<nz; j++) {
2900       if (*JJ++ >= cend) break;
2901       d++;
2902     }
2903     d_nnz[i] = d;
2904     o_nnz[i] = nz - d;
2905   }
2906   ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2907   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
2908 
2909   values = (PetscScalar*)V;
2910   if (!values) {
2911     ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2912     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2913   }
2914   for (i=0; i<m; i++) {
2915     PetscInt          row    = i + rstart;
2916     PetscInt          ncols  = ii[i+1] - ii[i];
2917     const PetscInt    *icols = jj + ii[i];
2918     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2919     ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2920   }
2921 
2922   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2923   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2924   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2925 
2926   PetscFunctionReturn(0);
2927 }
2928 EXTERN_C_END
2929 
2930 #undef __FUNCT__
2931 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR"
2932 /*@C
2933    MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ
2934    (block compressed row) format.
2935 
2936    Collective on MPI_Comm
2937 
2938    Input Parameters:
2939 +  A - the matrix
2940 .  bs - the block size
2941 .  i - the indices into j for the start of each local row (starts with zero)
2942 .  j - the column indices for each local row (starts with zero); these must be sorted within each row
2943 -  v - optional values in the matrix
2944 
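   Example usage (an illustrative sketch only, not taken from a PETSc example): ii below describes two
   local block rows with two and one block entries, jj holds the sorted global block column indices,
   and the optional values are omitted; A is assumed to be an already created MATMPIBAIJ matrix whose
   local block row count matches ii.
.vb
   PetscInt bs   = 2;
   PetscInt ii[] = {0,2,3};
   PetscInt jj[] = {0,1,1};
   ierr = MatMPIBAIJSetPreallocationCSR(A,bs,ii,jj,PETSC_NULL);CHKERRQ(ierr);
.ve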
2945    Level: developer
2946 
2947 .keywords: matrix, aij, compressed row, sparse, parallel
2948 
2949 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIBAIJ(), MPIBAIJ
2950 @*/
2951 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2952 {
2953   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]);
2954 
2955   PetscFunctionBegin;
2956   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
2957   if (f) {
2958     ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr);
2959   }
2960   PetscFunctionReturn(0);
2961 }
2962 
2963 EXTERN_C_BEGIN
2964 #undef __FUNCT__
2965 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
2966 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
2967 {
2968   Mat_MPIBAIJ    *b;
2969   PetscErrorCode ierr;
2970   PetscInt       i, newbs = PetscAbs(bs);
2971 
2972   PetscFunctionBegin;
2973   if (bs < 0) {
2974     ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPIBAIJ matrix","Mat");CHKERRQ(ierr);
2975       ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr);
2976     ierr = PetscOptionsEnd();CHKERRQ(ierr);
2977     bs   = PetscAbs(bs);
2978   }
2979   if ((d_nnz || o_nnz) && newbs != bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting d_nnz or o_nnz");
2980   bs = newbs;
2981 
2982 
2983   if (bs < 1) SETERRQ(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
2984   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2985   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2986   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2987   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2988 
2989   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2990   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2991   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2992   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2993 
2994   if (d_nnz) {
2995     for (i=0; i<B->rmap->n/bs; i++) {
2996       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
2997     }
2998   }
2999   if (o_nnz) {
3000     for (i=0; i<B->rmap->n/bs; i++) {
3001       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
3002     }
3003   }
3004 
3005   b = (Mat_MPIBAIJ*)B->data;
3006   b->bs2 = bs*bs;
3007   b->mbs = B->rmap->n/bs;
3008   b->nbs = B->cmap->n/bs;
3009   b->Mbs = B->rmap->N/bs;
3010   b->Nbs = B->cmap->N/bs;
3011 
3012   for (i=0; i<=b->size; i++) {
3013     b->rangebs[i] = B->rmap->range[i]/bs;
3014   }
3015   b->rstartbs = B->rmap->rstart/bs;
3016   b->rendbs   = B->rmap->rend/bs;
3017   b->cstartbs = B->cmap->rstart/bs;
3018   b->cendbs   = B->cmap->rend/bs;
3019 
3020   if (!B->preallocated) {
3021     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3022     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3023     ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
3024     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3025     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3026     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3027     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
3028     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3029     ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr);
3030   }
3031 
3032   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3033   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
3034   B->preallocated = PETSC_TRUE;
3035   PetscFunctionReturn(0);
3036 }
3037 EXTERN_C_END
3038 
3039 EXTERN_C_BEGIN
3040 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
3041 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
3042 EXTERN_C_END
3043 
3044 
3045 EXTERN_C_BEGIN
3046 #undef __FUNCT__
3047 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj"
3048 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPIAdj(Mat B, const MatType newtype,MatReuse reuse,Mat *adj)
3049 {
3050   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
3051   PetscErrorCode ierr;
3052   Mat_SeqBAIJ    *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data;
3053   PetscInt       M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs;
3054   const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;
3055 
3056   PetscFunctionBegin;
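  /* Build the block adjacency structure row by row: merge the off-diagonal block columns that come
     before this process's block-column range, then the diagonal-block columns (excluding the
     diagonal entry itself), then the remaining off-diagonal block columns, so each row stays sorted
     by global block column. */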
3057   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&ii);CHKERRQ(ierr);
3058   ii[0] = 0;
3059   CHKMEMQ;
3060   for (i=0; i<M; i++) {
3061     if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]);
3062     if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]);
3063     ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i];
3064     /* remove one from the count if this row contains its diagonal entry */
3065     for (j=id[i]; j<id[i+1]; j++) {
3066       if (jd[j] == i) {ii[i+1]--;break;}
3067     }
3068   CHKMEMQ;
3069   }
3070   ierr = PetscMalloc(ii[M]*sizeof(PetscInt),&jj);CHKERRQ(ierr);
3071   cnt = 0;
3072   for (i=0; i<M; i++) {
3073     for (j=io[i]; j<io[i+1]; j++) {
3074       if (garray[jo[j]] > rstart) break;
3075       jj[cnt++] = garray[jo[j]];
3076   CHKMEMQ;
3077     }
3078     for (k=id[i]; k<id[i+1]; k++) {
3079       if (jd[k] != i) {
3080         jj[cnt++] = rstart + jd[k];
3081   CHKMEMQ;
3082       }
3083     }
3084     for (;j<io[i+1]; j++) {
3085       jj[cnt++] = garray[jo[j]];
3086   CHKMEMQ;
3087     }
3088   }
3089   ierr = MatCreateMPIAdj(((PetscObject)B)->comm,M,B->cmap->N/B->rmap->bs,ii,jj,PETSC_NULL,adj);CHKERRQ(ierr);
3090   PetscFunctionReturn(0);
3091 }
3092 EXTERN_C_END
3093 
3094 EXTERN_C_BEGIN
3095 #if defined(PETSC_HAVE_MUMPS)
3096 extern PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*);
3097 #endif
3098 EXTERN_C_END
3099 
3100 /*MC
3101    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
3102 
3103    Options Database Keys:
3104 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
3105 . -mat_block_size <bs> - set the blocksize used to store the matrix
3106 - -mat_use_hash_table <fact> - use a hash table during matrix assembly to save memory; <fact> sets the table size factor
3107 
3108   Level: beginner
3109 
3110 .seealso: MatCreateMPIBAIJ()
3111 M*/
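/*
   A minimal creation sketch for this type (m, n, M, N, bs and comm below are placeholders, not
   values taken from a PETSc example):
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);                       -- picks up -mat_type mpibaij
     ierr = MatMPIBAIJSetPreallocation(A,bs,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
*/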
3112 
3113 EXTERN_C_BEGIN
3114 #undef __FUNCT__
3115 #define __FUNCT__ "MatCreate_MPIBAIJ"
3116 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIBAIJ(Mat B)
3117 {
3118   Mat_MPIBAIJ    *b;
3119   PetscErrorCode ierr;
3120   PetscTruth     flg;
3121 
3122   PetscFunctionBegin;
3123   ierr = PetscNewLog(B,Mat_MPIBAIJ,&b);CHKERRQ(ierr);
3124   B->data = (void*)b;
3125 
3126 
3127   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3128   B->mapping    = 0;
3129   B->assembled  = PETSC_FALSE;
3130 
3131   B->insertmode = NOT_SET_VALUES;
3132   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
3133   ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr);
3134 
3135   /* build local table of row and column ownerships */
3136   ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);
3137 
3138   /* build cache for off array entries formed */
3139   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
3140   b->donotstash  = PETSC_FALSE;
3141   b->colmap      = PETSC_NULL;
3142   b->garray      = PETSC_NULL;
3143   b->roworiented = PETSC_TRUE;
3144 
3145   /* stuff used in block assembly */
3146   b->barray       = 0;
3147 
3148   /* stuff used for matrix vector multiply */
3149   b->lvec         = 0;
3150   b->Mvctx        = 0;
3151 
3152   /* stuff for MatGetRow() */
3153   b->rowindices   = 0;
3154   b->rowvalues    = 0;
3155   b->getrowactive = PETSC_FALSE;
3156 
3157   /* hash table stuff */
3158   b->ht           = 0;
3159   b->hd           = 0;
3160   b->ht_size      = 0;
3161   b->ht_flag      = PETSC_FALSE;
3162   b->ht_fact      = 0;
3163   b->ht_total_ct  = 0;
3164   b->ht_insert_ct = 0;
3165 
3166   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr);
3167     ierr = PetscOptionsTruth("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
3168     if (flg) {
3169       PetscReal fact = 1.39;
3170       ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
3171       ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr);
3172       if (fact <= 1.0) fact = 1.39;
3173       ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
3174       ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
3175     }
3176   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3177 
3178 #if defined(PETSC_HAVE_MUMPS)
3179   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps",MatGetFactor_baij_mumps);CHKERRQ(ierr);
3180 #endif
3181   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",
3182                                      "MatConvert_MPIBAIJ_MPIAdj",
3183                                       MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr);
3184   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3185                                      "MatStoreValues_MPIBAIJ",
3186                                      MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
3187   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3188                                      "MatRetrieveValues_MPIBAIJ",
3189                                      MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
3190   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
3191                                      "MatGetDiagonalBlock_MPIBAIJ",
3192                                      MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
3193   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C",
3194                                      "MatMPIBAIJSetPreallocation_MPIBAIJ",
3195                                      MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
3196   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",
3197 				     "MatMPIBAIJSetPreallocationCSR_MPIBAIJ",
3198 				     MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
3199   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
3200                                      "MatDiagonalScaleLocal_MPIBAIJ",
3201                                      MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
3202   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C",
3203                                      "MatSetHashTableFactor_MPIBAIJ",
3204                                      MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
3205   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);
3206   PetscFunctionReturn(0);
3207 }
3208 EXTERN_C_END
3209 
3210 /*MC
3211    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
3212 
3213    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
3214    and MATMPIBAIJ otherwise.
3215 
3216    Options Database Keys:
3217 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
3218 
3219   Level: beginner
3220 
3221 .seealso: MatCreateMPIBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3222 M*/
3223 
3224 EXTERN_C_BEGIN
3225 #undef __FUNCT__
3226 #define __FUNCT__ "MatCreate_BAIJ"
3227 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_BAIJ(Mat A)
3228 {
3229   PetscErrorCode ierr;
3230   PetscMPIInt    size;
3231 
3232   PetscFunctionBegin;
3233   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
3234   if (size == 1) {
3235     ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr);
3236   } else {
3237     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
3238   }
3239   PetscFunctionReturn(0);
3240 }
3241 EXTERN_C_END
3242 
3243 #undef __FUNCT__
3244 #define __FUNCT__ "MatMPIBAIJSetPreallocation"
3245 /*@C
3246    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
3247    (block compressed row).  For good matrix assembly performance
3248    the user should preallocate the matrix storage by setting the parameters
3249    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3250    performance can be increased by more than a factor of 50.
3251 
3252    Collective on Mat
3253 
3254    Input Parameters:
3255 +  A - the matrix
3256 .  bs   - size of block
3257 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
3258            submatrix  (same for all local rows)
3259 .  d_nnz - array containing the number of block nonzeros in the various block rows
3260            in the diagonal portion of the local submatrix (possibly different for each block
3261            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
3262 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
3263            submatrix (same for all local rows).
3264 -  o_nnz - array containing the number of nonzeros in the various block rows of the
3265            off-diagonal portion of the local submatrix (possibly different for
3266            each block row) or PETSC_NULL.
3267 
3268    If the *_nnz parameter is given then the *_nz parameter is ignored
3269 
3270    Options Database Keys:
3271 +   -mat_block_size - size of the blocks to use
3272 -   -mat_use_hash_table <fact> - use a hash table during matrix assembly to save memory; <fact> sets the table size factor
3273 
3274    Notes:
3275    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
3276    then it must be used on all processors that share the object for that argument.
3277 
3278    Storage Information:
3279    For a square global matrix we define each processor's diagonal portion
3280    to be its local rows and the corresponding columns (a square submatrix);
3281    each processor's off-diagonal portion encompasses the remainder of the
3282    local matrix (a rectangular submatrix).
3283 
3284    The user can specify preallocated storage for the diagonal part of
3285    the local submatrix with either d_nz or d_nnz (not both).  Set
3286    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
3287    memory allocation.  Likewise, specify preallocated storage for the
3288    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3289 
3290    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3291    the figure below we depict these three local rows and all columns (0-11).
3292 
3293 .vb
3294            0 1 2 3 4 5 6 7 8 9 10 11
3295           -------------------
3296    row 3  |  o o o d d d o o o o o o
3297    row 4  |  o o o d d d o o o o o o
3298    row 5  |  o o o d d d o o o o o o
3299           -------------------
3300 .ve
3301 
3302    Thus, any entries in the d locations are stored in the d (diagonal)
3303    submatrix, and any entries in the o locations are stored in the
3304    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3305    stored simply in the MATSEQBAIJ format for compressed row storage.
3306 
3307    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3308    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3309    In general, for PDE problems in which most nonzeros are near the diagonal,
3310    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3311    or you will get TERRIBLE performance; see the users' manual chapter on
3312    matrices.
3313 
3314    You can call MatGetInfo() to get information on how effective the preallocation was;
3315    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3316    You can also run with the option -info and look for messages with the string
3317    malloc in them to see if additional memory allocation was needed.
3318 
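   Example usage (an illustrative sketch only; the communicator, sizes and nonzero counts below are
   assumptions): a 1000 x 1000 matrix with block size 5 is preallocated with 3 block nonzeros per
   block row in the diagonal part and 2 in the off-diagonal part.
.vb
   Mat A;
   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
   ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,1000,1000);CHKERRQ(ierr);
   ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
   ierr = MatMPIBAIJSetPreallocation(A,5,3,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
.ve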
3319    Level: intermediate
3320 
3321 .keywords: matrix, block, aij, compressed row, sparse, parallel
3322 
3323 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR()
3324 @*/
3325 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3326 {
3327   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
3328 
3329   PetscFunctionBegin;
3330   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3331   if (f) {
3332     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3333   }
3334   PetscFunctionReturn(0);
3335 }
3336 
3337 #undef __FUNCT__
3338 #define __FUNCT__ "MatCreateMPIBAIJ"
3339 /*@C
3340    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
3341    (block compressed row).  For good matrix assembly performance
3342    the user should preallocate the matrix storage by setting the parameters
3343    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3344    performance can be increased by more than a factor of 50.
3345 
3346    Collective on MPI_Comm
3347 
3348    Input Parameters:
3349 +  comm - MPI communicator
3350 .  bs   - size of block
3351 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3352            This value should be the same as the local size used in creating the
3353            y vector for the matrix-vector product y = Ax.
3354 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
3355            This value should be the same as the local size used in creating the
3356            x vector for the matrix-vector product y = Ax.
3357 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3358 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3359 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
3360            submatrix  (same for all local rows)
3361 .  d_nnz - array containing the number of nonzero blocks in the various block rows
3362            in the diagonal portion of the local submatrix (possibly different for each block
3363            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
3364 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3365            submatrix (same for all local rows).
3366 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
3367            off-diagonal portion of the local submatrix (possibly different for
3368            each block row) or PETSC_NULL.
3369 
3370    Output Parameter:
3371 .  A - the matrix
3372 
3373    Options Database Keys:
3374 +   -mat_block_size - size of the blocks to use
3375 -   -mat_use_hash_table <fact> - use a hash table during matrix assembly to save memory; <fact> sets the table size factor
3376 
3377    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3378    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3379    [MatXXXXSetPreallocation() is, for example, MatMPIBAIJSetPreallocation()]
3380 
3381    Notes:
3382    If the *_nnz parameter is given then the *_nz parameter is ignored
3383 
3384    A nonzero block is any block that has 1 or more nonzeros in it
3385 
3386    The user MUST specify either the local or global matrix dimensions
3387    (possibly both).
3388 
3389    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
3390    then it must be used on all processors that share the object for that argument.
3391 
3392    Storage Information:
3393    For a square global matrix we define each processor's diagonal portion
3394    to be its local rows and the corresponding columns (a square submatrix);
3395    each processor's off-diagonal portion encompasses the remainder of the
3396    local matrix (a rectangular submatrix).
3397 
3398    The user can specify preallocated storage for the diagonal part of
3399    the local submatrix with either d_nz or d_nnz (not both).  Set
3400    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
3401    memory allocation.  Likewise, specify preallocated storage for the
3402    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3403 
3404    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3405    the figure below we depict these three local rows and all columns (0-11).
3406 
3407 .vb
3408            0 1 2 3 4 5 6 7 8 9 10 11
3409           -------------------
3410    row 3  |  o o o d d d o o o o o o
3411    row 4  |  o o o d d d o o o o o o
3412    row 5  |  o o o d d d o o o o o o
3413           -------------------
3414 .ve
3415 
3416    Thus, any entries in the d locations are stored in the d (diagonal)
3417    submatrix, and any entries in the o locations are stored in the
3418    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3419    stored simply in the MATSEQBAIJ format for compressed row storage.
3420 
3421    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3422    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3423    In general, for PDE problems in which most nonzeros are near the diagonal,
3424    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3425    or you will get TERRIBLE performance; see the users' manual chapter on
3426    matrices.
3427 
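   Example usage (an illustrative sketch only; the communicator, sizes and nonzero counts are
   assumptions): create an 800 x 800 matrix with block size 4, letting PETSc decide the local sizes.
.vb
   Mat A;
   ierr = MatCreateMPIBAIJ(comm,4,PETSC_DECIDE,PETSC_DECIDE,800,800,5,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
.ve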
3428    Level: intermediate
3429 
3430 .keywords: matrix, block, aij, compressed row, sparse, parallel
3431 
3432 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3433 @*/
3434 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3435 {
3436   PetscErrorCode ierr;
3437   PetscMPIInt    size;
3438 
3439   PetscFunctionBegin;
3440   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3441   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3442   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3443   if (size > 1) {
3444     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
3445     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3446   } else {
3447     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3448     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3449   }
3450   PetscFunctionReturn(0);
3451 }
3452 
3453 #undef __FUNCT__
3454 #define __FUNCT__ "MatDuplicate_MPIBAIJ"
3455 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3456 {
3457   Mat            mat;
3458   Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
3459   PetscErrorCode ierr;
3460   PetscInt       len=0;
3461 
3462   PetscFunctionBegin;
3463   *newmat       = 0;
3464   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
3465   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3466   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3467   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3468 
3469   mat->factortype   = matin->factortype;
3470   mat->preallocated = PETSC_TRUE;
3471   mat->assembled    = PETSC_TRUE;
3472   mat->insertmode   = NOT_SET_VALUES;
3473 
3474   a      = (Mat_MPIBAIJ*)mat->data;
3475   mat->rmap->bs  = matin->rmap->bs;
3476   a->bs2   = oldmat->bs2;
3477   a->mbs   = oldmat->mbs;
3478   a->nbs   = oldmat->nbs;
3479   a->Mbs   = oldmat->Mbs;
3480   a->Nbs   = oldmat->Nbs;
3481 
3482   ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3483   ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3484 
3485   a->size         = oldmat->size;
3486   a->rank         = oldmat->rank;
3487   a->donotstash   = oldmat->donotstash;
3488   a->roworiented  = oldmat->roworiented;
3489   a->rowindices   = 0;
3490   a->rowvalues    = 0;
3491   a->getrowactive = PETSC_FALSE;
3492   a->barray       = 0;
3493   a->rstartbs     = oldmat->rstartbs;
3494   a->rendbs       = oldmat->rendbs;
3495   a->cstartbs     = oldmat->cstartbs;
3496   a->cendbs       = oldmat->cendbs;
3497 
3498   /* hash table stuff */
3499   a->ht           = 0;
3500   a->hd           = 0;
3501   a->ht_size      = 0;
3502   a->ht_flag      = oldmat->ht_flag;
3503   a->ht_fact      = oldmat->ht_fact;
3504   a->ht_total_ct  = 0;
3505   a->ht_insert_ct = 0;
3506 
3507   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
3508   if (oldmat->colmap) {
3509 #if defined (PETSC_USE_CTABLE)
3510   ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3511 #else
3512   ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3513   ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3514   ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3515 #endif
3516   } else a->colmap = 0;
3517 
3518   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
3519     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3520     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3521     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
3522   } else a->garray = 0;
3523 
3524   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
3525   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3526   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3527   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3528   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3529 
3530   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3531   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3532   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3533   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3534   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3535   *newmat = mat;
3536 
3537   PetscFunctionReturn(0);
3538 }
3539 
3540 #undef __FUNCT__
3541 #define __FUNCT__ "MatLoad_MPIBAIJ"
3542 PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer)
3543 {
3544   PetscErrorCode ierr;
3545   int            fd;
3546   PetscInt       i,nz,j,rstart,rend;
3547   PetscScalar    *vals,*buf;
3548   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3549   MPI_Status     status;
3550   PetscMPIInt    rank,size,maxnz;
3551   PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
3552   PetscInt       *locrowlens = PETSC_NULL,*procsnz = PETSC_NULL,*browners = PETSC_NULL;
3553   PetscInt       jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax;
3554   PetscMPIInt    tag = ((PetscObject)viewer)->tag;
3555   PetscInt       *dlens = PETSC_NULL,*odlens = PETSC_NULL,*mask = PETSC_NULL,*masked1 = PETSC_NULL,*masked2 = PETSC_NULL,rowcount,odcount;
3556   PetscInt       dcount,kmax,k,nzcount,tmp,mend,sizesset=1,grows,gcols;
3557 
3558   PetscFunctionBegin;
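  /* Rank 0 reads the binary header, row lengths, column indices and numerical values and ships each
     process its contiguous chunk; every process then counts diagonal/off-diagonal block columns per
     block row, preallocates, and inserts its values before the final assembly. */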
3559   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr);
3560     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
3561   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3562 
3563   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3564   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3565   if (!rank) {
3566     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3567     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
3568     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3569   }
3570 
3571   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
3572 
3573   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3574   M = header[1]; N = header[2];
3575 
3576   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3577   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
3578   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
3579 
3580   /* If global sizes are set, check if they are consistent with that given in the file */
3581   if (sizesset) {
3582     ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
3583   }
3584   if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",M,grows);
3585   if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",N,gcols);
3586 
3587   if (M != N) SETERRQ(((PetscObject)viewer)->comm,PETSC_ERR_SUP,"Can only do square matrices");
3588 
3589   /*
3590      This code adds extra rows to make sure the number of rows is
3591      divisible by the blocksize
3592   */
3593   Mbs        = M/bs;
3594   extra_rows = bs - M + bs*Mbs;
3595   if (extra_rows == bs) extra_rows = 0;
3596   else                  Mbs++;
3597   if (extra_rows && !rank) {
3598     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3599   }
3600 
3601   /* determine ownership of all rows */
3602   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
3603     mbs        = Mbs/size + ((Mbs % size) > rank);
3604     m          = mbs*bs;
3605   } else { /* User set */
3606     m          = newmat->rmap->n;
3607     mbs        = m/bs;
3608   }
3609   ierr       = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr);
3610   ierr       = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3611 
3612   /* process 0 needs enough room for process with most rows */
3613   if (!rank) {
3614     mmax = rowners[1];
3615     for (i=2; i<size; i++) {
3616       mmax = PetscMax(mmax,rowners[i]);
3617     }
3618     mmax*=bs;
3619   } else mmax = m;
3620 
3621   rowners[0] = 0;
3622   for (i=2; i<=size; i++)  rowners[i] += rowners[i-1];
3623   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
3624   rstart = rowners[rank];
3625   rend   = rowners[rank+1];
3626 
3627   /* distribute row lengths to all processors */
3628   ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr);
3629   if (!rank) {
3630     mend = m;
3631     if (size == 1) mend = mend - extra_rows;
3632     ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr);
3633     for (j=mend; j<m; j++) locrowlens[j] = 1;
3634     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3635     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3636     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3637     for (j=0; j<m; j++) {
3638       procsnz[0] += locrowlens[j];
3639     }
3640     for (i=1; i<size; i++) {
3641       mend = browners[i+1] - browners[i];
3642       if (i == size-1) mend = mend - extra_rows;
3643       ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr);
3644       for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
3645       /* calculate the number of nonzeros on each processor */
3646       for (j=0; j<browners[i+1]-browners[i]; j++) {
3647         procsnz[i] += rowlengths[j];
3648       }
3649       ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3650     }
3651     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3652   } else {
3653     ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3654   }
3655 
3656   if (!rank) {
3657     /* determine max buffer needed and allocate it */
3658     maxnz = procsnz[0];
3659     for (i=1; i<size; i++) {
3660       maxnz = PetscMax(maxnz,procsnz[i]);
3661     }
3662     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3663 
3664     /* read in my part of the matrix column indices  */
3665     nz     = procsnz[0];
3666     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
3667     mycols = ibuf;
3668     if (size == 1)  nz -= extra_rows;
3669     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3670     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
3671 
3672     /* read in each of the other processes' parts (except the last) and ship them off */
3673     for (i=1; i<size-1; i++) {
3674       nz   = procsnz[i];
3675       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3676       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3677     }
3678     /* read in the stuff for the last proc */
3679     if (size != 1) {
3680       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
3681       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3682       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
3683       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
3684     }
3685     ierr = PetscFree(cols);CHKERRQ(ierr);
3686   } else {
3687     /* determine buffer space needed for message */
3688     nz = 0;
3689     for (i=0; i<m; i++) {
3690       nz += locrowlens[i];
3691     }
3692     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
3693     mycols = ibuf;
3694     /* receive message of column indices*/
3695     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3696     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
3697     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
3698   }
3699 
3700   /* loop over local rows, determining number of off diagonal entries */
3701   ierr     = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
3702   ierr     = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
3703   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3704   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3705   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3706   rowcount = 0; nzcount = 0;
3707   for (i=0; i<mbs; i++) {
3708     dcount  = 0;
3709     odcount = 0;
3710     for (j=0; j<bs; j++) {
3711       kmax = locrowlens[rowcount];
3712       for (k=0; k<kmax; k++) {
3713         tmp = mycols[nzcount++]/bs;
3714         if (!mask[tmp]) {
3715           mask[tmp] = 1;
3716           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
3717           else masked1[dcount++] = tmp;
3718         }
3719       }
3720       rowcount++;
3721     }
3722 
3723     dlens[i]  = dcount;
3724     odlens[i] = odcount;
3725 
3726     /* zero out the mask elements we set */
3727     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
3728     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
3729   }
3730 
3731 
3732   if (!sizesset) {
3733     ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3734   }
3735   ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
3736 
3737   if (!rank) {
3738     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
3739     /* read in my part of the matrix numerical values  */
3740     nz = procsnz[0];
3741     vals = buf;
3742     mycols = ibuf;
3743     if (size == 1)  nz -= extra_rows;
3744     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3745     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
3746 
3747     /* insert into matrix */
3748     jj      = rstart*bs;
3749     for (i=0; i<m; i++) {
3750       ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3751       mycols += locrowlens[i];
3752       vals   += locrowlens[i];
3753       jj++;
3754     }
3755     /* read in other processors (except the last one) and ship out */
3756     for (i=1; i<size-1; i++) {
3757       nz   = procsnz[i];
3758       vals = buf;
3759       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3760       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3761     }
3762     /* the last proc */
3763     if (size != 1){
3764       nz   = procsnz[i] - extra_rows;
3765       vals = buf;
3766       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3767       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
3768       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3769     }
3770     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3771   } else {
3772     /* receive numeric values */
3773     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
3774 
3775     /* receive message of values*/
3776     vals   = buf;
3777     mycols = ibuf;
3778     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
3779     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
3780     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
3781 
3782     /* insert into matrix */
3783     jj      = rstart*bs;
3784     for (i=0; i<m; i++) {
3785       ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3786       mycols += locrowlens[i];
3787       vals   += locrowlens[i];
3788       jj++;
3789     }
3790   }
3791   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
3792   ierr = PetscFree(buf);CHKERRQ(ierr);
3793   ierr = PetscFree(ibuf);CHKERRQ(ierr);
3794   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
3795   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
3796   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
3797   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3798   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3799 
3800   PetscFunctionReturn(0);
3801 }
3802 
3803 #undef __FUNCT__
3804 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
3805 /*@
3806    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
3807 
3808    Input Parameters:
3809 +  mat  - the matrix
3810 -  fact - the hash table size factor
3811 
3812    Not Collective, each process can use a different factor
3813 
3814    Level: advanced
3815 
3816   Notes:
3817    This can also be set by the command line option: -mat_use_hash_table <fact>
3818 
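   Example usage (a minimal sketch, assuming A is an already created MPIBAIJ matrix; it mirrors what
   the -mat_use_hash_table option does at creation time):
.vb
   ierr = MatSetOption(A,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
   ierr = MatMPIBAIJSetHashTableFactor(A,1.6);CHKERRQ(ierr);
.ve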
3819 .keywords: matrix, hashtable, factor, HT
3820 
3821 .seealso: MatSetOption()
3822 @*/
3823 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
3824 {
3825   PetscErrorCode ierr,(*f)(Mat,PetscReal);
3826 
3827   PetscFunctionBegin;
3828   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);CHKERRQ(ierr);
3829   if (f) {
3830     ierr = (*f)(mat,fact);CHKERRQ(ierr);
3831   }
3832   PetscFunctionReturn(0);
3833 }
3834 
3835 EXTERN_C_BEGIN
3836 #undef __FUNCT__
3837 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
3838 PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
3839 {
3840   Mat_MPIBAIJ *baij;
3841 
3842   PetscFunctionBegin;
3843   baij = (Mat_MPIBAIJ*)mat->data;
3844   baij->ht_fact = fact;
3845   PetscFunctionReturn(0);
3846 }
3847 EXTERN_C_END
3848 
3849 #undef __FUNCT__
3850 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
3851 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3852 {
3853   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
3854   PetscFunctionBegin;
3855   *Ad     = a->A;
3856   *Ao     = a->B;
3857   *colmap = a->garray;
3858   PetscFunctionReturn(0);
3859 }
3860 
3861 /*
3862     Special version for direct calls from Fortran (to eliminate two function call overheads)
3863 */
3864 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3865 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3866 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3867 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3868 #endif
3869 
3870 #undef __FUNCT__
3871 #define __FUNCT__ "matmpibaijsetvaluesblocked"
3872 /*@C
3873   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked()
3874 
3875   Collective on Mat
3876 
3877   Input Parameters:
3878 + mat - the matrix
3879 . min - number of input rows
3880 . im - input rows
3881 . nin - number of input columns
3882 . in - input columns
3883 . v - numerical values input
3884 - addvin - INSERT_VALUES or ADD_VALUES
3885 
3886   Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse.
3887 
3888   Level: advanced
3889 
3890 .seealso:   MatSetValuesBlocked()
3891 @*/
3892 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
3893 {
3894   /* convert input arguments to C version */
3895   Mat             mat = *matin;
3896   PetscInt        m = *min, n = *nin;
3897   InsertMode      addv = *addvin;
3898 
3899   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
3900   const MatScalar *value;
3901   MatScalar       *barray=baij->barray;
3902   PetscTruth      roworiented = baij->roworiented;
3903   PetscErrorCode  ierr;
3904   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
3905   PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
3906   PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
3907 
3908   PetscFunctionBegin;
3909   /* tasks normally handled by MatSetValuesBlocked() */
3910   if (mat->insertmode == NOT_SET_VALUES) {
3911     mat->insertmode = addv;
3912   }
3913 #if defined(PETSC_USE_DEBUG)
3914   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
3915   if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
3916 #endif
3917   if (mat->assembled) {
3918     mat->was_assembled = PETSC_TRUE;
3919     mat->assembled     = PETSC_FALSE;
3920   }
3921   ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
3922 
3923 
3924   if(!barray) {
3925     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
3926     baij->barray = barray;
3927   }
3928 
3929   if (roworiented) {
3930     stepval = (n-1)*bs;
3931   } else {
3932     stepval = (m-1)*bs;
3933   }
3934   for (i=0; i<m; i++) {
3935     if (im[i] < 0) continue;
3936 #if defined(PETSC_USE_DEBUG)
3937     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
3938 #endif
3939     if (im[i] >= rstart && im[i] < rend) {
3940       row = im[i] - rstart;
3941       for (j=0; j<n; j++) {
3942         /* If NumCol = 1 then a copy is not required */
3943         if ((roworiented) && (n == 1)) {
3944           barray = (MatScalar*)v + i*bs2;
3945         } else if((!roworiented) && (m == 1)) {
3946           barray = (MatScalar*)v + j*bs2;
3947         } else { /* Here a copy is required */
3948           if (roworiented) {
3949             value = v + i*(stepval+bs)*bs + j*bs;
3950           } else {
3951             value = v + j*(stepval+bs)*bs + i*bs;
3952           }
3953           for (ii=0; ii<bs; ii++,value+=stepval) {
3954             for (jj=0; jj<bs; jj++) {
3955               *barray++  = *value++;
3956             }
3957           }
3958           barray -=bs2;
3959         }
3960 
3961         if (in[j] >= cstart && in[j] < cend){
3962           col  = in[j] - cstart;
3963           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
3964         }
3965         else if (in[j] < 0) continue;
3966 #if defined(PETSC_USE_DEBUG)
3967         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
3968 #endif
3969         else {
3970           if (mat->was_assembled) {
3971             if (!baij->colmap) {
3972               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
3973             }
3974 
3975 #if defined(PETSC_USE_DEBUG)
3976 #if defined (PETSC_USE_CTABLE)
3977             { PetscInt data;
3978               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
3979               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
3980             }
3981 #else
3982             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
3983 #endif
3984 #endif
3985 #if defined (PETSC_USE_CTABLE)
3986 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
3987             col  = (col - 1)/bs;
3988 #else
3989             col = (baij->colmap[in[j]] - 1)/bs;
3990 #endif
3991             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
3992               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
3993               col =  in[j];
3994             }
3995           }
3996           else col = in[j];
3997           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
3998         }
3999       }
4000     } else {
4001       if (!baij->donotstash) {
4002         if (roworiented) {
4003           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
4004         } else {
4005           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
4006         }
4007       }
4008     }
4009   }
4010 
4011   /* task normally handled by MatSetValuesBlocked() */
4012   ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
4013   PetscFunctionReturn(0);
4014 }
4015 
4016 #undef __FUNCT__
4017 #define __FUNCT__ "MatCreateMPIBAIJWithArrays"
4018 /*@
4019      MatCreateMPIBAIJWithArrays - creates a MPI BAIJ matrix using arrays that contain in standard
4020          CSR format the local rows.
4021 
4022    Collective on MPI_Comm
4023 
4024    Input Parameters:
4025 +  comm - MPI communicator
4026 .  bs - the block size, only a block size of 1 is supported
4027 .  m - number of local rows (Cannot be PETSC_DECIDE)
4028 .  n - This value should be the same as the local size used in creating the
4029        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4030        calculated if N is given) For square matrices n is almost always m.
4031 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4032 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4033 .   i - row indices
4034 .   j - column indices
4035 -   a - matrix values
4036 
4037    Output Parameter:
4038 .   mat - the matrix
4039 
4040    Level: intermediate
4041 
4042    Notes:
4043        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4044      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4045      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4046 
4047        The i and j indices are 0 based, and the i array holds the offsets into the local j array at which each local row's column indices begin.
4048 
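       Example usage (an illustrative sketch only; the communicator and the local CSR arrays below are
     assumptions): each process contributes two local rows with column indices 0 and 1, using the only
     supported block size of 1.
.vb
   Mat         A;
   PetscInt    i[] = {0,2,3};
   PetscInt    j[] = {0,1,1};
   PetscScalar a[] = {2.0,-1.0,3.0};
   ierr = MatCreateMPIBAIJWithArrays(comm,1,2,2,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,&A);CHKERRQ(ierr);
.ve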
4049 .keywords: matrix, aij, compressed row, sparse, parallel
4050 
4051 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4052           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
4053 @*/
4054 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4055 {
4056   PetscErrorCode ierr;
4057 
4058 
4059   PetscFunctionBegin;
4060   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4063   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4064   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4065   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4066   ierr = MatSetType(*mat,MATMPIBAIJ);CHKERRQ(ierr);
4067   ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
4068   PetscFunctionReturn(0);
4069 }
4070