xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 660746e022788435371e19f9203f027a146bcb3a)
1 #define PETSCMAT_DLL
2 
3 #include "../src/mat/impls/baij/mpi/mpibaij.h"   /*I  "petscmat.h"  I*/
4 #include "petscblaslapack.h"
5 
6 EXTERN PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
7 EXTERN PetscErrorCode DisAssemble_MPIBAIJ(Mat);
8 EXTERN PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat,PetscInt,IS[],PetscInt);
9 EXTERN PetscErrorCode MatGetSubMatrices_MPIBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat *[]);
10 EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []);
11 EXTERN PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
12 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
13 EXTERN PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
14 EXTERN PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
15 EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar);
16 
17 #undef __FUNCT__
18 #define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ"
19 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
20 {
21   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
22   PetscErrorCode ierr;
23   PetscInt       i,*idxb = 0;
24   PetscScalar    *va,*vb;
25   Vec            vtmp;
26 
27   PetscFunctionBegin;
28   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
29   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
30   if (idx) {
31     for (i=0; i<A->rmap->n; i++) {if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;}
32   }
33 
34   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
35   if (idx) {ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);}
36   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
37   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
38 
39   for (i=0; i<A->rmap->n; i++){
40     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {va[i] = vb[i]; if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);}
41   }
42 
43   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
44   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
45   if (idxb) {ierr = PetscFree(idxb);CHKERRQ(ierr);}
46   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
47   PetscFunctionReturn(0);
48 }
49 
50 EXTERN_C_BEGIN
51 #undef __FUNCT__
52 #define __FUNCT__ "MatStoreValues_MPIBAIJ"
53 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIBAIJ(Mat mat)
54 {
55   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
56   PetscErrorCode ierr;
57 
58   PetscFunctionBegin;
59   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
60   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
61   PetscFunctionReturn(0);
62 }
63 EXTERN_C_END
64 
65 EXTERN_C_BEGIN
66 #undef __FUNCT__
67 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
68 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIBAIJ(Mat mat)
69 {
70   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
71   PetscErrorCode ierr;
72 
73   PetscFunctionBegin;
74   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
75   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
76   PetscFunctionReturn(0);
77 }
78 EXTERN_C_END
79 
80 /*
81      Local utility routine that creates a mapping from the global column
82    number to the local number in the off-diagonal part of the local
83    storage of the matrix.  This is done in a non scalable way since the
84    length of colmap equals the global matrix length.
85 */
86 #undef __FUNCT__
87 #define __FUNCT__ "CreateColmap_MPIBAIJ_Private"
88 PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat)
89 {
90   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
91   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
92   PetscErrorCode ierr;
93   PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;
94 
95   PetscFunctionBegin;
96 #if defined (PETSC_USE_CTABLE)
97   ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr);
98   for (i=0; i<nbs; i++){
99     ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
100   }
101 #else
102   ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr);
103   ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
104   ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
105   for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
106 #endif
107   PetscFunctionReturn(0);
108 }
109 
110 #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
111 { \
112  \
113     brow = row/bs;  \
114     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
115     rmax = aimax[brow]; nrow = ailen[brow]; \
116       bcol = col/bs; \
117       ridx = row % bs; cidx = col % bs; \
118       low = 0; high = nrow; \
119       while (high-low > 3) { \
120         t = (low+high)/2; \
121         if (rp[t] > bcol) high = t; \
122         else              low  = t; \
123       } \
124       for (_i=low; _i<high; _i++) { \
125         if (rp[_i] > bcol) break; \
126         if (rp[_i] == bcol) { \
127           bap  = ap +  bs2*_i + bs*cidx + ridx; \
128           if (addv == ADD_VALUES) *bap += value;  \
129           else                    *bap  = value;  \
130           goto a_noinsert; \
131         } \
132       } \
133       if (a->nonew == 1) goto a_noinsert; \
134       if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
135       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
136       N = nrow++ - 1;  \
137       /* shift up all the later entries in this row */ \
138       for (ii=N; ii>=_i; ii--) { \
139         rp[ii+1] = rp[ii]; \
140         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
141       } \
142       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
143       rp[_i]                      = bcol;  \
144       ap[bs2*_i + bs*cidx + ridx] = value;  \
145       a_noinsert:; \
146     ailen[brow] = nrow; \
147 }
148 
149 #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
150 { \
151     brow = row/bs;  \
152     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
153     rmax = bimax[brow]; nrow = bilen[brow]; \
154       bcol = col/bs; \
155       ridx = row % bs; cidx = col % bs; \
156       low = 0; high = nrow; \
157       while (high-low > 3) { \
158         t = (low+high)/2; \
159         if (rp[t] > bcol) high = t; \
160         else              low  = t; \
161       } \
162       for (_i=low; _i<high; _i++) { \
163         if (rp[_i] > bcol) break; \
164         if (rp[_i] == bcol) { \
165           bap  = ap +  bs2*_i + bs*cidx + ridx; \
166           if (addv == ADD_VALUES) *bap += value;  \
167           else                    *bap  = value;  \
168           goto b_noinsert; \
169         } \
170       } \
171       if (b->nonew == 1) goto b_noinsert; \
172       if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
173       MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
174       CHKMEMQ;\
175       N = nrow++ - 1;  \
176       /* shift up all the later entries in this row */ \
177       for (ii=N; ii>=_i; ii--) { \
178         rp[ii+1] = rp[ii]; \
179         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
180       } \
181       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
182       rp[_i]                      = bcol;  \
183       ap[bs2*_i + bs*cidx + ridx] = value;  \
184       b_noinsert:; \
185     bilen[brow] = nrow; \
186 }
187 
188 #undef __FUNCT__
189 #define __FUNCT__ "MatSetValues_MPIBAIJ"
190 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
191 {
192   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
193   MatScalar      value;
194   PetscTruth     roworiented = baij->roworiented;
195   PetscErrorCode ierr;
196   PetscInt       i,j,row,col;
197   PetscInt       rstart_orig=mat->rmap->rstart;
198   PetscInt       rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart;
199   PetscInt       cend_orig=mat->cmap->rend,bs=mat->rmap->bs;
200 
201   /* Some Variables required in the macro */
202   Mat            A = baij->A;
203   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)(A)->data;
204   PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
205   MatScalar      *aa=a->a;
206 
207   Mat            B = baij->B;
208   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(B)->data;
209   PetscInt       *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
210   MatScalar      *ba=b->a;
211 
212   PetscInt       *rp,ii,nrow,_i,rmax,N,brow,bcol;
213   PetscInt       low,high,t,ridx,cidx,bs2=a->bs2;
214   MatScalar      *ap,*bap;
215 
216   PetscFunctionBegin;
217   if (v) PetscValidScalarPointer(v,6);
218   for (i=0; i<m; i++) {
219     if (im[i] < 0) continue;
220 #if defined(PETSC_USE_DEBUG)
221     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
222 #endif
223     if (im[i] >= rstart_orig && im[i] < rend_orig) {
224       row = im[i] - rstart_orig;
225       for (j=0; j<n; j++) {
226         if (in[j] >= cstart_orig && in[j] < cend_orig){
227           col = in[j] - cstart_orig;
228           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
229           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
230           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
231         } else if (in[j] < 0) continue;
232 #if defined(PETSC_USE_DEBUG)
233         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
234 #endif
235         else {
236           if (mat->was_assembled) {
237             if (!baij->colmap) {
238               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
239             }
240 #if defined (PETSC_USE_CTABLE)
241             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
242             col  = col - 1;
243 #else
244             col = baij->colmap[in[j]/bs] - 1;
245 #endif
246             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
247               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
248               col =  in[j];
249               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
250               B = baij->B;
251               b = (Mat_SeqBAIJ*)(B)->data;
252               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
253               ba=b->a;
254             } else col += in[j]%bs;
255           } else col = in[j];
256           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
257           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
258           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
259         }
260       }
261     } else {
262       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
263       if (!baij->donotstash) {
264         if (roworiented) {
265           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
266         } else {
267           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
268         }
269       }
270     }
271   }
272   PetscFunctionReturn(0);
273 }
274 
275 #undef __FUNCT__
276 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
277 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
278 {
279   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
280   const PetscScalar *value;
281   MatScalar         *barray=baij->barray;
282   PetscTruth        roworiented = baij->roworiented;
283   PetscErrorCode    ierr;
284   PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
285   PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
286   PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
287 
288   PetscFunctionBegin;
289   if(!barray) {
290     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
291     baij->barray = barray;
292   }
293 
294   if (roworiented) {
295     stepval = (n-1)*bs;
296   } else {
297     stepval = (m-1)*bs;
298   }
299   for (i=0; i<m; i++) {
300     if (im[i] < 0) continue;
301 #if defined(PETSC_USE_DEBUG)
302     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
303 #endif
304     if (im[i] >= rstart && im[i] < rend) {
305       row = im[i] - rstart;
306       for (j=0; j<n; j++) {
307         /* If NumCol = 1 then a copy is not required */
308         if ((roworiented) && (n == 1)) {
309           barray = (MatScalar*)v + i*bs2;
310         } else if((!roworiented) && (m == 1)) {
311           barray = (MatScalar*)v + j*bs2;
312         } else { /* Here a copy is required */
313           if (roworiented) {
314             value = v + (i*(stepval+bs) + j)*bs;
315           } else {
316 	    value = v + (j*(stepval+bs) + i)*bs;
317           }
318           for (ii=0; ii<bs; ii++,value+=bs+stepval) {
319             for (jj=0; jj<bs; jj++) {
320               barray[jj]  = value[jj];
321             }
322             barray += bs;
323           }
324           barray -= bs2;
325         }
326 
327         if (in[j] >= cstart && in[j] < cend){
328           col  = in[j] - cstart;
329           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
330         }
331         else if (in[j] < 0) continue;
332 #if defined(PETSC_USE_DEBUG)
333         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
334 #endif
335         else {
336           if (mat->was_assembled) {
337             if (!baij->colmap) {
338               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
339             }
340 
341 #if defined(PETSC_USE_DEBUG)
342 #if defined (PETSC_USE_CTABLE)
343             { PetscInt data;
344               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
345               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
346             }
347 #else
348             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
349 #endif
350 #endif
351 #if defined (PETSC_USE_CTABLE)
352 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
353             col  = (col - 1)/bs;
354 #else
355             col = (baij->colmap[in[j]] - 1)/bs;
356 #endif
357             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
358               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
359               col =  in[j];
360             }
361           }
362           else col = in[j];
363           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
364         }
365       }
366     } else {
367       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
368       if (!baij->donotstash) {
369         if (roworiented) {
370           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
371         } else {
372           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
373         }
374       }
375     }
376   }
377   PetscFunctionReturn(0);
378 }
379 
380 #define HASH_KEY 0.6180339887
381 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
382 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
383 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
384 #undef __FUNCT__
385 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
386 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
387 {
388   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
389   PetscTruth     roworiented = baij->roworiented;
390   PetscErrorCode ierr;
391   PetscInt       i,j,row,col;
392   PetscInt       rstart_orig=mat->rmap->rstart;
393   PetscInt       rend_orig=mat->rmap->rend,Nbs=baij->Nbs;
394   PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
395   PetscReal      tmp;
396   MatScalar      **HD = baij->hd,value;
397 #if defined(PETSC_USE_DEBUG)
398   PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
399 #endif
400 
401   PetscFunctionBegin;
402   if (v) PetscValidScalarPointer(v,6);
403   for (i=0; i<m; i++) {
404 #if defined(PETSC_USE_DEBUG)
405     if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
406     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
407 #endif
408       row = im[i];
409     if (row >= rstart_orig && row < rend_orig) {
410       for (j=0; j<n; j++) {
411         col = in[j];
412         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
413         /* Look up PetscInto the Hash Table */
414         key = (row/bs)*Nbs+(col/bs)+1;
415         h1  = HASH(size,key,tmp);
416 
417 
418         idx = h1;
419 #if defined(PETSC_USE_DEBUG)
420         insert_ct++;
421         total_ct++;
422         if (HT[idx] != key) {
423           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
424           if (idx == size) {
425             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
426             if (idx == h1) {
427               SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
428             }
429           }
430         }
431 #else
432         if (HT[idx] != key) {
433           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
434           if (idx == size) {
435             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
436             if (idx == h1) {
437               SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
438             }
439           }
440         }
441 #endif
442         /* A HASH table entry is found, so insert the values at the correct address */
443         if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
444         else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
445       }
446     } else {
447       if (!baij->donotstash) {
448         if (roworiented) {
449           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
450         } else {
451           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
452         }
453       }
454     }
455   }
456 #if defined(PETSC_USE_DEBUG)
457   baij->ht_total_ct = total_ct;
458   baij->ht_insert_ct = insert_ct;
459 #endif
460   PetscFunctionReturn(0);
461 }
462 
463 #undef __FUNCT__
464 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
465 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
466 {
467   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
468   PetscTruth        roworiented = baij->roworiented;
469   PetscErrorCode    ierr;
470   PetscInt          i,j,ii,jj,row,col;
471   PetscInt          rstart=baij->rstartbs;
472   PetscInt          rend=mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
473   PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
474   PetscReal         tmp;
475   MatScalar         **HD = baij->hd,*baij_a;
476   const PetscScalar *v_t,*value;
477 #if defined(PETSC_USE_DEBUG)
478   PetscInt          total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
479 #endif
480 
481   PetscFunctionBegin;
482 
483   if (roworiented) {
484     stepval = (n-1)*bs;
485   } else {
486     stepval = (m-1)*bs;
487   }
488   for (i=0; i<m; i++) {
489 #if defined(PETSC_USE_DEBUG)
490     if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
491     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
492 #endif
493     row   = im[i];
494     v_t   = v + i*nbs2;
495     if (row >= rstart && row < rend) {
496       for (j=0; j<n; j++) {
497         col = in[j];
498 
499         /* Look up into the Hash Table */
500         key = row*Nbs+col+1;
501         h1  = HASH(size,key,tmp);
502 
503         idx = h1;
504 #if defined(PETSC_USE_DEBUG)
505         total_ct++;
506         insert_ct++;
507        if (HT[idx] != key) {
508           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
509           if (idx == size) {
510             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
511             if (idx == h1) {
512               SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
513             }
514           }
515         }
516 #else
517         if (HT[idx] != key) {
518           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
519           if (idx == size) {
520             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
521             if (idx == h1) {
522               SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
523             }
524           }
525         }
526 #endif
527         baij_a = HD[idx];
528         if (roworiented) {
529           /*value = v + i*(stepval+bs)*bs + j*bs;*/
530           /* value = v + (i*(stepval+bs)+j)*bs; */
531           value = v_t;
532           v_t  += bs;
533           if (addv == ADD_VALUES) {
534             for (ii=0; ii<bs; ii++,value+=stepval) {
535               for (jj=ii; jj<bs2; jj+=bs) {
536                 baij_a[jj]  += *value++;
537               }
538             }
539           } else {
540             for (ii=0; ii<bs; ii++,value+=stepval) {
541               for (jj=ii; jj<bs2; jj+=bs) {
542                 baij_a[jj]  = *value++;
543               }
544             }
545           }
546         } else {
547           value = v + j*(stepval+bs)*bs + i*bs;
548           if (addv == ADD_VALUES) {
549             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
550               for (jj=0; jj<bs; jj++) {
551                 baij_a[jj]  += *value++;
552               }
553             }
554           } else {
555             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
556               for (jj=0; jj<bs; jj++) {
557                 baij_a[jj]  = *value++;
558               }
559             }
560           }
561         }
562       }
563     } else {
564       if (!baij->donotstash) {
565         if (roworiented) {
566           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
567         } else {
568           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
569         }
570       }
571     }
572   }
573 #if defined(PETSC_USE_DEBUG)
574   baij->ht_total_ct = total_ct;
575   baij->ht_insert_ct = insert_ct;
576 #endif
577   PetscFunctionReturn(0);
578 }
579 
580 #undef __FUNCT__
581 #define __FUNCT__ "MatGetValues_MPIBAIJ"
582 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
583 {
584   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
585   PetscErrorCode ierr;
586   PetscInt       bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
587   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
588 
589   PetscFunctionBegin;
590   for (i=0; i<m; i++) {
591     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
592     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
593     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
594       row = idxm[i] - bsrstart;
595       for (j=0; j<n; j++) {
596         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
597         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
598         if (idxn[j] >= bscstart && idxn[j] < bscend){
599           col = idxn[j] - bscstart;
600           ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
601         } else {
602           if (!baij->colmap) {
603             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
604           }
605 #if defined (PETSC_USE_CTABLE)
606           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
607           data --;
608 #else
609           data = baij->colmap[idxn[j]/bs]-1;
610 #endif
611           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
612           else {
613             col  = data + idxn[j]%bs;
614             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
615           }
616         }
617       }
618     } else {
619       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
620     }
621   }
622  PetscFunctionReturn(0);
623 }
624 
625 #undef __FUNCT__
626 #define __FUNCT__ "MatNorm_MPIBAIJ"
627 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
628 {
629   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
630   Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
631   PetscErrorCode ierr;
632   PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
633   PetscReal      sum = 0.0;
634   MatScalar      *v;
635 
636   PetscFunctionBegin;
637   if (baij->size == 1) {
638     ierr =  MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
639   } else {
640     if (type == NORM_FROBENIUS) {
641       v = amat->a;
642       nz = amat->nz*bs2;
643       for (i=0; i<nz; i++) {
644 #if defined(PETSC_USE_COMPLEX)
645         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
646 #else
647         sum += (*v)*(*v); v++;
648 #endif
649       }
650       v = bmat->a;
651       nz = bmat->nz*bs2;
652       for (i=0; i<nz; i++) {
653 #if defined(PETSC_USE_COMPLEX)
654         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
655 #else
656         sum += (*v)*(*v); v++;
657 #endif
658       }
659       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
660       *nrm = sqrt(*nrm);
661     } else if (type == NORM_1) { /* max column sum */
662       PetscReal *tmp,*tmp2;
663       PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
664       ierr = PetscMalloc2(mat->cmap->N,PetscReal,&tmp,mat->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr);
665       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
666       v = amat->a; jj = amat->j;
667       for (i=0; i<amat->nz; i++) {
668         for (j=0; j<bs; j++){
669           col = bs*(cstart + *jj) + j; /* column index */
670           for (row=0; row<bs; row++){
671             tmp[col] += PetscAbsScalar(*v);  v++;
672           }
673         }
674         jj++;
675       }
676       v = bmat->a; jj = bmat->j;
677       for (i=0; i<bmat->nz; i++) {
678         for (j=0; j<bs; j++){
679           col = bs*garray[*jj] + j;
680           for (row=0; row<bs; row++){
681             tmp[col] += PetscAbsScalar(*v); v++;
682           }
683         }
684         jj++;
685       }
686       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
687       *nrm = 0.0;
688       for (j=0; j<mat->cmap->N; j++) {
689         if (tmp2[j] > *nrm) *nrm = tmp2[j];
690       }
691       ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
692     } else if (type == NORM_INFINITY) { /* max row sum */
693       PetscReal *sums;
694       ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr);
695       sum = 0.0;
696       for (j=0; j<amat->mbs; j++) {
697         for (row=0; row<bs; row++) sums[row] = 0.0;
698         v = amat->a + bs2*amat->i[j];
699         nz = amat->i[j+1]-amat->i[j];
700         for (i=0; i<nz; i++) {
701           for (col=0; col<bs; col++){
702             for (row=0; row<bs; row++){
703               sums[row] += PetscAbsScalar(*v); v++;
704             }
705           }
706         }
707         v = bmat->a + bs2*bmat->i[j];
708         nz = bmat->i[j+1]-bmat->i[j];
709         for (i=0; i<nz; i++) {
710           for (col=0; col<bs; col++){
711             for (row=0; row<bs; row++){
712               sums[row] += PetscAbsScalar(*v); v++;
713             }
714           }
715         }
716         for (row=0; row<bs; row++){
717           if (sums[row] > sum) sum = sums[row];
718         }
719       }
720       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
721       ierr = PetscFree(sums);CHKERRQ(ierr);
722     } else SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for this norm yet");
723   }
724   PetscFunctionReturn(0);
725 }
726 
727 /*
728   Creates the hash table, and sets the table
729   This table is created only once.
730   If new entried need to be added to the matrix
731   then the hash table has to be destroyed and
732   recreated.
733 */
734 #undef __FUNCT__
735 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
736 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
737 {
738   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
739   Mat            A = baij->A,B=baij->B;
740   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
741   PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
742   PetscErrorCode ierr;
743   PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
744   PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
745   PetscInt       *HT,key;
746   MatScalar      **HD;
747   PetscReal      tmp;
748 #if defined(PETSC_USE_INFO)
749   PetscInt       ct=0,max=0;
750 #endif
751 
752   PetscFunctionBegin;
753   if (baij->ht) PetscFunctionReturn(0);
754 
755   baij->ht_size = (PetscInt)(factor*nz);
756   ht_size       = baij->ht_size;
757 
758   /* Allocate Memory for Hash Table */
759   ierr = PetscMalloc2(ht_size,MatScalar*,&baij->hd,ht_size,PetscInt,&baij->ht);CHKERRQ(ierr);
760   ierr = PetscMemzero(baij->hd,ht_size*sizeof(MatScalar*));CHKERRQ(ierr);
761   ierr = PetscMemzero(baij->ht,ht_size*sizeof(PetscInt));CHKERRQ(ierr);
762   HD   = baij->hd;
763   HT   = baij->ht;
764 
765   /* Loop Over A */
766   for (i=0; i<a->mbs; i++) {
767     for (j=ai[i]; j<ai[i+1]; j++) {
768       row = i+rstart;
769       col = aj[j]+cstart;
770 
771       key = row*Nbs + col + 1;
772       h1  = HASH(ht_size,key,tmp);
773       for (k=0; k<ht_size; k++){
774         if (!HT[(h1+k)%ht_size]) {
775           HT[(h1+k)%ht_size] = key;
776           HD[(h1+k)%ht_size] = a->a + j*bs2;
777           break;
778 #if defined(PETSC_USE_INFO)
779         } else {
780           ct++;
781 #endif
782         }
783       }
784 #if defined(PETSC_USE_INFO)
785       if (k> max) max = k;
786 #endif
787     }
788   }
789   /* Loop Over B */
790   for (i=0; i<b->mbs; i++) {
791     for (j=bi[i]; j<bi[i+1]; j++) {
792       row = i+rstart;
793       col = garray[bj[j]];
794       key = row*Nbs + col + 1;
795       h1  = HASH(ht_size,key,tmp);
796       for (k=0; k<ht_size; k++){
797         if (!HT[(h1+k)%ht_size]) {
798           HT[(h1+k)%ht_size] = key;
799           HD[(h1+k)%ht_size] = b->a + j*bs2;
800           break;
801 #if defined(PETSC_USE_INFO)
802         } else {
803           ct++;
804 #endif
805         }
806       }
807 #if defined(PETSC_USE_INFO)
808       if (k> max) max = k;
809 #endif
810     }
811   }
812 
813   /* Print Summary */
814 #if defined(PETSC_USE_INFO)
815   for (i=0,j=0; i<ht_size; i++) {
816     if (HT[i]) {j++;}
817   }
818   ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
819 #endif
820   PetscFunctionReturn(0);
821 }
822 
823 #undef __FUNCT__
824 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
825 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
826 {
827   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
828   PetscErrorCode ierr;
829   PetscInt       nstash,reallocs;
830   InsertMode     addv;
831 
832   PetscFunctionBegin;
833   if (baij->donotstash || mat->nooffprocentries) {
834     PetscFunctionReturn(0);
835   }
836 
837   /* make sure all processors are either in INSERTMODE or ADDMODE */
838   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
839   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
840   mat->insertmode = addv; /* in case this processor had no cache */
841 
842   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
843   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
844   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
845   ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
846   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
847   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
848   PetscFunctionReturn(0);
849 }
850 
851 #undef __FUNCT__
852 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
853 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
854 {
855   Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
856   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)baij->A->data;
857   PetscErrorCode ierr;
858   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
859   PetscInt       *row,*col;
860   PetscTruth     r1,r2,r3,other_disassembled;
861   MatScalar      *val;
862   InsertMode     addv = mat->insertmode;
863   PetscMPIInt    n;
864 
865   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
866   PetscFunctionBegin;
867   if (!baij->donotstash && !mat->nooffprocentries) {
868     while (1) {
869       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
870       if (!flg) break;
871 
872       for (i=0; i<n;) {
873         /* Now identify the consecutive vals belonging to the same row */
874         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
875         if (j < n) ncols = j-i;
876         else       ncols = n-i;
877         /* Now assemble all these values with a single function call */
878         ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
879         i = j;
880       }
881     }
882     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
883     /* Now process the block-stash. Since the values are stashed column-oriented,
884        set the roworiented flag to column oriented, and after MatSetValues()
885        restore the original flags */
886     r1 = baij->roworiented;
887     r2 = a->roworiented;
888     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
889     baij->roworiented = PETSC_FALSE;
890     a->roworiented    = PETSC_FALSE;
891     (((Mat_SeqBAIJ*)baij->B->data))->roworiented    = PETSC_FALSE; /* b->roworiented */
892     while (1) {
893       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
894       if (!flg) break;
895 
896       for (i=0; i<n;) {
897         /* Now identify the consecutive vals belonging to the same row */
898         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
899         if (j < n) ncols = j-i;
900         else       ncols = n-i;
901         ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
902         i = j;
903       }
904     }
905     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
906     baij->roworiented = r1;
907     a->roworiented    = r2;
908     ((Mat_SeqBAIJ*)baij->B->data)->roworiented    = r3; /* b->roworiented */
909   }
910 
911   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
912   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
913 
914   /* determine if any processor has disassembled, if so we must
915      also disassemble ourselfs, in order that we may reassemble. */
916   /*
917      if nonzero structure of submatrix B cannot change then we know that
918      no processor disassembled thus we can skip this stuff
919   */
920   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
921     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
922     if (mat->was_assembled && !other_disassembled) {
923       ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
924     }
925   }
926 
927   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
928     ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
929   }
930   ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
931   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
932   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
933 
934 #if defined(PETSC_USE_INFO)
935   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
936     ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);
937     baij->ht_total_ct  = 0;
938     baij->ht_insert_ct = 0;
939   }
940 #endif
941   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
942     ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
943     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
944     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
945   }
946 
947   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
948   baij->rowvalues = 0;
949   PetscFunctionReturn(0);
950 }
951 
952 #undef __FUNCT__
953 #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
954 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
955 {
956   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
957   PetscErrorCode    ierr;
958   PetscMPIInt       size = baij->size,rank = baij->rank;
959   PetscInt          bs = mat->rmap->bs;
960   PetscTruth        iascii,isdraw;
961   PetscViewer       sviewer;
962   PetscViewerFormat format;
963 
964   PetscFunctionBegin;
965   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
966   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
967   if (iascii) {
968     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
969     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
970       MatInfo info;
971       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
972       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
973       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
974              rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
975       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
976       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
977       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
978       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
979       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
980       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
981       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
982       PetscFunctionReturn(0);
983     } else if (format == PETSC_VIEWER_ASCII_INFO) {
984       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
985       PetscFunctionReturn(0);
986     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
987       PetscFunctionReturn(0);
988     }
989   }
990 
991   if (isdraw) {
992     PetscDraw       draw;
993     PetscTruth isnull;
994     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
995     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
996   }
997 
998   if (size == 1) {
999     ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1000     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
1001   } else {
1002     /* assemble the entire matrix onto first processor. */
1003     Mat         A;
1004     Mat_SeqBAIJ *Aloc;
1005     PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
1006     MatScalar   *a;
1007 
1008     /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
1009     /* Perhaps this should be the type of mat? */
1010     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1011     if (!rank) {
1012       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1013     } else {
1014       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1015     }
1016     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
1017     ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1018     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1019 
1020     /* copy over the A part */
1021     Aloc = (Mat_SeqBAIJ*)baij->A->data;
1022     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1023     ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
1024 
1025     for (i=0; i<mbs; i++) {
1026       rvals[0] = bs*(baij->rstartbs + i);
1027       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1028       for (j=ai[i]; j<ai[i+1]; j++) {
1029         col = (baij->cstartbs+aj[j])*bs;
1030         for (k=0; k<bs; k++) {
1031           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1032           col++; a += bs;
1033         }
1034       }
1035     }
1036     /* copy over the B part */
1037     Aloc = (Mat_SeqBAIJ*)baij->B->data;
1038     ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1039     for (i=0; i<mbs; i++) {
1040       rvals[0] = bs*(baij->rstartbs + i);
1041       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1042       for (j=ai[i]; j<ai[i+1]; j++) {
1043         col = baij->garray[aj[j]]*bs;
1044         for (k=0; k<bs; k++) {
1045           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1046           col++; a += bs;
1047         }
1048       }
1049     }
1050     ierr = PetscFree(rvals);CHKERRQ(ierr);
1051     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1052     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1053     /*
1054        Everyone has to call to draw the matrix since the graphics waits are
1055        synchronized across all processors that share the PetscDraw object
1056     */
1057     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1058     if (!rank) {
1059       ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1060       ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1061     }
1062     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1063     ierr = MatDestroy(A);CHKERRQ(ierr);
1064   }
1065   PetscFunctionReturn(0);
1066 }
1067 
1068 #undef __FUNCT__
1069 #define __FUNCT__ "MatView_MPIBAIJ_Binary"
1070 static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
1071 {
1072   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)mat->data;
1073   Mat_SeqBAIJ*   A = (Mat_SeqBAIJ*)a->A->data;
1074   Mat_SeqBAIJ*   B = (Mat_SeqBAIJ*)a->B->data;
1075   PetscErrorCode ierr;
1076   PetscInt       i,*row_lens,*crow_lens,bs = mat->rmap->bs,count,j,k,bs2=a->bs2,header[4],nz,rlen;
1077   PetscInt       *range,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll;
1078   int            fd;
1079   PetscScalar    *column_values;
1080   FILE           *file;
1081   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1082 
1083   PetscFunctionBegin;
1084   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1085   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
1086   nz   = bs2*(A->nz + B->nz);
1087   rlen = mat->rmap->n;
1088   if (!rank) {
1089     header[0] = MAT_FILE_CLASSID;
1090     header[1] = mat->rmap->N;
1091     header[2] = mat->cmap->N;
1092     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1093     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1094     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1095     /* get largest number of rows any processor has */
1096     range = mat->rmap->range;
1097     for (i=1; i<size; i++) {
1098       rlen = PetscMax(rlen,range[i+1] - range[i]);
1099     }
1100   } else {
1101     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1102   }
1103 
1104   ierr  = PetscMalloc((rlen/bs)*sizeof(PetscInt),&crow_lens);CHKERRQ(ierr);
1105   /* compute lengths of each row  */
1106   count = 0;
1107   for (i=0; i<a->mbs; i++) {
1108     crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1109   }
1110   /* store the row lengths to the file */
1111   if (!rank) {
1112     MPI_Status status;
1113     ierr  = PetscMalloc(rlen*sizeof(PetscInt),&row_lens);CHKERRQ(ierr);
1114     rlen  = (range[1] - range[0])/bs;
1115     for (i=0; i<rlen; i++) {
1116       for (j=0; j<bs; j++) {
1117         row_lens[i*bs+j] = bs*crow_lens[i];
1118       }
1119     }
1120     ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1121     for (i=1; i<size; i++) {
1122       rlen = (range[i+1] - range[i])/bs;
1123       ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1124       for (k=0; k<rlen; k++) {
1125 	for (j=0; j<bs; j++) {
1126 	  row_lens[k*bs+j] = bs*crow_lens[k];
1127 	}
1128       }
1129       ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1130     }
1131     ierr = PetscFree(row_lens);CHKERRQ(ierr);
1132   } else {
1133     ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1134   }
1135   ierr = PetscFree(crow_lens);CHKERRQ(ierr);
1136 
1137   /* load up the local column indices. Include for all rows not just one for each block row since process 0 does not have the
1138      information needed to make it for each row from a block row. This does require more communication but still not more than
1139      the communication needed for the nonzero values  */
1140   nzmax = nz; /*  space a largest processor needs */
1141   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1142   ierr = PetscMalloc(nzmax*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
1143   cnt  = 0;
1144   for (i=0; i<a->mbs; i++) {
1145     pcnt = cnt;
1146     for (j=B->i[i]; j<B->i[i+1]; j++) {
1147       if ( (col = garray[B->j[j]]) > cstart) break;
1148       for (l=0; l<bs; l++) {
1149 	column_indices[cnt++] = bs*col+l;
1150       }
1151     }
1152     for (k=A->i[i]; k<A->i[i+1]; k++) {
1153       for (l=0; l<bs; l++) {
1154         column_indices[cnt++] = bs*(A->j[k] + cstart)+l;
1155       }
1156     }
1157     for (; j<B->i[i+1]; j++) {
1158       for (l=0; l<bs; l++) {
1159         column_indices[cnt++] = bs*garray[B->j[j]]+l;
1160       }
1161     }
1162     len = cnt - pcnt;
1163     for (k=1; k<bs; k++) {
1164       ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr);
1165       cnt += len;
1166     }
1167   }
1168   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1169 
1170   /* store the columns to the file */
1171   if (!rank) {
1172     MPI_Status status;
1173     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1174     for (i=1; i<size; i++) {
1175       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1176       ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1177       ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1178     }
1179   } else {
1180     ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1181     ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1182   }
1183   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1184 
1185   /* load up the numerical values */
1186   ierr = PetscMalloc(nzmax*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1187   cnt = 0;
1188   for (i=0; i<a->mbs; i++) {
1189     rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]);
1190     for (j=B->i[i]; j<B->i[i+1]; j++) {
1191       if ( garray[B->j[j]] > cstart) break;
1192       for (l=0; l<bs; l++) {
1193         for (ll=0; ll<bs; ll++) {
1194 	  column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1195         }
1196       }
1197       cnt += bs;
1198     }
1199     for (k=A->i[i]; k<A->i[i+1]; k++) {
1200       for (l=0; l<bs; l++) {
1201         for (ll=0; ll<bs; ll++) {
1202           column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll];
1203         }
1204       }
1205       cnt += bs;
1206     }
1207     for (; j<B->i[i+1]; j++) {
1208       for (l=0; l<bs; l++) {
1209         for (ll=0; ll<bs; ll++) {
1210 	  column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1211         }
1212       }
1213       cnt += bs;
1214     }
1215     cnt += (bs-1)*rlen;
1216   }
1217   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1218 
1219   /* store the column values to the file */
1220   if (!rank) {
1221     MPI_Status status;
1222     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1223     for (i=1; i<size; i++) {
1224       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1225       ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1226       ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1227     }
1228   } else {
1229     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1230     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1231   }
1232   ierr = PetscFree(column_values);CHKERRQ(ierr);
1233 
1234   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1235   if (file) {
1236     fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
1237   }
1238   PetscFunctionReturn(0);
1239 }
1240 
1241 #undef __FUNCT__
1242 #define __FUNCT__ "MatView_MPIBAIJ"
1243 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
1244 {
1245   PetscErrorCode ierr;
1246   PetscTruth     iascii,isdraw,issocket,isbinary;
1247 
1248   PetscFunctionBegin;
1249   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1250   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1251   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1252   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1253   if (iascii || isdraw || issocket) {
1254     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1255   } else if (isbinary) {
1256     ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1257   } else {
1258     SETERRQ1(((PetscObject)mat)->comm,PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
1259   }
1260   PetscFunctionReturn(0);
1261 }
1262 
1263 #undef __FUNCT__
1264 #define __FUNCT__ "MatDestroy_MPIBAIJ"
1265 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
1266 {
1267   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1268   PetscErrorCode ierr;
1269 
1270   PetscFunctionBegin;
1271 #if defined(PETSC_USE_LOG)
1272   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
1273 #endif
1274   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1275   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
1276   if (baij->A){ierr = MatDestroy(baij->A);CHKERRQ(ierr);}
1277   if (baij->B){ierr = MatDestroy(baij->B);CHKERRQ(ierr);}
1278 #if defined (PETSC_USE_CTABLE)
1279   if (baij->colmap) {ierr = PetscTableDestroy(baij->colmap);CHKERRQ(ierr);}
1280 #else
1281   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
1282 #endif
1283   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
1284   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
1285   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
1286   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
1287   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
1288   ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);
1289   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
1290   ierr = PetscFree(baij);CHKERRQ(ierr);
1291 
1292   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1293   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1294   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1295   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
1296   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1297   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1298   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
1299   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr);
1300   PetscFunctionReturn(0);
1301 }
1302 
1303 #undef __FUNCT__
1304 #define __FUNCT__ "MatMult_MPIBAIJ"
1305 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1306 {
1307   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1308   PetscErrorCode ierr;
1309   PetscInt       nt;
1310 
1311   PetscFunctionBegin;
1312   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1313   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1314   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1315   if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
1316   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1317   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1318   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1319   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1320   PetscFunctionReturn(0);
1321 }
1322 
1323 #undef __FUNCT__
1324 #define __FUNCT__ "MatMultAdd_MPIBAIJ"
1325 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1326 {
1327   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1328   PetscErrorCode ierr;
1329 
1330   PetscFunctionBegin;
1331   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1332   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1333   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1334   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1335   PetscFunctionReturn(0);
1336 }
1337 
1338 #undef __FUNCT__
1339 #define __FUNCT__ "MatMultTranspose_MPIBAIJ"
1340 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1341 {
1342   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1343   PetscErrorCode ierr;
1344   PetscTruth     merged;
1345 
1346   PetscFunctionBegin;
1347   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1348   /* do nondiagonal part */
1349   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1350   if (!merged) {
1351     /* send it on its way */
1352     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1353     /* do local part */
1354     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1355     /* receive remote parts: note this assumes the values are not actually */
1356     /* inserted in yy until the next line */
1357     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1358   } else {
1359     /* do local part */
1360     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1361     /* send it on its way */
1362     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1363     /* values actually were received in the Begin() but we need to call this nop */
1364     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1365   }
1366   PetscFunctionReturn(0);
1367 }
1368 
1369 #undef __FUNCT__
1370 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
1371 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1372 {
1373   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1374   PetscErrorCode ierr;
1375 
1376   PetscFunctionBegin;
1377   /* do nondiagonal part */
1378   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1379   /* send it on its way */
1380   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1381   /* do local part */
1382   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1383   /* receive remote parts: note this assumes the values are not actually */
1384   /* inserted in yy until the next line, which is true for my implementation*/
1385   /* but is not perhaps always true. */
1386   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1387   PetscFunctionReturn(0);
1388 }
1389 
1390 /*
1391   This only works correctly for square matrices where the subblock A->A is the
1392    diagonal block
1393 */
1394 #undef __FUNCT__
1395 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
1396 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1397 {
1398   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1399   PetscErrorCode ierr;
1400 
1401   PetscFunctionBegin;
1402   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1403   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1404   PetscFunctionReturn(0);
1405 }
1406 
1407 #undef __FUNCT__
1408 #define __FUNCT__ "MatScale_MPIBAIJ"
1409 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
1410 {
1411   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1412   PetscErrorCode ierr;
1413 
1414   PetscFunctionBegin;
1415   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1416   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1417   PetscFunctionReturn(0);
1418 }
1419 
1420 #undef __FUNCT__
1421 #define __FUNCT__ "MatGetRow_MPIBAIJ"
1422 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1423 {
1424   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
1425   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1426   PetscErrorCode ierr;
1427   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1428   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
1429   PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;
1430 
1431   PetscFunctionBegin;
1432   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1433   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1434   mat->getrowactive = PETSC_TRUE;
1435 
1436   if (!mat->rowvalues && (idx || v)) {
1437     /*
1438         allocate enough space to hold information from the longest row.
1439     */
1440     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1441     PetscInt     max = 1,mbs = mat->mbs,tmp;
1442     for (i=0; i<mbs; i++) {
1443       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1444       if (max < tmp) { max = tmp; }
1445     }
1446     ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1447   }
1448   lrow = row - brstart;
1449 
1450   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1451   if (!v)   {pvA = 0; pvB = 0;}
1452   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1453   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1454   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1455   nztot = nzA + nzB;
1456 
1457   cmap  = mat->garray;
1458   if (v  || idx) {
1459     if (nztot) {
1460       /* Sort by increasing column numbers, assuming A and B already sorted */
1461       PetscInt imark = -1;
1462       if (v) {
1463         *v = v_p = mat->rowvalues;
1464         for (i=0; i<nzB; i++) {
1465           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1466           else break;
1467         }
1468         imark = i;
1469         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1470         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1471       }
1472       if (idx) {
1473         *idx = idx_p = mat->rowindices;
1474         if (imark > -1) {
1475           for (i=0; i<imark; i++) {
1476             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1477           }
1478         } else {
1479           for (i=0; i<nzB; i++) {
1480             if (cmap[cworkB[i]/bs] < cstart)
1481               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1482             else break;
1483           }
1484           imark = i;
1485         }
1486         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1487         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1488       }
1489     } else {
1490       if (idx) *idx = 0;
1491       if (v)   *v   = 0;
1492     }
1493   }
1494   *nz = nztot;
1495   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1496   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1497   PetscFunctionReturn(0);
1498 }
1499 
1500 #undef __FUNCT__
1501 #define __FUNCT__ "MatRestoreRow_MPIBAIJ"
1502 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1503 {
1504   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1505 
1506   PetscFunctionBegin;
1507   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1508   baij->getrowactive = PETSC_FALSE;
1509   PetscFunctionReturn(0);
1510 }
1511 
1512 #undef __FUNCT__
1513 #define __FUNCT__ "MatZeroEntries_MPIBAIJ"
1514 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1515 {
1516   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1517   PetscErrorCode ierr;
1518 
1519   PetscFunctionBegin;
1520   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1521   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1522   PetscFunctionReturn(0);
1523 }
1524 
1525 #undef __FUNCT__
1526 #define __FUNCT__ "MatGetInfo_MPIBAIJ"
1527 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1528 {
1529   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
1530   Mat            A = a->A,B = a->B;
1531   PetscErrorCode ierr;
1532   PetscReal      isend[5],irecv[5];
1533 
1534   PetscFunctionBegin;
1535   info->block_size     = (PetscReal)matin->rmap->bs;
1536   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1537   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1538   isend[3] = info->memory;  isend[4] = info->mallocs;
1539   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1540   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1541   isend[3] += info->memory;  isend[4] += info->mallocs;
1542   if (flag == MAT_LOCAL) {
1543     info->nz_used      = isend[0];
1544     info->nz_allocated = isend[1];
1545     info->nz_unneeded  = isend[2];
1546     info->memory       = isend[3];
1547     info->mallocs      = isend[4];
1548   } else if (flag == MAT_GLOBAL_MAX) {
1549     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1550     info->nz_used      = irecv[0];
1551     info->nz_allocated = irecv[1];
1552     info->nz_unneeded  = irecv[2];
1553     info->memory       = irecv[3];
1554     info->mallocs      = irecv[4];
1555   } else if (flag == MAT_GLOBAL_SUM) {
1556     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1557     info->nz_used      = irecv[0];
1558     info->nz_allocated = irecv[1];
1559     info->nz_unneeded  = irecv[2];
1560     info->memory       = irecv[3];
1561     info->mallocs      = irecv[4];
1562   } else {
1563     SETERRQ1(((PetscObject)matin)->comm,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1564   }
1565   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1566   info->fill_ratio_needed = 0;
1567   info->factor_mallocs    = 0;
1568   PetscFunctionReturn(0);
1569 }
1570 
1571 #undef __FUNCT__
1572 #define __FUNCT__ "MatSetOption_MPIBAIJ"
1573 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscTruth flg)
1574 {
1575   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1576   PetscErrorCode ierr;
1577 
1578   PetscFunctionBegin;
1579   switch (op) {
1580   case MAT_NEW_NONZERO_LOCATIONS:
1581   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1582   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1583   case MAT_KEEP_NONZERO_PATTERN:
1584   case MAT_NEW_NONZERO_LOCATION_ERR:
1585     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1586     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1587     break;
1588   case MAT_ROW_ORIENTED:
1589     a->roworiented = flg;
1590     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1591     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1592     break;
1593   case MAT_NEW_DIAGONALS:
1594     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1595     break;
1596   case MAT_IGNORE_OFF_PROC_ENTRIES:
1597     a->donotstash = flg;
1598     break;
1599   case MAT_USE_HASH_TABLE:
1600     a->ht_flag = flg;
1601     break;
1602   case MAT_SYMMETRIC:
1603   case MAT_STRUCTURALLY_SYMMETRIC:
1604   case MAT_HERMITIAN:
1605   case MAT_SYMMETRY_ETERNAL:
1606     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1607     break;
1608   default:
1609     SETERRQ1(((PetscObject)A)->comm,PETSC_ERR_SUP,"unknown option %d",op);
1610   }
1611   PetscFunctionReturn(0);
1612 }
1613 
1614 #undef __FUNCT__
1615 #define __FUNCT__ "MatTranspose_MPIBAIJ("
1616 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
1617 {
1618   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
1619   Mat_SeqBAIJ    *Aloc;
1620   Mat            B;
1621   PetscErrorCode ierr;
1622   PetscInt       M=A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
1623   PetscInt       bs=A->rmap->bs,mbs=baij->mbs;
1624   MatScalar      *a;
1625 
1626   PetscFunctionBegin;
1627   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1628   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1629     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
1630     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1631     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1632     ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1633   } else {
1634     B = *matout;
1635   }
1636 
1637   /* copy over the A part */
1638   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1639   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1640   ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
1641 
1642   for (i=0; i<mbs; i++) {
1643     rvals[0] = bs*(baij->rstartbs + i);
1644     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1645     for (j=ai[i]; j<ai[i+1]; j++) {
1646       col = (baij->cstartbs+aj[j])*bs;
1647       for (k=0; k<bs; k++) {
1648         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1649         col++; a += bs;
1650       }
1651     }
1652   }
1653   /* copy over the B part */
1654   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1655   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1656   for (i=0; i<mbs; i++) {
1657     rvals[0] = bs*(baij->rstartbs + i);
1658     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1659     for (j=ai[i]; j<ai[i+1]; j++) {
1660       col = baij->garray[aj[j]]*bs;
1661       for (k=0; k<bs; k++) {
1662         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1663         col++; a += bs;
1664       }
1665     }
1666   }
1667   ierr = PetscFree(rvals);CHKERRQ(ierr);
1668   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1669   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1670 
1671   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1672     *matout = B;
1673   } else {
1674     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
1675   }
1676   PetscFunctionReturn(0);
1677 }
1678 
1679 #undef __FUNCT__
1680 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
1681 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1682 {
1683   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1684   Mat            a = baij->A,b = baij->B;
1685   PetscErrorCode ierr;
1686   PetscInt       s1,s2,s3;
1687 
1688   PetscFunctionBegin;
1689   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1690   if (rr) {
1691     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1692     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1693     /* Overlap communication with computation. */
1694     ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1695   }
1696   if (ll) {
1697     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1698     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1699     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1700   }
1701   /* scale  the diagonal block */
1702   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1703 
1704   if (rr) {
1705     /* Do a scatter end and then right scale the off-diagonal block */
1706     ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1707     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1708   }
1709 
1710   PetscFunctionReturn(0);
1711 }
1712 
1713 #undef __FUNCT__
1714 #define __FUNCT__ "MatZeroRows_MPIBAIJ"
1715 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
1716 {
1717   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1718   PetscErrorCode ierr;
1719   PetscMPIInt    imdex,size = l->size,n,rank = l->rank;
1720   PetscInt       i,*owners = A->rmap->range;
1721   PetscInt       *nprocs,j,idx,nsends,row;
1722   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
1723   PetscInt       *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source,lastidx = -1;
1724   PetscInt       *lens,*lrows,*values,rstart_bs=A->rmap->rstart;
1725   MPI_Comm       comm = ((PetscObject)A)->comm;
1726   MPI_Request    *send_waits,*recv_waits;
1727   MPI_Status     recv_status,*send_status;
1728 #if defined(PETSC_DEBUG)
1729   PetscTruth     found = PETSC_FALSE;
1730 #endif
1731 
1732   PetscFunctionBegin;
1733   /*  first count number of contributors to each processor */
1734   ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
1735   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
1736   ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
1737   j = 0;
1738   for (i=0; i<N; i++) {
1739     if (lastidx > (idx = rows[i])) j = 0;
1740     lastidx = idx;
1741     for (; j<size; j++) {
1742       if (idx >= owners[j] && idx < owners[j+1]) {
1743         nprocs[2*j]++;
1744         nprocs[2*j+1] = 1;
1745         owner[i] = j;
1746 #if defined(PETSC_DEBUG)
1747         found = PETSC_TRUE;
1748 #endif
1749         break;
1750       }
1751     }
1752 #if defined(PETSC_DEBUG)
1753     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
1754     found = PETSC_FALSE;
1755 #endif
1756   }
1757   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
1758 
1759   if (A->nooffproczerorows) {
1760     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
1761     nrecvs = nsends;
1762     nmax   = N;
1763   } else {
1764     /* inform other processors of number of messages and max length*/
1765     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
1766   }
1767 
1768   /* post receives:   */
1769   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
1770   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
1771   for (i=0; i<nrecvs; i++) {
1772     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
1773   }
1774 
1775   /* do sends:
1776      1) starts[i] gives the starting index in svalues for stuff going to
1777      the ith processor
1778   */
1779   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
1780   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
1781   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
1782   starts[0]  = 0;
1783   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1784   for (i=0; i<N; i++) {
1785     svalues[starts[owner[i]]++] = rows[i];
1786   }
1787 
1788   starts[0] = 0;
1789   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1790   count = 0;
1791   for (i=0; i<size; i++) {
1792     if (nprocs[2*i+1]) {
1793       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
1794     }
1795   }
1796   ierr = PetscFree(starts);CHKERRQ(ierr);
1797 
1798   base = owners[rank];
1799 
1800   /*  wait on receives */
1801   ierr   = PetscMalloc2(nrecvs+1,PetscInt,&lens,nrecvs+1,PetscInt,&source);CHKERRQ(ierr);
1802   count  = nrecvs;
1803   slen = 0;
1804   while (count) {
1805     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
1806     /* unpack receives into our local space */
1807     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
1808     source[imdex]  = recv_status.MPI_SOURCE;
1809     lens[imdex]    = n;
1810     slen          += n;
1811     count--;
1812   }
1813   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1814 
1815   /* move the data into the send scatter */
1816   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
1817   count = 0;
1818   for (i=0; i<nrecvs; i++) {
1819     values = rvalues + i*nmax;
1820     for (j=0; j<lens[i]; j++) {
1821       lrows[count++] = values[j] - base;
1822     }
1823   }
1824   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1825   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
1826   ierr = PetscFree(owner);CHKERRQ(ierr);
1827   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1828 
1829   /* actually zap the local rows */
1830   /*
1831         Zero the required rows. If the "diagonal block" of the matrix
1832      is square and the user wishes to set the diagonal we use separate
1833      code so that MatSetValues() is not called for each diagonal allocating
1834      new memory, thus calling lots of mallocs and slowing things down.
1835 
1836   */
1837   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1838   ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0);CHKERRQ(ierr);
1839   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
1840     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag);CHKERRQ(ierr);
1841   } else if (diag != 0.0) {
1842     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr);
1843     if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1844        MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1845     for (i=0; i<slen; i++) {
1846       row  = lrows[i] + rstart_bs;
1847       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
1848     }
1849     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1850     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1851   } else {
1852     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr);
1853   }
1854 
1855   ierr = PetscFree(lrows);CHKERRQ(ierr);
1856 
1857   /* wait on sends */
1858   if (nsends) {
1859     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1860     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1861     ierr = PetscFree(send_status);CHKERRQ(ierr);
1862   }
1863   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1864   ierr = PetscFree(svalues);CHKERRQ(ierr);
1865 
1866   PetscFunctionReturn(0);
1867 }
1868 
1869 #undef __FUNCT__
1870 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
1871 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1872 {
1873   Mat_MPIBAIJ    *a   = (Mat_MPIBAIJ*)A->data;
1874   PetscErrorCode ierr;
1875 
1876   PetscFunctionBegin;
1877   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1878   PetscFunctionReturn(0);
1879 }
1880 
1881 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
1882 
1883 #undef __FUNCT__
1884 #define __FUNCT__ "MatEqual_MPIBAIJ"
1885 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag)
1886 {
1887   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1888   Mat            a,b,c,d;
1889   PetscTruth     flg;
1890   PetscErrorCode ierr;
1891 
1892   PetscFunctionBegin;
1893   a = matA->A; b = matA->B;
1894   c = matB->A; d = matB->B;
1895 
1896   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1897   if (flg) {
1898     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1899   }
1900   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1901   PetscFunctionReturn(0);
1902 }
1903 
1904 #undef __FUNCT__
1905 #define __FUNCT__ "MatCopy_MPIBAIJ"
1906 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
1907 {
1908   PetscErrorCode ierr;
1909   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ *)A->data;
1910   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ *)B->data;
1911 
1912   PetscFunctionBegin;
1913   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1914   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1915     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1916   } else {
1917     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1918     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1919   }
1920   PetscFunctionReturn(0);
1921 }
1922 
1923 #undef __FUNCT__
1924 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ"
1925 PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A)
1926 {
1927   PetscErrorCode ierr;
1928 
1929   PetscFunctionBegin;
1930   ierr =  MatMPIBAIJSetPreallocation(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1931   PetscFunctionReturn(0);
1932 }
1933 
1934 #undef __FUNCT__
1935 #define __FUNCT__ "MatAXPY_MPIBAIJ"
1936 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1937 {
1938   PetscErrorCode ierr;
1939   Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data;
1940   PetscBLASInt   bnz,one=1;
1941   Mat_SeqBAIJ    *x,*y;
1942 
1943   PetscFunctionBegin;
1944   if (str == SAME_NONZERO_PATTERN) {
1945     PetscScalar alpha = a;
1946     x = (Mat_SeqBAIJ *)xx->A->data;
1947     y = (Mat_SeqBAIJ *)yy->A->data;
1948     bnz = PetscBLASIntCast(x->nz);
1949     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1950     x = (Mat_SeqBAIJ *)xx->B->data;
1951     y = (Mat_SeqBAIJ *)yy->B->data;
1952     bnz = PetscBLASIntCast(x->nz);
1953     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1954   } else {
1955     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1956   }
1957   PetscFunctionReturn(0);
1958 }
1959 
1960 #undef __FUNCT__
1961 #define __FUNCT__ "MatSetBlockSize_MPIBAIJ"
1962 PetscErrorCode MatSetBlockSize_MPIBAIJ(Mat A,PetscInt bs)
1963 {
1964   Mat_MPIBAIJ    *a   = (Mat_MPIBAIJ*)A->data;
1965   PetscInt rbs,cbs;
1966   PetscErrorCode ierr;
1967 
1968   PetscFunctionBegin;
1969   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1970   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1971   ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr);
1972   ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr);
1973   if (rbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,rbs);
1974   if (cbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,cbs);
1975   PetscFunctionReturn(0);
1976 }
1977 
1978 #undef __FUNCT__
1979 #define __FUNCT__ "MatRealPart_MPIBAIJ"
1980 PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
1981 {
1982   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ*)A->data;
1983   PetscErrorCode ierr;
1984 
1985   PetscFunctionBegin;
1986   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1987   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1988   PetscFunctionReturn(0);
1989 }
1990 
1991 #undef __FUNCT__
1992 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ"
1993 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
1994 {
1995   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ*)A->data;
1996   PetscErrorCode ierr;
1997 
1998   PetscFunctionBegin;
1999   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2000   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2001   PetscFunctionReturn(0);
2002 }
2003 
2004 #undef __FUNCT__
2005 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ"
2006 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
2007 {
2008   PetscErrorCode ierr;
2009   IS             iscol_local;
2010   PetscInt       csize;
2011 
2012   PetscFunctionBegin;
2013   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
2014   if (call == MAT_REUSE_MATRIX) {
2015     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
2016     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2017   } else {
2018     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
2019   }
2020   ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
2021   if (call == MAT_INITIAL_MATRIX) {
2022     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
2023     ierr = ISDestroy(iscol_local);CHKERRQ(ierr);
2024   }
2025   PetscFunctionReturn(0);
2026 }
2027 
2028 #undef __FUNCT__
2029 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ"
2030 /*
2031     Not great since it makes two copies of the submatrix, first an SeqBAIJ
2032   in local and then by concatenating the local matrices the end result.
2033   Writing it directly would be much like MatGetSubMatrices_MPIBAIJ()
2034 */
2035 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2036 {
2037   PetscErrorCode ierr;
2038   PetscMPIInt    rank,size;
2039   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs;
2040   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2041   Mat            *local,M,Mreuse;
2042   MatScalar      *vwork,*aa;
2043   MPI_Comm       comm = ((PetscObject)mat)->comm;
2044   Mat_SeqBAIJ    *aij;
2045 
2046 
2047   PetscFunctionBegin;
2048   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2049   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2050 
2051   if (call ==  MAT_REUSE_MATRIX) {
2052     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2053     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2054     local = &Mreuse;
2055     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2056   } else {
2057     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2058     Mreuse = *local;
2059     ierr   = PetscFree(local);CHKERRQ(ierr);
2060   }
2061 
2062   /*
2063       m - number of local rows
2064       n - number of columns (same on all processors)
2065       rstart - first row in new global matrix generated
2066   */
2067   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2068   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2069   m    = m/bs;
2070   n    = n/bs;
2071 
2072   if (call == MAT_INITIAL_MATRIX) {
2073     aij = (Mat_SeqBAIJ*)(Mreuse)->data;
2074     ii  = aij->i;
2075     jj  = aij->j;
2076 
2077     /*
2078         Determine the number of non-zeros in the diagonal and off-diagonal
2079         portions of the matrix in order to do correct preallocation
2080     */
2081 
2082     /* first get start and end of "diagonal" columns */
2083     if (csize == PETSC_DECIDE) {
2084       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2085       if (mglobal == n*bs) { /* square matrix */
2086 	nlocal = m;
2087       } else {
2088         nlocal = n/size + ((n % size) > rank);
2089       }
2090     } else {
2091       nlocal = csize/bs;
2092     }
2093     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2094     rstart = rend - nlocal;
2095     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2096 
2097     /* next, compute all the lengths */
2098     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2099     olens = dlens + m;
2100     for (i=0; i<m; i++) {
2101       jend = ii[i+1] - ii[i];
2102       olen = 0;
2103       dlen = 0;
2104       for (j=0; j<jend; j++) {
2105         if (*jj < rstart || *jj >= rend) olen++;
2106         else dlen++;
2107         jj++;
2108       }
2109       olens[i] = olen;
2110       dlens[i] = dlen;
2111     }
2112     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
2113     ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr);
2114     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
2115     ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
2116     ierr = PetscFree(dlens);CHKERRQ(ierr);
2117   } else {
2118     PetscInt ml,nl;
2119 
2120     M = *newmat;
2121     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2122     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2123     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2124     /*
2125          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
2126        rather than the slower MatSetValues().
2127     */
2128     M->was_assembled = PETSC_TRUE;
2129     M->assembled     = PETSC_FALSE;
2130   }
2131   ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2132   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2133   aij = (Mat_SeqBAIJ*)(Mreuse)->data;
2134   ii  = aij->i;
2135   jj  = aij->j;
2136   aa  = aij->a;
2137   for (i=0; i<m; i++) {
2138     row   = rstart/bs + i;
2139     nz    = ii[i+1] - ii[i];
2140     cwork = jj;     jj += nz;
2141     vwork = aa;     aa += nz;
2142     ierr = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2143   }
2144 
2145   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2146   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2147   *newmat = M;
2148 
2149   /* save submatrix used in processor for next request */
2150   if (call ==  MAT_INITIAL_MATRIX) {
2151     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2152     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2153   }
2154 
2155   PetscFunctionReturn(0);
2156 }
2157 
2158 #undef __FUNCT__
2159 #define __FUNCT__ "MatPermute_MPIBAIJ"
2160 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B)
2161 {
2162   MPI_Comm       comm,pcomm;
2163   PetscInt       first,local_size,nrows;
2164   const PetscInt *rows;
2165   PetscMPIInt    size;
2166   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
2167   PetscErrorCode ierr;
2168 
2169   PetscFunctionBegin;
2170   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2171   /* make a collective version of 'rowp' */
2172   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
2173   if (pcomm==comm) {
2174     crowp = rowp;
2175   } else {
2176     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
2177     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
2178     ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr);
2179     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
2180   }
2181   /* collect the global row permutation and invert it */
2182   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
2183   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
2184   if (pcomm!=comm) {
2185     ierr = ISDestroy(crowp);CHKERRQ(ierr);
2186   }
2187   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
2188   /* get the local target indices */
2189   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
2190   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
2191   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
2192   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr);
2193   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
2194   ierr = ISDestroy(irowp);CHKERRQ(ierr);
2195   /* the column permutation is so much easier;
2196      make a local version of 'colp' and invert it */
2197   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
2198   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
2199   if (size==1) {
2200     lcolp = colp;
2201   } else {
2202     ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
2203     ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
2204     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr);
2205   }
2206   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
2207   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
2208   ierr = ISSetPermutation(icolp);CHKERRQ(ierr);
2209   if (size>1) {
2210     ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
2211     ierr = ISDestroy(lcolp);CHKERRQ(ierr);
2212   }
2213   /* now we just get the submatrix */
2214   ierr = MatGetSubMatrix_MPIBAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
2215   /* clean up */
2216   ierr = ISDestroy(lrowp);CHKERRQ(ierr);
2217   ierr = ISDestroy(icolp);CHKERRQ(ierr);
2218   PetscFunctionReturn(0);
2219 }
2220 
2221 #undef __FUNCT__
2222 #define __FUNCT__ "MatGetGhosts_MPIBAIJ"
2223 PetscErrorCode PETSCMAT_DLLEXPORT MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
2224 {
2225   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*) mat->data;
2226   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
2227 
2228   PetscFunctionBegin;
2229   if (nghosts) { *nghosts = B->nbs;}
2230   if (ghosts) {*ghosts = baij->garray;}
2231   PetscFunctionReturn(0);
2232 }
2233 
2234 EXTERN PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat);
2235 
2236 #undef __FUNCT__
2237 #define __FUNCT__ "MatFDColoringCreate_MPIBAIJ"
2238 /*
2239     This routine is almost identical to MatFDColoringCreate_MPIBAIJ()!
2240 */
2241 PetscErrorCode MatFDColoringCreate_MPIBAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c)
2242 {
2243   Mat_MPIBAIJ            *baij = (Mat_MPIBAIJ*)mat->data;
2244   PetscErrorCode        ierr;
2245   PetscMPIInt           size,*ncolsonproc,*disp,nn;
2246   PetscInt              bs,i,n,nrows,j,k,m,*rows = 0,*A_ci,*A_cj,ncols,col;
2247   const PetscInt        *is;
2248   PetscInt              nis = iscoloring->n,nctot,*cols,*B_ci,*B_cj;
2249   PetscInt              *rowhit,M,cstart,cend,colb;
2250   PetscInt              *columnsforrow,l;
2251   IS                    *isa;
2252   PetscTruth             done,flg;
2253   ISLocalToGlobalMapping map = mat->bmapping;
2254   PetscInt               *ltog = (map ? map->indices : (PetscInt*) PETSC_NULL) ,ctype=c->ctype;
2255 
2256   PetscFunctionBegin;
2257   if (!mat->assembled) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled first; MatAssemblyBegin/End();");
2258   if (ctype == IS_COLORING_GHOSTED && !map) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_INCOMP,"When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMappingBlock");
2259 
2260   ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr);
2261   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2262   M                = mat->rmap->n/bs;
2263   cstart           = mat->cmap->rstart/bs;
2264   cend             = mat->cmap->rend/bs;
2265   c->M             = mat->rmap->N/bs;  /* set the global rows and columns and local rows */
2266   c->N             = mat->cmap->N/bs;
2267   c->m             = mat->rmap->n/bs;
2268   c->rstart        = mat->rmap->rstart/bs;
2269 
2270   c->ncolors       = nis;
2271   ierr             = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr);
2272   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr);
2273   ierr             = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr);
2274   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr);
2275   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr);
2276   ierr = PetscLogObjectMemory(c,5*nis*sizeof(PetscInt));CHKERRQ(ierr);
2277 
2278   /* Allow access to data structures of local part of matrix */
2279   if (!baij->colmap) {
2280     ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
2281   }
2282   ierr = MatGetColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr);
2283   ierr = MatGetColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr);
2284 
2285   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr);
2286   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr);
2287 
2288   for (i=0; i<nis; i++) {
2289     ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr);
2290     ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr);
2291     c->ncolumns[i] = n;
2292     if (n) {
2293       ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr);
2294       ierr = PetscLogObjectMemory(c,n*sizeof(PetscInt));CHKERRQ(ierr);
2295       ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr);
2296     } else {
2297       c->columns[i]  = 0;
2298     }
2299 
2300     if (ctype == IS_COLORING_GLOBAL){
2301       /* Determine the total (parallel) number of columns of this color */
2302       ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
2303       ierr = PetscMalloc2(size,PetscMPIInt,&ncolsonproc,size,PetscMPIInt,&disp);CHKERRQ(ierr);
2304 
2305       nn   = PetscMPIIntCast(n);
2306       ierr = MPI_Allgather(&nn,1,MPI_INT,ncolsonproc,1,MPI_INT,((PetscObject)mat)->comm);CHKERRQ(ierr);
2307       nctot = 0; for (j=0; j<size; j++) {nctot += ncolsonproc[j];}
2308       if (!nctot) {
2309         ierr = PetscInfo(mat,"Coloring of matrix has some unneeded colors with no corresponding rows\n");CHKERRQ(ierr);
2310       }
2311 
2312       disp[0] = 0;
2313       for (j=1; j<size; j++) {
2314         disp[j] = disp[j-1] + ncolsonproc[j-1];
2315       }
2316 
2317       /* Get complete list of columns for color on each processor */
2318       ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2319       ierr = MPI_Allgatherv((void*)is,n,MPIU_INT,cols,ncolsonproc,disp,MPIU_INT,((PetscObject)mat)->comm);CHKERRQ(ierr);
2320       ierr = PetscFree2(ncolsonproc,disp);CHKERRQ(ierr);
2321     } else if (ctype == IS_COLORING_GHOSTED){
2322       /* Determine local number of columns of this color on this process, including ghost points */
2323       nctot = n;
2324       ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2325       ierr = PetscMemcpy(cols,is,n*sizeof(PetscInt));CHKERRQ(ierr);
2326     } else {
2327       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not provided for this MatFDColoring type");
2328     }
2329 
2330     /*
2331        Mark all rows affect by these columns
2332     */
2333     /* Temporary option to allow for debugging/testing */
2334     flg  = PETSC_FALSE;
2335     ierr = PetscOptionsGetTruth(PETSC_NULL,"-matfdcoloring_slow",&flg,PETSC_NULL);CHKERRQ(ierr);
2336     if (!flg) {/*-----------------------------------------------------------------------------*/
2337       /* crude, fast version */
2338       ierr = PetscMemzero(rowhit,M*sizeof(PetscInt));CHKERRQ(ierr);
2339       /* loop over columns*/
2340       for (j=0; j<nctot; j++) {
2341         if (ctype == IS_COLORING_GHOSTED) {
2342           col = ltog[cols[j]];
2343         } else {
2344           col  = cols[j];
2345         }
2346         if (col >= cstart && col < cend) {
2347           /* column is in diagonal block of matrix */
2348           rows = A_cj + A_ci[col-cstart];
2349           m    = A_ci[col-cstart+1] - A_ci[col-cstart];
2350         } else {
2351 #if defined (PETSC_USE_CTABLE)
2352           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2353 	  colb --;
2354 #else
2355           colb = baij->colmap[col] - 1;
2356 #endif
2357           if (colb == -1) {
2358             m = 0;
2359           } else {
2360             colb = colb/bs;
2361             rows = B_cj + B_ci[colb];
2362             m    = B_ci[colb+1] - B_ci[colb];
2363           }
2364         }
2365         /* loop over columns marking them in rowhit */
2366         for (k=0; k<m; k++) {
2367           rowhit[*rows++] = col + 1;
2368         }
2369       }
2370 
2371       /* count the number of hits */
2372       nrows = 0;
2373       for (j=0; j<M; j++) {
2374         if (rowhit[j]) nrows++;
2375       }
2376       c->nrows[i]         = nrows;
2377       ierr                = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
2378       ierr                = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
2379       ierr = PetscLogObjectMemory(c,2*(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr);
2380       nrows = 0;
2381       for (j=0; j<M; j++) {
2382         if (rowhit[j]) {
2383           c->rows[i][nrows]           = j;
2384           c->columnsforrow[i][nrows] = rowhit[j] - 1;
2385           nrows++;
2386         }
2387       }
2388     } else {/*-------------------------------------------------------------------------------*/
2389       /* slow version, using rowhit as a linked list */
2390       PetscInt currentcol,fm,mfm;
2391       rowhit[M] = M;
2392       nrows     = 0;
2393       /* loop over columns*/
2394       for (j=0; j<nctot; j++) {
2395         if (ctype == IS_COLORING_GHOSTED) {
2396           col = ltog[cols[j]];
2397         } else {
2398           col  = cols[j];
2399         }
2400         if (col >= cstart && col < cend) {
2401           /* column is in diagonal block of matrix */
2402           rows = A_cj + A_ci[col-cstart];
2403           m    = A_ci[col-cstart+1] - A_ci[col-cstart];
2404         } else {
2405 #if defined (PETSC_USE_CTABLE)
2406           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2407           colb --;
2408 #else
2409           colb = baij->colmap[col] - 1;
2410 #endif
2411           if (colb == -1) {
2412             m = 0;
2413           } else {
2414             colb = colb/bs;
2415             rows = B_cj + B_ci[colb];
2416             m    = B_ci[colb+1] - B_ci[colb];
2417           }
2418         }
2419 
2420         /* loop over columns marking them in rowhit */
2421         fm    = M; /* fm points to first entry in linked list */
2422         for (k=0; k<m; k++) {
2423           currentcol = *rows++;
2424 	  /* is it already in the list? */
2425           do {
2426             mfm  = fm;
2427             fm   = rowhit[fm];
2428           } while (fm < currentcol);
2429           /* not in list so add it */
2430           if (fm != currentcol) {
2431             nrows++;
2432             columnsforrow[currentcol] = col;
2433             /* next three lines insert new entry into linked list */
2434             rowhit[mfm]               = currentcol;
2435             rowhit[currentcol]        = fm;
2436             fm                        = currentcol;
2437             /* fm points to present position in list since we know the columns are sorted */
2438           } else {
2439             SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid coloring of matrix detected");
2440           }
2441         }
2442       }
2443       c->nrows[i]         = nrows;
2444       ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
2445       ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
2446       ierr = PetscLogObjectMemory(c,(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr);
2447       /* now store the linked list of rows into c->rows[i] */
2448       nrows = 0;
2449       fm    = rowhit[M];
2450       do {
2451         c->rows[i][nrows]            = fm;
2452         c->columnsforrow[i][nrows++] = columnsforrow[fm];
2453         fm                           = rowhit[fm];
2454       } while (fm < M);
2455     } /* ---------------------------------------------------------------------------------------*/
2456     ierr = PetscFree(cols);CHKERRQ(ierr);
2457   }
2458 
2459   /* Optimize by adding the vscale, and scaleforrow[][] fields */
2460   /*
2461        vscale will contain the "diagonal" on processor scalings followed by the off processor
2462   */
2463   if (ctype == IS_COLORING_GLOBAL) {
2464     PetscInt *garray;
2465     ierr = PetscMalloc(baij->B->cmap->n*sizeof(PetscInt),&garray);CHKERRQ(ierr);
2466     for (i=0; i<baij->B->cmap->n/bs; i++) {
2467       for (j=0; j<bs; j++) {
2468         garray[i*bs+j] = bs*baij->garray[i]+j;
2469       }
2470     }
2471     ierr = VecCreateGhost(((PetscObject)mat)->comm,baij->A->rmap->n,PETSC_DETERMINE,baij->B->cmap->n,garray,&c->vscale);CHKERRQ(ierr);
2472     ierr = PetscFree(garray);CHKERRQ(ierr);
2473     CHKMEMQ;
2474     ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
2475     for (k=0; k<c->ncolors; k++) {
2476       ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
2477       for (l=0; l<c->nrows[k]; l++) {
2478         col = c->columnsforrow[k][l];
2479         if (col >= cstart && col < cend) {
2480           /* column is in diagonal block of matrix */
2481           colb = col - cstart;
2482         } else {
2483           /* column  is in "off-processor" part */
2484 #if defined (PETSC_USE_CTABLE)
2485           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2486           colb --;
2487 #else
2488           colb = baij->colmap[col] - 1;
2489 #endif
2490           colb = colb/bs;
2491           colb += cend - cstart;
2492         }
2493         c->vscaleforrow[k][l] = colb;
2494       }
2495     }
2496   } else if (ctype == IS_COLORING_GHOSTED) {
2497     /* Get gtol mapping */
2498     PetscInt N = mat->cmap->N, *gtol;
2499     ierr = PetscMalloc((N+1)*sizeof(PetscInt),&gtol);CHKERRQ(ierr);
2500     for (i=0; i<N; i++) gtol[i] = -1;
2501     for (i=0; i<map->n; i++) gtol[ltog[i]] = i;
2502 
2503     c->vscale = 0; /* will be created in MatFDColoringApply() */
2504     ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
2505     for (k=0; k<c->ncolors; k++) {
2506       ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
2507       for (l=0; l<c->nrows[k]; l++) {
2508         col = c->columnsforrow[k][l];      /* global column index */
2509         c->vscaleforrow[k][l] = gtol[col]; /* local column index */
2510       }
2511     }
2512     ierr = PetscFree(gtol);CHKERRQ(ierr);
2513   }
2514   ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr);
2515 
2516   ierr = PetscFree(rowhit);CHKERRQ(ierr);
2517   ierr = PetscFree(columnsforrow);CHKERRQ(ierr);
2518   ierr = MatRestoreColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr);
2519   ierr = MatRestoreColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr);
2520     CHKMEMQ;
2521   PetscFunctionReturn(0);
2522 }
2523 
2524 #undef __FUNCT__
2525 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIBAIJ"
2526 PetscErrorCode MatGetSeqNonzerostructure_MPIBAIJ(Mat A,Mat *newmat)
2527 {
2528   Mat            B;
2529   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ *)A->data;
2530   Mat_SeqBAIJ    *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data;
2531   Mat_SeqAIJ     *b;
2532   PetscErrorCode ierr;
2533   PetscMPIInt    size,rank,*recvcounts = 0,*displs = 0;
2534   PetscInt       sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs;
2535   PetscInt       m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf;
2536 
2537   PetscFunctionBegin;
2538   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
2539   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
2540 
2541   /* ----------------------------------------------------------------
2542      Tell every processor the number of nonzeros per row
2543   */
2544   ierr = PetscMalloc((A->rmap->N/bs)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
2545   for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) {
2546     lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs];
2547   }
2548   sendcount = A->rmap->rend/bs - A->rmap->rstart/bs;
2549   ierr = PetscMalloc(2*size*sizeof(PetscMPIInt),&recvcounts);CHKERRQ(ierr);
2550   displs     = recvcounts + size;
2551   for (i=0; i<size; i++) {
2552     recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs;
2553     displs[i]     = A->rmap->range[i]/bs;
2554   }
2555 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2556   ierr  = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2557 #else
2558   ierr  = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2559 #endif
2560   /* ---------------------------------------------------------------
2561      Create the sequential matrix of the same type as the local block diagonal
2562   */
2563   ierr  = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
2564   ierr  = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2565   ierr  = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2566   ierr  = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr);
2567   b = (Mat_SeqAIJ *)B->data;
2568 
2569   /*--------------------------------------------------------------------
2570     Copy my part of matrix column indices over
2571   */
2572   sendcount  = ad->nz + bd->nz;
2573   jsendbuf   = b->j + b->i[rstarts[rank]/bs];
2574   a_jsendbuf = ad->j;
2575   b_jsendbuf = bd->j;
2576   n          = A->rmap->rend/bs - A->rmap->rstart/bs;
2577   cnt        = 0;
2578   for (i=0; i<n; i++) {
2579 
2580     /* put in lower diagonal portion */
2581     m = bd->i[i+1] - bd->i[i];
2582     while (m > 0) {
2583       /* is it above diagonal (in bd (compressed) numbering) */
2584       if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break;
2585       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2586       m--;
2587     }
2588 
2589     /* put in diagonal portion */
2590     for (j=ad->i[i]; j<ad->i[i+1]; j++) {
2591       jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++;
2592     }
2593 
2594     /* put in upper diagonal portion */
2595     while (m-- > 0) {
2596       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2597     }
2598   }
2599   if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt);
2600 
2601   /*--------------------------------------------------------------------
2602     Gather all column indices to all processors
2603   */
2604   for (i=0; i<size; i++) {
2605     recvcounts[i] = 0;
2606     for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) {
2607       recvcounts[i] += lens[j];
2608     }
2609   }
2610   displs[0]  = 0;
2611   for (i=1; i<size; i++) {
2612     displs[i] = displs[i-1] + recvcounts[i-1];
2613   }
2614 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2615   ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2616 #else
2617   ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2618 #endif
2619   /*--------------------------------------------------------------------
2620     Assemble the matrix into useable form (note numerical values not yet set)
2621   */
2622   /* set the b->ilen (length of each row) values */
2623   ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr);
2624   /* set the b->i indices */
2625   b->i[0] = 0;
2626   for (i=1; i<=A->rmap->N/bs; i++) {
2627     b->i[i] = b->i[i-1] + lens[i-1];
2628   }
2629   ierr = PetscFree(lens);CHKERRQ(ierr);
2630   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2631   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2632   ierr = PetscFree(recvcounts);CHKERRQ(ierr);
2633 
2634   if (A->symmetric){
2635     ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2636   } else if (A->hermitian) {
2637     ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr);
2638   } else if (A->structurally_symmetric) {
2639     ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2640   }
2641   *newmat = B;
2642   PetscFunctionReturn(0);
2643 }
2644 
2645 #undef __FUNCT__
2646 #define __FUNCT__ "MatSOR_MPIBAIJ"
2647 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2648 {
2649   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
2650   PetscErrorCode ierr;
2651   Vec            bb1 = 0;
2652 
2653   PetscFunctionBegin;
2654   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) {
2655     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2656   }
2657 
2658   if (flag == SOR_APPLY_UPPER) {
2659     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2660     PetscFunctionReturn(0);
2661   }
2662 
2663   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2664     if (flag & SOR_ZERO_INITIAL_GUESS) {
2665       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2666       its--;
2667     }
2668 
2669     while (its--) {
2670       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2671       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2672 
2673       /* update rhs: bb1 = bb - B*x */
2674       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2675       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2676 
2677       /* local sweep */
2678       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2679     }
2680   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
2681     if (flag & SOR_ZERO_INITIAL_GUESS) {
2682       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2683       its--;
2684     }
2685     while (its--) {
2686       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2687       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2688 
2689       /* update rhs: bb1 = bb - B*x */
2690       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2691       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2692 
2693       /* local sweep */
2694       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2695     }
2696   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
2697     if (flag & SOR_ZERO_INITIAL_GUESS) {
2698       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2699       its--;
2700     }
2701     while (its--) {
2702       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2703       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2704 
2705       /* update rhs: bb1 = bb - B*x */
2706       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2707       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2708 
2709       /* local sweep */
2710       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2711     }
2712   } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel version of SOR requested not supported");
2713 
2714   if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);}
2715   PetscFunctionReturn(0);
2716 }
2717 
2718 extern PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_BAIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2719 
2720 
2721 /* -------------------------------------------------------------------*/
2722 static struct _MatOps MatOps_Values = {
2723        MatSetValues_MPIBAIJ,
2724        MatGetRow_MPIBAIJ,
2725        MatRestoreRow_MPIBAIJ,
2726        MatMult_MPIBAIJ,
2727 /* 4*/ MatMultAdd_MPIBAIJ,
2728        MatMultTranspose_MPIBAIJ,
2729        MatMultTransposeAdd_MPIBAIJ,
2730        0,
2731        0,
2732        0,
2733 /*10*/ 0,
2734        0,
2735        0,
2736        MatSOR_MPIBAIJ,
2737        MatTranspose_MPIBAIJ,
2738 /*15*/ MatGetInfo_MPIBAIJ,
2739        MatEqual_MPIBAIJ,
2740        MatGetDiagonal_MPIBAIJ,
2741        MatDiagonalScale_MPIBAIJ,
2742        MatNorm_MPIBAIJ,
2743 /*20*/ MatAssemblyBegin_MPIBAIJ,
2744        MatAssemblyEnd_MPIBAIJ,
2745        MatSetOption_MPIBAIJ,
2746        MatZeroEntries_MPIBAIJ,
2747 /*24*/ MatZeroRows_MPIBAIJ,
2748        0,
2749        0,
2750        0,
2751        0,
2752 /*29*/ MatSetUpPreallocation_MPIBAIJ,
2753        0,
2754        0,
2755        0,
2756        0,
2757 /*34*/ MatDuplicate_MPIBAIJ,
2758        0,
2759        0,
2760        0,
2761        0,
2762 /*39*/ MatAXPY_MPIBAIJ,
2763        MatGetSubMatrices_MPIBAIJ,
2764        MatIncreaseOverlap_MPIBAIJ,
2765        MatGetValues_MPIBAIJ,
2766        MatCopy_MPIBAIJ,
2767 /*44*/ 0,
2768        MatScale_MPIBAIJ,
2769        0,
2770        0,
2771        0,
2772 /*49*/ MatSetBlockSize_MPIBAIJ,
2773        0,
2774        0,
2775        0,
2776        0,
2777 /*54*/ MatFDColoringCreate_MPIBAIJ,
2778        0,
2779        MatSetUnfactored_MPIBAIJ,
2780        MatPermute_MPIBAIJ,
2781        MatSetValuesBlocked_MPIBAIJ,
2782 /*59*/ MatGetSubMatrix_MPIBAIJ,
2783        MatDestroy_MPIBAIJ,
2784        MatView_MPIBAIJ,
2785        0,
2786        0,
2787 /*64*/ 0,
2788        0,
2789        0,
2790        0,
2791        0,
2792 /*69*/ MatGetRowMaxAbs_MPIBAIJ,
2793        0,
2794        0,
2795        0,
2796        0,
2797 /*74*/ 0,
2798        MatFDColoringApply_BAIJ,
2799        0,
2800        0,
2801        0,
2802 /*79*/ 0,
2803        0,
2804        0,
2805        0,
2806        MatLoad_MPIBAIJ,
2807 /*84*/ 0,
2808        0,
2809        0,
2810        0,
2811        0,
2812 /*89*/ 0,
2813        0,
2814        0,
2815        0,
2816        0,
2817 /*94*/ 0,
2818        0,
2819        0,
2820        0,
2821        0,
2822 /*99*/ 0,
2823        0,
2824        0,
2825        0,
2826        0,
2827 /*104*/0,
2828        MatRealPart_MPIBAIJ,
2829        MatImaginaryPart_MPIBAIJ,
2830        0,
2831        0,
2832 /*109*/0,
2833        0,
2834        0,
2835        0,
2836        0,
2837 /*114*/MatGetSeqNonzerostructure_MPIBAIJ,
2838        0,
2839        MatGetGhosts_MPIBAIJ,
2840        0,
2841        0,
2842 /*119*/0,
2843        0,
2844        0,
2845        0
2846 };
2847 
2848 EXTERN_C_BEGIN
2849 #undef __FUNCT__
2850 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
2851 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
2852 {
2853   PetscFunctionBegin;
2854   *a      = ((Mat_MPIBAIJ *)A->data)->A;
2855   *iscopy = PETSC_FALSE;
2856   PetscFunctionReturn(0);
2857 }
2858 EXTERN_C_END
2859 
2860 EXTERN_C_BEGIN
2861 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*);
2862 EXTERN_C_END
2863 
2864 EXTERN_C_BEGIN
2865 #undef __FUNCT__
2866 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ"
2867 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2868 {
2869   PetscInt       m,rstart,cstart,cend;
2870   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
2871   const PetscInt *JJ=0;
2872   PetscScalar    *values=0;
2873   PetscErrorCode ierr;
2874 
2875   PetscFunctionBegin;
2876 
2877   if (bs < 1) SETERRQ1(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2878   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2879   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2880   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2881   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2882   m      = B->rmap->n/bs;
2883   rstart = B->rmap->rstart/bs;
2884   cstart = B->cmap->rstart/bs;
2885   cend   = B->cmap->rend/bs;
2886 
2887   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
2888   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
2889   for (i=0; i<m; i++) {
2890     nz = ii[i+1] - ii[i];
2891     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
2892     nz_max = PetscMax(nz_max,nz);
2893     JJ  = jj + ii[i];
2894     for (j=0; j<nz; j++) {
2895       if (*JJ >= cstart) break;
2896       JJ++;
2897     }
2898     d = 0;
2899     for (; j<nz; j++) {
2900       if (*JJ++ >= cend) break;
2901       d++;
2902     }
2903     d_nnz[i] = d;
2904     o_nnz[i] = nz - d;
2905   }
2906   ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2907   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
2908 
2909   values = (PetscScalar*)V;
2910   if (!values) {
2911     ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2912     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2913   }
2914   for (i=0; i<m; i++) {
2915     PetscInt          row    = i + rstart;
2916     PetscInt          ncols  = ii[i+1] - ii[i];
2917     const PetscInt    *icols = jj + ii[i];
2918     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2919     ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2920   }
2921 
2922   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2923   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2924   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2925 
2926   PetscFunctionReturn(0);
2927 }
2928 EXTERN_C_END
2929 
2930 #undef __FUNCT__
2931 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR"
2932 /*@C
2933    MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
2934    (the default parallel PETSc format).
2935 
2936    Collective on MPI_Comm
2937 
2938    Input Parameters:
2939 +  A - the matrix
2940 .  i - the indices into j for the start of each local row (starts with zero)
2941 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2942 -  v - optional values in the matrix
2943 
2944    Level: developer
2945 
2946 .keywords: matrix, aij, compressed row, sparse, parallel
2947 
2948 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ
2949 @*/
2950 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2951 {
2952   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]);
2953 
2954   PetscFunctionBegin;
2955   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
2956   if (f) {
2957     ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr);
2958   }
2959   PetscFunctionReturn(0);
2960 }
2961 
2962 EXTERN_C_BEGIN
2963 #undef __FUNCT__
2964 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
2965 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
2966 {
2967   Mat_MPIBAIJ    *b;
2968   PetscErrorCode ierr;
2969   PetscInt       i, newbs = PetscAbs(bs);
2970 
2971   PetscFunctionBegin;
2972   if (bs < 0) {
2973     ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPIBAIJ matrix","Mat");CHKERRQ(ierr);
2974       ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr);
2975     ierr = PetscOptionsEnd();CHKERRQ(ierr);
2976     bs   = PetscAbs(bs);
2977   }
2978   if ((d_nnz || o_nnz) && newbs != bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting d_nnz or o_nnz");
2979   bs = newbs;
2980 
2981 
2982   if (bs < 1) SETERRQ(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
2983   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2984   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2985   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2986   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2987 
2988   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2989   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2990   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2991   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2992 
2993   if (d_nnz) {
2994     for (i=0; i<B->rmap->n/bs; i++) {
2995       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]);
2996     }
2997   }
2998   if (o_nnz) {
2999     for (i=0; i<B->rmap->n/bs; i++) {
3000       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]);
3001     }
3002   }
3003 
3004   b = (Mat_MPIBAIJ*)B->data;
3005   b->bs2 = bs*bs;
3006   b->mbs = B->rmap->n/bs;
3007   b->nbs = B->cmap->n/bs;
3008   b->Mbs = B->rmap->N/bs;
3009   b->Nbs = B->cmap->N/bs;
3010 
3011   for (i=0; i<=b->size; i++) {
3012     b->rangebs[i] = B->rmap->range[i]/bs;
3013   }
3014   b->rstartbs = B->rmap->rstart/bs;
3015   b->rendbs   = B->rmap->rend/bs;
3016   b->cstartbs = B->cmap->rstart/bs;
3017   b->cendbs   = B->cmap->rend/bs;
3018 
3019   if (!B->preallocated) {
3020     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3021     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3022     ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
3023     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3024     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3025     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3026     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
3027     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3028     ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr);
3029   }
3030 
3031   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3032   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
3033   B->preallocated = PETSC_TRUE;
3034   PetscFunctionReturn(0);
3035 }
3036 EXTERN_C_END
3037 
3038 EXTERN_C_BEGIN
3039 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
3040 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
3041 EXTERN_C_END
3042 
3043 
3044 EXTERN_C_BEGIN
3045 #undef __FUNCT__
3046 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj"
3047 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPIAdj(Mat B, const MatType newtype,MatReuse reuse,Mat *adj)
3048 {
3049   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
3050   PetscErrorCode ierr;
3051   Mat_SeqBAIJ    *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data;
3052   PetscInt       M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs;
3053   const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;
3054 
3055   PetscFunctionBegin;
3056   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&ii);CHKERRQ(ierr);
3057   ii[0] = 0;
3058   CHKMEMQ;
3059   for (i=0; i<M; i++) {
3060     if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]);
3061     if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]);
3062     ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i];
3063     /* remove one from count of matrix has diagonal */
3064     for (j=id[i]; j<id[i+1]; j++) {
3065       if (jd[j] == i) {ii[i+1]--;break;}
3066     }
3067   CHKMEMQ;
3068   }
3069   ierr = PetscMalloc(ii[M]*sizeof(PetscInt),&jj);CHKERRQ(ierr);
3070   cnt = 0;
3071   for (i=0; i<M; i++) {
3072     for (j=io[i]; j<io[i+1]; j++) {
3073       if (garray[jo[j]] > rstart) break;
3074       jj[cnt++] = garray[jo[j]];
3075   CHKMEMQ;
3076     }
3077     for (k=id[i]; k<id[i+1]; k++) {
3078       if (jd[k] != i) {
3079         jj[cnt++] = rstart + jd[k];
3080   CHKMEMQ;
3081       }
3082     }
3083     for (;j<io[i+1]; j++) {
3084       jj[cnt++] = garray[jo[j]];
3085   CHKMEMQ;
3086     }
3087   }
3088   ierr = MatCreateMPIAdj(((PetscObject)B)->comm,M,B->cmap->N/B->rmap->bs,ii,jj,PETSC_NULL,adj);CHKERRQ(ierr);
3089   PetscFunctionReturn(0);
3090 }
3091 EXTERN_C_END
3092 
3093 EXTERN_C_BEGIN
3094 #if defined(PETSC_HAVE_MUMPS)
3095 extern PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*);
3096 #endif
3097 EXTERN_C_END
3098 
3099 /*MC
3100    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
3101 
3102    Options Database Keys:
3103 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
3104 . -mat_block_size <bs> - set the blocksize used to store the matrix
3105 - -mat_use_hash_table <fact>
3106 
3107   Level: beginner
3108 
3109 .seealso: MatCreateMPIBAIJ
3110 M*/
3111 
3112 EXTERN_C_BEGIN
3113 #undef __FUNCT__
3114 #define __FUNCT__ "MatCreate_MPIBAIJ"
3115 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIBAIJ(Mat B)
3116 {
3117   Mat_MPIBAIJ    *b;
3118   PetscErrorCode ierr;
3119   PetscTruth     flg;
3120 
3121   PetscFunctionBegin;
3122   ierr = PetscNewLog(B,Mat_MPIBAIJ,&b);CHKERRQ(ierr);
3123   B->data = (void*)b;
3124 
3125 
3126   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3127   B->mapping    = 0;
3128   B->assembled  = PETSC_FALSE;
3129 
3130   B->insertmode = NOT_SET_VALUES;
3131   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
3132   ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr);
3133 
3134   /* build local table of row and column ownerships */
3135   ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);
3136 
3137   /* build cache for off array entries formed */
3138   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
3139   b->donotstash  = PETSC_FALSE;
3140   b->colmap      = PETSC_NULL;
3141   b->garray      = PETSC_NULL;
3142   b->roworiented = PETSC_TRUE;
3143 
3144   /* stuff used in block assembly */
3145   b->barray       = 0;
3146 
3147   /* stuff used for matrix vector multiply */
3148   b->lvec         = 0;
3149   b->Mvctx        = 0;
3150 
3151   /* stuff for MatGetRow() */
3152   b->rowindices   = 0;
3153   b->rowvalues    = 0;
3154   b->getrowactive = PETSC_FALSE;
3155 
3156   /* hash table stuff */
3157   b->ht           = 0;
3158   b->hd           = 0;
3159   b->ht_size      = 0;
3160   b->ht_flag      = PETSC_FALSE;
3161   b->ht_fact      = 0;
3162   b->ht_total_ct  = 0;
3163   b->ht_insert_ct = 0;
3164 
3165   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr);
3166     ierr = PetscOptionsTruth("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
3167     if (flg) {
3168       PetscReal fact = 1.39;
3169       ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
3170       ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr);
3171       if (fact <= 1.0) fact = 1.39;
3172       ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
3173       ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
3174     }
3175   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3176 
3177 #if defined(PETSC_HAVE_MUMPS)
3178   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps",MatGetFactor_baij_mumps);CHKERRQ(ierr);
3179 #endif
3180   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",
3181                                      "MatConvert_MPIBAIJ_MPIAdj",
3182                                       MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr);
3183   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3184                                      "MatStoreValues_MPIBAIJ",
3185                                      MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
3186   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3187                                      "MatRetrieveValues_MPIBAIJ",
3188                                      MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
3189   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
3190                                      "MatGetDiagonalBlock_MPIBAIJ",
3191                                      MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
3192   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C",
3193                                      "MatMPIBAIJSetPreallocation_MPIBAIJ",
3194                                      MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
3195   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",
3196 				     "MatMPIBAIJSetPreallocationCSR_MPIBAIJ",
3197 				     MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
3198   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
3199                                      "MatDiagonalScaleLocal_MPIBAIJ",
3200                                      MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
3201   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C",
3202                                      "MatSetHashTableFactor_MPIBAIJ",
3203                                      MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
3204   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);
3205   PetscFunctionReturn(0);
3206 }
3207 EXTERN_C_END
3208 
3209 /*MC
3210    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
3211 
3212    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
3213    and MATMPIBAIJ otherwise.
3214 
3215    Options Database Keys:
3216 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
3217 
3218   Level: beginner
3219 
3220 .seealso: MatCreateMPIBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3221 M*/
3222 
3223 EXTERN_C_BEGIN
3224 #undef __FUNCT__
3225 #define __FUNCT__ "MatCreate_BAIJ"
3226 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_BAIJ(Mat A)
3227 {
3228   PetscErrorCode ierr;
3229   PetscMPIInt    size;
3230 
3231   PetscFunctionBegin;
3232   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
3233   if (size == 1) {
3234     ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr);
3235   } else {
3236     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
3237   }
3238   PetscFunctionReturn(0);
3239 }
3240 EXTERN_C_END
3241 
3242 #undef __FUNCT__
3243 #define __FUNCT__ "MatMPIBAIJSetPreallocation"
3244 /*@C
3245    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
3246    (block compressed row).  For good matrix assembly performance
3247    the user should preallocate the matrix storage by setting the parameters
3248    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3249    performance can be increased by more than a factor of 50.
3250 
3251    Collective on Mat
3252 
3253    Input Parameters:
3254 +  A - the matrix
3255 .  bs   - size of blockk
3256 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
3257            submatrix  (same for all local rows)
3258 .  d_nnz - array containing the number of block nonzeros in the various block rows
3259            of the in diagonal portion of the local (possibly different for each block
3260            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
3261 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
3262            submatrix (same for all local rows).
3263 -  o_nnz - array containing the number of nonzeros in the various block rows of the
3264            off-diagonal portion of the local submatrix (possibly different for
3265            each block row) or PETSC_NULL.
3266 
3267    If the *_nnz parameter is given then the *_nz parameter is ignored
3268 
3269    Options Database Keys:
3270 +   -mat_block_size - size of the blocks to use
3271 -   -mat_use_hash_table <fact>
3272 
3273    Notes:
3274    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
3275    than it must be used on all processors that share the object for that argument.
3276 
3277    Storage Information:
3278    For a square global matrix we define each processor's diagonal portion
3279    to be its local rows and the corresponding columns (a square submatrix);
3280    each processor's off-diagonal portion encompasses the remainder of the
3281    local matrix (a rectangular submatrix).
3282 
3283    The user can specify preallocated storage for the diagonal part of
3284    the local submatrix with either d_nz or d_nnz (not both).  Set
3285    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
3286    memory allocation.  Likewise, specify preallocated storage for the
3287    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3288 
3289    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3290    the figure below we depict these three local rows and all columns (0-11).
3291 
3292 .vb
3293            0 1 2 3 4 5 6 7 8 9 10 11
3294           -------------------
3295    row 3  |  o o o d d d o o o o o o
3296    row 4  |  o o o d d d o o o o o o
3297    row 5  |  o o o d d d o o o o o o
3298           -------------------
3299 .ve
3300 
3301    Thus, any entries in the d locations are stored in the d (diagonal)
3302    submatrix, and any entries in the o locations are stored in the
3303    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3304    stored simply in the MATSEQBAIJ format for compressed row storage.
3305 
3306    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3307    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3308    In general, for PDE problems in which most nonzeros are near the diagonal,
3309    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3310    or you will get TERRIBLE performance; see the users' manual chapter on
3311    matrices.
3312 
3313    You can call MatGetInfo() to get information on how effective the preallocation was;
3314    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3315    You can also run with the option -info and look for messages with the string
3316    malloc in them to see if additional memory allocation was needed.
3317 
3318    Level: intermediate
3319 
3320 .keywords: matrix, block, aij, compressed row, sparse, parallel
3321 
3322 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR()
3323 @*/
3324 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3325 {
3326   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
3327 
3328   PetscFunctionBegin;
3329   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3330   if (f) {
3331     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3332   }
3333   PetscFunctionReturn(0);
3334 }
3335 
3336 #undef __FUNCT__
3337 #define __FUNCT__ "MatCreateMPIBAIJ"
3338 /*@C
3339    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
3340    (block compressed row).  For good matrix assembly performance
3341    the user should preallocate the matrix storage by setting the parameters
3342    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3343    performance can be increased by more than a factor of 50.
3344 
3345    Collective on MPI_Comm
3346 
3347    Input Parameters:
3348 +  comm - MPI communicator
3349 .  bs   - size of blockk
3350 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3351            This value should be the same as the local size used in creating the
3352            y vector for the matrix-vector product y = Ax.
3353 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
3354            This value should be the same as the local size used in creating the
3355            x vector for the matrix-vector product y = Ax.
3356 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3357 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3358 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
3359            submatrix  (same for all local rows)
3360 .  d_nnz - array containing the number of nonzero blocks in the various block rows
3361            of the in diagonal portion of the local (possibly different for each block
3362            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
3363 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3364            submatrix (same for all local rows).
3365 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
3366            off-diagonal portion of the local submatrix (possibly different for
3367            each block row) or PETSC_NULL.
3368 
3369    Output Parameter:
3370 .  A - the matrix
3371 
3372    Options Database Keys:
3373 +   -mat_block_size - size of the blocks to use
3374 -   -mat_use_hash_table <fact>
3375 
3376    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3377    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3378    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3379 
3380    Notes:
3381    If the *_nnz parameter is given then the *_nz parameter is ignored
3382 
3383    A nonzero block is any block that as 1 or more nonzeros in it
3384 
3385    The user MUST specify either the local or global matrix dimensions
3386    (possibly both).
3387 
3388    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
3389    than it must be used on all processors that share the object for that argument.
3390 
3391    Storage Information:
3392    For a square global matrix we define each processor's diagonal portion
3393    to be its local rows and the corresponding columns (a square submatrix);
3394    each processor's off-diagonal portion encompasses the remainder of the
3395    local matrix (a rectangular submatrix).
3396 
3397    The user can specify preallocated storage for the diagonal part of
3398    the local submatrix with either d_nz or d_nnz (not both).  Set
3399    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
3400    memory allocation.  Likewise, specify preallocated storage for the
3401    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3402 
3403    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3404    the figure below we depict these three local rows and all columns (0-11).
3405 
3406 .vb
3407            0 1 2 3 4 5 6 7 8 9 10 11
3408           -------------------
3409    row 3  |  o o o d d d o o o o o o
3410    row 4  |  o o o d d d o o o o o o
3411    row 5  |  o o o d d d o o o o o o
3412           -------------------
3413 .ve
3414 
3415    Thus, any entries in the d locations are stored in the d (diagonal)
3416    submatrix, and any entries in the o locations are stored in the
3417    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3418    stored simply in the MATSEQBAIJ format for compressed row storage.
3419 
3420    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3421    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3422    In general, for PDE problems in which most nonzeros are near the diagonal,
3423    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3424    or you will get TERRIBLE performance; see the users' manual chapter on
3425    matrices.
3426 
3427    Level: intermediate
3428 
3429 .keywords: matrix, block, aij, compressed row, sparse, parallel
3430 
3431 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3432 @*/
3433 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3434 {
3435   PetscErrorCode ierr;
3436   PetscMPIInt    size;
3437 
3438   PetscFunctionBegin;
3439   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3440   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3441   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3442   if (size > 1) {
3443     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
3444     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3445   } else {
3446     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3447     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3448   }
3449   PetscFunctionReturn(0);
3450 }
3451 
3452 #undef __FUNCT__
3453 #define __FUNCT__ "MatDuplicate_MPIBAIJ"
3454 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3455 {
3456   Mat            mat;
3457   Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
3458   PetscErrorCode ierr;
3459   PetscInt       len=0;
3460 
3461   PetscFunctionBegin;
3462   *newmat       = 0;
3463   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
3464   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3465   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3466   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3467 
3468   mat->factortype   = matin->factortype;
3469   mat->preallocated = PETSC_TRUE;
3470   mat->assembled    = PETSC_TRUE;
3471   mat->insertmode   = NOT_SET_VALUES;
3472 
3473   a      = (Mat_MPIBAIJ*)mat->data;
3474   mat->rmap->bs  = matin->rmap->bs;
3475   a->bs2   = oldmat->bs2;
3476   a->mbs   = oldmat->mbs;
3477   a->nbs   = oldmat->nbs;
3478   a->Mbs   = oldmat->Mbs;
3479   a->Nbs   = oldmat->Nbs;
3480 
3481   ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3482   ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3483 
3484   a->size         = oldmat->size;
3485   a->rank         = oldmat->rank;
3486   a->donotstash   = oldmat->donotstash;
3487   a->roworiented  = oldmat->roworiented;
3488   a->rowindices   = 0;
3489   a->rowvalues    = 0;
3490   a->getrowactive = PETSC_FALSE;
3491   a->barray       = 0;
3492   a->rstartbs     = oldmat->rstartbs;
3493   a->rendbs       = oldmat->rendbs;
3494   a->cstartbs     = oldmat->cstartbs;
3495   a->cendbs       = oldmat->cendbs;
3496 
3497   /* hash table stuff */
3498   a->ht           = 0;
3499   a->hd           = 0;
3500   a->ht_size      = 0;
3501   a->ht_flag      = oldmat->ht_flag;
3502   a->ht_fact      = oldmat->ht_fact;
3503   a->ht_total_ct  = 0;
3504   a->ht_insert_ct = 0;
3505 
3506   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
3507   if (oldmat->colmap) {
3508 #if defined (PETSC_USE_CTABLE)
3509   ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3510 #else
3511   ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3512   ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3513   ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3514 #endif
3515   } else a->colmap = 0;
3516 
3517   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
3518     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3519     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3520     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
3521   } else a->garray = 0;
3522 
3523   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
3524   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3525   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3526   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3527   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3528 
3529   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3530   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3531   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3532   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3533   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3534   *newmat = mat;
3535 
3536   PetscFunctionReturn(0);
3537 }
3538 
3539 #undef __FUNCT__
3540 #define __FUNCT__ "MatLoad_MPIBAIJ"
3541 PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer)
3542 {
3543   PetscErrorCode ierr;
3544   int            fd;
3545   PetscInt       i,nz,j,rstart,rend;
3546   PetscScalar    *vals,*buf;
3547   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3548   MPI_Status     status;
3549   PetscMPIInt    rank,size,maxnz;
3550   PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
3551   PetscInt       *locrowlens = PETSC_NULL,*procsnz = PETSC_NULL,*browners = PETSC_NULL;
3552   PetscInt       jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax;
3553   PetscMPIInt    tag = ((PetscObject)viewer)->tag;
3554   PetscInt       *dlens = PETSC_NULL,*odlens = PETSC_NULL,*mask = PETSC_NULL,*masked1 = PETSC_NULL,*masked2 = PETSC_NULL,rowcount,odcount;
3555   PetscInt       dcount,kmax,k,nzcount,tmp,mend,sizesset=1,grows,gcols;
3556 
3557   PetscFunctionBegin;
3558   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr);
3559     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
3560   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3561 
3562   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3563   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3564   if (!rank) {
3565     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3566     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
3567     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3568   }
3569 
3570   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
3571 
3572   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3573   M = header[1]; N = header[2];
3574 
3575   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3576   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
3577   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
3578 
3579   /* If global sizes are set, check if they are consistent with that given in the file */
3580   if (sizesset) {
3581     ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
3582   }
3583   if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
3584   if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
3585 
3586   if (M != N) SETERRQ(((PetscObject)viewer)->comm,PETSC_ERR_SUP,"Can only do square matrices");
3587 
3588   /*
3589      This code adds extra rows to make sure the number of rows is
3590      divisible by the blocksize
3591   */
3592   Mbs        = M/bs;
3593   extra_rows = bs - M + bs*Mbs;
3594   if (extra_rows == bs) extra_rows = 0;
3595   else                  Mbs++;
3596   if (extra_rows && !rank) {
3597     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3598   }
3599 
3600   /* determine ownership of all rows */
3601   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
3602     mbs        = Mbs/size + ((Mbs % size) > rank);
3603     m          = mbs*bs;
3604   } else { /* User set */
3605     m          = newmat->rmap->n;
3606     mbs        = m/bs;
3607   }
3608   ierr       = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr);
3609   ierr       = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3610 
3611   /* process 0 needs enough room for process with most rows */
3612   if (!rank) {
3613     mmax = rowners[1];
3614     for (i=2; i<size; i++) {
3615       mmax = PetscMax(mmax,rowners[i]);
3616     }
3617     mmax*=bs;
3618   } else mmax = m;
3619 
3620   rowners[0] = 0;
3621   for (i=2; i<=size; i++)  rowners[i] += rowners[i-1];
3622   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
3623   rstart = rowners[rank];
3624   rend   = rowners[rank+1];
3625 
3626   /* distribute row lengths to all processors */
3627   ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr);
3628   if (!rank) {
3629     mend = m;
3630     if (size == 1) mend = mend - extra_rows;
3631     ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr);
3632     for (j=mend; j<m; j++) locrowlens[j] = 1;
3633     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3634     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3635     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3636     for (j=0; j<m; j++) {
3637       procsnz[0] += locrowlens[j];
3638     }
3639     for (i=1; i<size; i++) {
3640       mend = browners[i+1] - browners[i];
3641       if (i == size-1) mend = mend - extra_rows;
3642       ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr);
3643       for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
3644       /* calculate the number of nonzeros on each processor */
3645       for (j=0; j<browners[i+1]-browners[i]; j++) {
3646         procsnz[i] += rowlengths[j];
3647       }
3648       ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3649     }
3650     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3651   } else {
3652     ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3653   }
3654 
3655   if (!rank) {
3656     /* determine max buffer needed and allocate it */
3657     maxnz = procsnz[0];
3658     for (i=1; i<size; i++) {
3659       maxnz = PetscMax(maxnz,procsnz[i]);
3660     }
3661     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3662 
3663     /* read in my part of the matrix column indices  */
3664     nz     = procsnz[0];
3665     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
3666     mycols = ibuf;
3667     if (size == 1)  nz -= extra_rows;
3668     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3669     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
3670 
3671     /* read in every ones (except the last) and ship off */
3672     for (i=1; i<size-1; i++) {
3673       nz   = procsnz[i];
3674       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3675       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3676     }
3677     /* read in the stuff for the last proc */
3678     if (size != 1) {
3679       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
3680       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3681       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
3682       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
3683     }
3684     ierr = PetscFree(cols);CHKERRQ(ierr);
3685   } else {
3686     /* determine buffer space needed for message */
3687     nz = 0;
3688     for (i=0; i<m; i++) {
3689       nz += locrowlens[i];
3690     }
3691     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
3692     mycols = ibuf;
3693     /* receive message of column indices*/
3694     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3695     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
3696     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
3697   }
3698 
3699   /* loop over local rows, determining number of off diagonal entries */
3700   ierr     = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
3701   ierr     = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
3702   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3703   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3704   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3705   rowcount = 0; nzcount = 0;
3706   for (i=0; i<mbs; i++) {
3707     dcount  = 0;
3708     odcount = 0;
3709     for (j=0; j<bs; j++) {
3710       kmax = locrowlens[rowcount];
3711       for (k=0; k<kmax; k++) {
3712         tmp = mycols[nzcount++]/bs;
3713         if (!mask[tmp]) {
3714           mask[tmp] = 1;
3715           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
3716           else masked1[dcount++] = tmp;
3717         }
3718       }
3719       rowcount++;
3720     }
3721 
3722     dlens[i]  = dcount;
3723     odlens[i] = odcount;
3724 
3725     /* zero out the mask elements we set */
3726     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
3727     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
3728   }
3729 
3730 
3731   if (!sizesset) {
3732     ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3733   }
3734   ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
3735 
3736   if (!rank) {
3737     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
3738     /* read in my part of the matrix numerical values  */
3739     nz = procsnz[0];
3740     vals = buf;
3741     mycols = ibuf;
3742     if (size == 1)  nz -= extra_rows;
3743     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3744     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
3745 
3746     /* insert into matrix */
3747     jj      = rstart*bs;
3748     for (i=0; i<m; i++) {
3749       ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3750       mycols += locrowlens[i];
3751       vals   += locrowlens[i];
3752       jj++;
3753     }
3754     /* read in other processors (except the last one) and ship out */
3755     for (i=1; i<size-1; i++) {
3756       nz   = procsnz[i];
3757       vals = buf;
3758       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3759       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3760     }
3761     /* the last proc */
3762     if (size != 1){
3763       nz   = procsnz[i] - extra_rows;
3764       vals = buf;
3765       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3766       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
3767       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3768     }
3769     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3770   } else {
3771     /* receive numeric values */
3772     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
3773 
3774     /* receive message of values*/
3775     vals   = buf;
3776     mycols = ibuf;
3777     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
3778     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
3779     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
3780 
3781     /* insert into matrix */
3782     jj      = rstart*bs;
3783     for (i=0; i<m; i++) {
3784       ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3785       mycols += locrowlens[i];
3786       vals   += locrowlens[i];
3787       jj++;
3788     }
3789   }
3790   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
3791   ierr = PetscFree(buf);CHKERRQ(ierr);
3792   ierr = PetscFree(ibuf);CHKERRQ(ierr);
3793   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
3794   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
3795   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
3796   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3797   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3798 
3799   PetscFunctionReturn(0);
3800 }
3801 
3802 #undef __FUNCT__
3803 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
3804 /*@
3805    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
3806 
3807    Input Parameters:
3808 .  mat  - the matrix
3809 .  fact - factor
3810 
3811    Not Collective, each process can use a different factor
3812 
3813    Level: advanced
3814 
3815   Notes:
3816    This can also be set by the command line option: -mat_use_hash_table <fact>
3817 
3818 .keywords: matrix, hashtable, factor, HT
3819 
3820 .seealso: MatSetOption()
3821 @*/
3822 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
3823 {
3824   PetscErrorCode ierr,(*f)(Mat,PetscReal);
3825 
3826   PetscFunctionBegin;
3827   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);CHKERRQ(ierr);
3828   if (f) {
3829     ierr = (*f)(mat,fact);CHKERRQ(ierr);
3830   }
3831   PetscFunctionReturn(0);
3832 }
3833 
3834 EXTERN_C_BEGIN
3835 #undef __FUNCT__
3836 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
3837 PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
3838 {
3839   Mat_MPIBAIJ *baij;
3840 
3841   PetscFunctionBegin;
3842   baij = (Mat_MPIBAIJ*)mat->data;
3843   baij->ht_fact = fact;
3844   PetscFunctionReturn(0);
3845 }
3846 EXTERN_C_END
3847 
3848 #undef __FUNCT__
3849 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
3850 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3851 {
3852   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
3853   PetscFunctionBegin;
3854   *Ad     = a->A;
3855   *Ao     = a->B;
3856   *colmap = a->garray;
3857   PetscFunctionReturn(0);
3858 }
3859 
3860 /*
3861     Special version for direct calls from Fortran (to eliminate two function call overheads
3862 */
3863 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3864 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3865 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3866 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3867 #endif
3868 
3869 #undef __FUNCT__
3870 #define __FUNCT__ "matmpibiajsetvaluesblocked"
3871 /*@C
3872   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked()
3873 
3874   Collective on Mat
3875 
3876   Input Parameters:
3877 + mat - the matrix
3878 . min - number of input rows
3879 . im - input rows
3880 . nin - number of input columns
3881 . in - input columns
3882 . v - numerical values input
3883 - addvin - INSERT_VALUES or ADD_VALUES
3884 
3885   Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse.
3886 
3887   Level: advanced
3888 
3889 .seealso:   MatSetValuesBlocked()
3890 @*/
3891 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
3892 {
3893   /* convert input arguments to C version */
3894   Mat             mat = *matin;
3895   PetscInt        m = *min, n = *nin;
3896   InsertMode      addv = *addvin;
3897 
3898   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
3899   const MatScalar *value;
3900   MatScalar       *barray=baij->barray;
3901   PetscTruth      roworiented = baij->roworiented;
3902   PetscErrorCode  ierr;
3903   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
3904   PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
3905   PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
3906 
3907   PetscFunctionBegin;
3908   /* tasks normally handled by MatSetValuesBlocked() */
3909   if (mat->insertmode == NOT_SET_VALUES) {
3910     mat->insertmode = addv;
3911   }
3912 #if defined(PETSC_USE_DEBUG)
3913   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
3914   if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
3915 #endif
3916   if (mat->assembled) {
3917     mat->was_assembled = PETSC_TRUE;
3918     mat->assembled     = PETSC_FALSE;
3919   }
3920   ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
3921 
3922 
3923   if(!barray) {
3924     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
3925     baij->barray = barray;
3926   }
3927 
3928   if (roworiented) {
3929     stepval = (n-1)*bs;
3930   } else {
3931     stepval = (m-1)*bs;
3932   }
3933   for (i=0; i<m; i++) {
3934     if (im[i] < 0) continue;
3935 #if defined(PETSC_USE_DEBUG)
3936     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
3937 #endif
3938     if (im[i] >= rstart && im[i] < rend) {
3939       row = im[i] - rstart;
3940       for (j=0; j<n; j++) {
3941         /* If NumCol = 1 then a copy is not required */
3942         if ((roworiented) && (n == 1)) {
3943           barray = (MatScalar*)v + i*bs2;
3944         } else if((!roworiented) && (m == 1)) {
3945           barray = (MatScalar*)v + j*bs2;
3946         } else { /* Here a copy is required */
3947           if (roworiented) {
3948             value = v + i*(stepval+bs)*bs + j*bs;
3949           } else {
3950             value = v + j*(stepval+bs)*bs + i*bs;
3951           }
3952           for (ii=0; ii<bs; ii++,value+=stepval) {
3953             for (jj=0; jj<bs; jj++) {
3954               *barray++  = *value++;
3955             }
3956           }
3957           barray -=bs2;
3958         }
3959 
3960         if (in[j] >= cstart && in[j] < cend){
3961           col  = in[j] - cstart;
3962           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
3963         }
3964         else if (in[j] < 0) continue;
3965 #if defined(PETSC_USE_DEBUG)
3966         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
3967 #endif
3968         else {
3969           if (mat->was_assembled) {
3970             if (!baij->colmap) {
3971               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
3972             }
3973 
3974 #if defined(PETSC_USE_DEBUG)
3975 #if defined (PETSC_USE_CTABLE)
3976             { PetscInt data;
3977               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
3978               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
3979             }
3980 #else
3981             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
3982 #endif
3983 #endif
3984 #if defined (PETSC_USE_CTABLE)
3985 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
3986             col  = (col - 1)/bs;
3987 #else
3988             col = (baij->colmap[in[j]] - 1)/bs;
3989 #endif
3990             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
3991               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
3992               col =  in[j];
3993             }
3994           }
3995           else col = in[j];
3996           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
3997         }
3998       }
3999     } else {
4000       if (!baij->donotstash) {
4001         if (roworiented) {
4002           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
4003         } else {
4004           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
4005         }
4006       }
4007     }
4008   }
4009 
4010   /* task normally handled by MatSetValuesBlocked() */
4011   ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
4012   PetscFunctionReturn(0);
4013 }
4014