xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision db609ea75e24d1bf22e421c6099bd64ada4af056)
1 
2 #include <../src/mat/impls/baij/mpi/mpibaij.h>   /*I  "petscmat.h"  I*/
3 #include <petscblaslapack.h>
4 
5 extern PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
6 extern PetscErrorCode MatDisAssemble_MPIBAIJ(Mat);
7 extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []);
8 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
9 extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
10 extern PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
11 extern PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
12 extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar,Vec,Vec);
13 
14 #undef __FUNCT__
15 #define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ"
16 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
17 {
18   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
19   PetscErrorCode ierr;
20   PetscInt       i,*idxb = 0;
21   PetscScalar    *va,*vb;
22   Vec            vtmp;
23 
24   PetscFunctionBegin;
25   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
26   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
27   if (idx) {
28     for (i=0; i<A->rmap->n; i++) {if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;}
29   }
30 
31   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
32   if (idx) {ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);}
33   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
34   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
35 
36   for (i=0; i<A->rmap->n; i++){
37     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {va[i] = vb[i]; if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);}
38   }
39 
40   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
41   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
42   ierr = PetscFree(idxb);CHKERRQ(ierr);
43   ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
44   PetscFunctionReturn(0);
45 }
46 
47 EXTERN_C_BEGIN
48 #undef __FUNCT__
49 #define __FUNCT__ "MatStoreValues_MPIBAIJ"
50 PetscErrorCode  MatStoreValues_MPIBAIJ(Mat mat)
51 {
52   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
53   PetscErrorCode ierr;
54 
55   PetscFunctionBegin;
56   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
57   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
58   PetscFunctionReturn(0);
59 }
60 EXTERN_C_END
61 
62 EXTERN_C_BEGIN
63 #undef __FUNCT__
64 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
65 PetscErrorCode  MatRetrieveValues_MPIBAIJ(Mat mat)
66 {
67   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
68   PetscErrorCode ierr;
69 
70   PetscFunctionBegin;
71   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
72   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
73   PetscFunctionReturn(0);
74 }
75 EXTERN_C_END
76 
77 /*
78      Local utility routine that creates a mapping from the global column
79    number to the local number in the off-diagonal part of the local
80    storage of the matrix.  This is done in a non-scalable way since the
81    length of colmap equals the global number of block columns; see the usage sketch after this routine.
82 */
83 #undef __FUNCT__
84 #define __FUNCT__ "MatCreateColmap_MPIBAIJ_Private"
85 PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
86 {
87   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
88   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
89   PetscErrorCode ierr;
90   PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;
91 
92   PetscFunctionBegin;
93 #if defined (PETSC_USE_CTABLE)
94   ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
95   for (i=0; i<nbs; i++){
96     ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr);
97   }
98 #else
99   ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr);
100   ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
101   ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
102   for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
103 #endif
104   PetscFunctionReturn(0);
105 }
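
/*
   Editorial sketch (not part of the original source): how the colmap built above is
   consulted when an off-process global column index reaches MatSetValues_MPIBAIJ()
   below.  The non-CTABLE branch is shown and the variable names mirror the routine.

       PetscInt gcol   = in[j];                       global (point) column index
       PetscInt lblock = baij->colmap[gcol/bs] - 1;   bs*(local block column), or -1 if absent
       PetscInt lcol   = lblock + gcol % bs;          local point column within baij->B

   A negative lblock means the block column has no entry in B yet; the routine then
   either disassembles the matrix or skips the value, depending on the nonew flag.
*/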
106 
107 #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
108 { \
109  \
110     brow = row/bs;  \
111     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
112     rmax = aimax[brow]; nrow = ailen[brow]; \
113       bcol = col/bs; \
114       ridx = row % bs; cidx = col % bs; \
115       low = 0; high = nrow; \
116       while (high-low > 3) { \
117         t = (low+high)/2; \
118         if (rp[t] > bcol) high = t; \
119         else              low  = t; \
120       } \
121       for (_i=low; _i<high; _i++) { \
122         if (rp[_i] > bcol) break; \
123         if (rp[_i] == bcol) { \
124           bap  = ap +  bs2*_i + bs*cidx + ridx; \
125           if (addv == ADD_VALUES) *bap += value;  \
126           else                    *bap  = value;  \
127           goto a_noinsert; \
128         } \
129       } \
130       if (a->nonew == 1) goto a_noinsert; \
131       if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
132       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
133       N = nrow++ - 1;  \
134       /* shift up all the later entries in this row */ \
135       for (ii=N; ii>=_i; ii--) { \
136         rp[ii+1] = rp[ii]; \
137         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
138       } \
139       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
140       rp[_i]                      = bcol;  \
141       ap[bs2*_i + bs*cidx + ridx] = value;  \
142       a_noinsert:; \
143     ailen[brow] = nrow; \
144 }
145 
146 #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
147 { \
148     brow = row/bs;  \
149     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
150     rmax = bimax[brow]; nrow = bilen[brow]; \
151       bcol = col/bs; \
152       ridx = row % bs; cidx = col % bs; \
153       low = 0; high = nrow; \
154       while (high-low > 3) { \
155         t = (low+high)/2; \
156         if (rp[t] > bcol) high = t; \
157         else              low  = t; \
158       } \
159       for (_i=low; _i<high; _i++) { \
160         if (rp[_i] > bcol) break; \
161         if (rp[_i] == bcol) { \
162           bap  = ap +  bs2*_i + bs*cidx + ridx; \
163           if (addv == ADD_VALUES) *bap += value;  \
164           else                    *bap  = value;  \
165           goto b_noinsert; \
166         } \
167       } \
168       if (b->nonew == 1) goto b_noinsert; \
169       if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
170       MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
171       CHKMEMQ;\
172       N = nrow++ - 1;  \
173       /* shift up all the later entries in this row */ \
174       for (ii=N; ii>=_i; ii--) { \
175         rp[ii+1] = rp[ii]; \
176         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
177       } \
178       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
179       rp[_i]                      = bcol;  \
180       ap[bs2*_i + bs*cidx + ridx] = value;  \
181       b_noinsert:; \
182     bilen[brow] = nrow; \
183 }
184 
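/*
   Editorial note: the two macros above address one scalar inside a bs x bs block as
   ap[bs2*_i + bs*cidx + ridx], i.e. the blocks of a block row are stored contiguously
   and each block is stored column-major.  A small worked example, assuming bs = 2
   (so bs2 = 4): block _i of the row occupies ap[4*_i .. 4*_i+3], and within it

       (ridx,cidx) = (0,0) -> offset 0      (0,1) -> offset 2
       (ridx,cidx) = (1,0) -> offset 1      (1,1) -> offset 3
*/
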
185 #undef __FUNCT__
186 #define __FUNCT__ "MatSetValues_MPIBAIJ"
187 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
188 {
189   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
190   MatScalar      value;
191   PetscBool      roworiented = baij->roworiented;
192   PetscErrorCode ierr;
193   PetscInt       i,j,row,col;
194   PetscInt       rstart_orig=mat->rmap->rstart;
195   PetscInt       rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart;
196   PetscInt       cend_orig=mat->cmap->rend,bs=mat->rmap->bs;
197 
198   /* Some variables required by the MatSetValues_SeqBAIJ_{A,B}_Private() macros */
199   Mat            A = baij->A;
200   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)(A)->data;
201   PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
202   MatScalar      *aa=a->a;
203 
204   Mat            B = baij->B;
205   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(B)->data;
206   PetscInt       *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
207   MatScalar      *ba=b->a;
208 
209   PetscInt       *rp,ii,nrow,_i,rmax,N,brow,bcol;
210   PetscInt       low,high,t,ridx,cidx,bs2=a->bs2;
211   MatScalar      *ap,*bap;
212 
213   PetscFunctionBegin;
214   if (v) PetscValidScalarPointer(v,6);
215   for (i=0; i<m; i++) {
216     if (im[i] < 0) continue;
217 #if defined(PETSC_USE_DEBUG)
218     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
219 #endif
220     if (im[i] >= rstart_orig && im[i] < rend_orig) {
221       row = im[i] - rstart_orig;
222       for (j=0; j<n; j++) {
223         if (in[j] >= cstart_orig && in[j] < cend_orig){
224           col = in[j] - cstart_orig;
225           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
226           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
227           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
228         } else if (in[j] < 0) continue;
229 #if defined(PETSC_USE_DEBUG)
230         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
231 #endif
232         else {
233           if (mat->was_assembled) {
234             if (!baij->colmap) {
235               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
236             }
237 #if defined (PETSC_USE_CTABLE)
238             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
239             col  = col - 1;
240 #else
241             col = baij->colmap[in[j]/bs] - 1;
242 #endif
243             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
244               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
245               col =  in[j];
246               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
247               B = baij->B;
248               b = (Mat_SeqBAIJ*)(B)->data;
249               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
250               ba=b->a;
251             } else col += in[j]%bs;
252           } else col = in[j];
253           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
254           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
255           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
256         }
257       }
258     } else {
259       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
260       if (!baij->donotstash) {
261         mat->assembled = PETSC_FALSE;
262         if (roworiented) {
263           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
264         } else {
265           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
266         }
267       }
268     }
269   }
270   PetscFunctionReturn(0);
271 }
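
/*
   Editorial sketch of the ordinary calling sequence that reaches the routine above;
   this is standard PETSc usage shown only for orientation (the particular indices and
   values are made up).  All indices are global; rows owned by another process are
   stashed here and communicated during assembly.

       PetscInt    row = 3, cols[2] = {3,4};
       PetscScalar vals[2] = {1.0,2.0};
       ierr = MatSetValues(mat,1,&row,2,cols,vals,ADD_VALUES);CHKERRQ(ierr);
       ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
       ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/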
272 
273 #undef __FUNCT__
274 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
275 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
276 {
277   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
278   const PetscScalar *value;
279   MatScalar         *barray=baij->barray;
280   PetscBool         roworiented = baij->roworiented;
281   PetscErrorCode    ierr;
282   PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
283   PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
284   PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
285 
286   PetscFunctionBegin;
287   if (!barray) {
288     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
289     baij->barray = barray;
290   }
291 
292   if (roworiented) {
293     stepval = (n-1)*bs;
294   } else {
295     stepval = (m-1)*bs;
296   }
297   for (i=0; i<m; i++) {
298     if (im[i] < 0) continue;
299 #if defined(PETSC_USE_DEBUG)
300     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
301 #endif
302     if (im[i] >= rstart && im[i] < rend) {
303       row = im[i] - rstart;
304       for (j=0; j<n; j++) {
305         /* If only one block column (row-oriented) or one block row (column-oriented) is passed, no copy is required */
306         if ((roworiented) && (n == 1)) {
307           barray = (MatScalar*)v + i*bs2;
308         } else if ((!roworiented) && (m == 1)) {
309           barray = (MatScalar*)v + j*bs2;
310         } else { /* Here a copy is required */
311           if (roworiented) {
312             value = v + (i*(stepval+bs) + j)*bs;
313           } else {
314             value = v + (j*(stepval+bs) + i)*bs;
315           }
316           for (ii=0; ii<bs; ii++,value+=bs+stepval) {
317             for (jj=0; jj<bs; jj++) {
318               barray[jj]  = value[jj];
319             }
320             barray += bs;
321           }
322           barray -= bs2;
323         }
324 
325         if (in[j] >= cstart && in[j] < cend){
326           col  = in[j] - cstart;
327           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
328         }
329         else if (in[j] < 0) continue;
330 #if defined(PETSC_USE_DEBUG)
331         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
332 #endif
333         else {
334           if (mat->was_assembled) {
335             if (!baij->colmap) {
336               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
337             }
338 
339 #if defined(PETSC_USE_DEBUG)
340 #if defined (PETSC_USE_CTABLE)
341             { PetscInt data;
342               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
343               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
344             }
345 #else
346             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
347 #endif
348 #endif
349 #if defined (PETSC_USE_CTABLE)
350             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
351             col  = (col - 1)/bs;
352 #else
353             col = (baij->colmap[in[j]] - 1)/bs;
354 #endif
355             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
356               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
357               col =  in[j];
358             }
359           }
360           else col = in[j];
361           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
362         }
363       }
364     } else {
365       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
366       if (!baij->donotstash) {
367         if (roworiented) {
368           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
369         } else {
370           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
371         }
372       }
373     }
374   }
375   PetscFunctionReturn(0);
376 }
377 
378 #define HASH_KEY 0.6180339887
379 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
380 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
381 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
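/*
   Editorial note: HASH() is multiplicative hashing with the fractional part of the
   golden ratio as the multiplier; the key is scaled by HASH_KEY, only the fractional
   part is kept, and that fraction is mapped onto the table size.  A worked example,
   assuming size = 11 and key = 7:

       tmp  = 7*0.6180339887            = 4.3262...
       frac = tmp - (PetscInt)tmp       = 0.3262...
       h1   = (PetscInt)(11*0.3262...)  = 3
*/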
382 #undef __FUNCT__
383 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
384 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
385 {
386   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
387   PetscBool      roworiented = baij->roworiented;
388   PetscErrorCode ierr;
389   PetscInt       i,j,row,col;
390   PetscInt       rstart_orig=mat->rmap->rstart;
391   PetscInt       rend_orig=mat->rmap->rend,Nbs=baij->Nbs;
392   PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
393   PetscReal      tmp;
394   MatScalar      **HD = baij->hd,value;
395 #if defined(PETSC_USE_DEBUG)
396   PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
397 #endif
398 
399   PetscFunctionBegin;
400   if (v) PetscValidScalarPointer(v,6);
401   for (i=0; i<m; i++) {
402 #if defined(PETSC_USE_DEBUG)
403     if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
404     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
405 #endif
406     row = im[i];
407     if (row >= rstart_orig && row < rend_orig) {
408       for (j=0; j<n; j++) {
409         col = in[j];
410         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
411         /* Look up in the hash table */
412         key = (row/bs)*Nbs+(col/bs)+1;
413         h1  = HASH(size,key,tmp);
414 
415 
416         idx = h1;
417 #if defined(PETSC_USE_DEBUG)
418         insert_ct++;
419         total_ct++;
420         if (HT[idx] != key) {
421           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
422           if (idx == size) {
423             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
424             if (idx == h1) {
425               SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
426             }
427           }
428         }
429 #else
430         if (HT[idx] != key) {
431           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
432           if (idx == size) {
433             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
434             if (idx == h1) {
435               SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
436             }
437           }
438         }
439 #endif
440         /* A HASH table entry is found, so insert the values at the correct address */
441         if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
442         else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
443       }
444     } else {
445       if (!baij->donotstash) {
446         if (roworiented) {
447           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
448         } else {
449           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
450         }
451       }
452     }
453   }
454 #if defined(PETSC_USE_DEBUG)
455   baij->ht_total_ct = total_ct;
456   baij->ht_insert_ct = insert_ct;
457 #endif
458   PetscFunctionReturn(0);
459 }
460 
461 #undef __FUNCT__
462 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
463 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
464 {
465   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
466   PetscBool         roworiented = baij->roworiented;
467   PetscErrorCode    ierr;
468   PetscInt          i,j,ii,jj,row,col;
469   PetscInt          rstart=baij->rstartbs;
470   PetscInt          rend=mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
471   PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
472   PetscReal         tmp;
473   MatScalar         **HD = baij->hd,*baij_a;
474   const PetscScalar *v_t,*value;
475 #if defined(PETSC_USE_DEBUG)
476   PetscInt          total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
477 #endif
478 
479   PetscFunctionBegin;
480 
481   if (roworiented) {
482     stepval = (n-1)*bs;
483   } else {
484     stepval = (m-1)*bs;
485   }
486   for (i=0; i<m; i++) {
487 #if defined(PETSC_USE_DEBUG)
488     if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
489     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
490 #endif
491     row   = im[i];
492     v_t   = v + i*nbs2;
493     if (row >= rstart && row < rend) {
494       for (j=0; j<n; j++) {
495         col = in[j];
496 
497         /* Look up in the hash table */
498         key = row*Nbs+col+1;
499         h1  = HASH(size,key,tmp);
500 
501         idx = h1;
502 #if defined(PETSC_USE_DEBUG)
503         total_ct++;
504         insert_ct++;
505         if (HT[idx] != key) {
506           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
507           if (idx == size) {
508             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
509             if (idx == h1) {
510               SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
511             }
512           }
513         }
514 #else
515         if (HT[idx] != key) {
516           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
517           if (idx == size) {
518             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
519             if (idx == h1) {
520               SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
521             }
522           }
523         }
524 #endif
525         baij_a = HD[idx];
526         if (roworiented) {
527           /*value = v + i*(stepval+bs)*bs + j*bs;*/
528           /* value = v + (i*(stepval+bs)+j)*bs; */
529           value = v_t;
530           v_t  += bs;
531           if (addv == ADD_VALUES) {
532             for (ii=0; ii<bs; ii++,value+=stepval) {
533               for (jj=ii; jj<bs2; jj+=bs) {
534                 baij_a[jj]  += *value++;
535               }
536             }
537           } else {
538             for (ii=0; ii<bs; ii++,value+=stepval) {
539               for (jj=ii; jj<bs2; jj+=bs) {
540                 baij_a[jj]  = *value++;
541               }
542             }
543           }
544         } else {
545           value = v + j*(stepval+bs)*bs + i*bs;
546           if (addv == ADD_VALUES) {
547             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
548               for (jj=0; jj<bs; jj++) {
549                 baij_a[jj]  += *value++;
550               }
551             }
552           } else {
553             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
554               for (jj=0; jj<bs; jj++) {
555                 baij_a[jj]  = *value++;
556               }
557             }
558           }
559         }
560       }
561     } else {
562       if (!baij->donotstash) {
563         if (roworiented) {
564           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
565         } else {
566           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
567         }
568       }
569     }
570   }
571 #if defined(PETSC_USE_DEBUG)
572   baij->ht_total_ct = total_ct;
573   baij->ht_insert_ct = insert_ct;
574 #endif
575   PetscFunctionReturn(0);
576 }
577 
578 #undef __FUNCT__
579 #define __FUNCT__ "MatGetValues_MPIBAIJ"
580 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
581 {
582   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
583   PetscErrorCode ierr;
584   PetscInt       bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
585   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
586 
587   PetscFunctionBegin;
588   for (i=0; i<m; i++) {
589     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
590     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
591     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
592       row = idxm[i] - bsrstart;
593       for (j=0; j<n; j++) {
594         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
595         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
596         if (idxn[j] >= bscstart && idxn[j] < bscend){
597           col = idxn[j] - bscstart;
598           ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
599         } else {
600           if (!baij->colmap) {
601             ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
602           }
603 #if defined (PETSC_USE_CTABLE)
604           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
605           data --;
606 #else
607           data = baij->colmap[idxn[j]/bs]-1;
608 #endif
609           if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
610           else {
611             col  = data + idxn[j]%bs;
612             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
613           }
614         }
615       }
616     } else {
617       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
618     }
619   }
620   PetscFunctionReturn(0);
621 }
622 
623 #undef __FUNCT__
624 #define __FUNCT__ "MatNorm_MPIBAIJ"
625 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
626 {
627   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
628   Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
629   PetscErrorCode ierr;
630   PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
631   PetscReal      sum = 0.0;
632   MatScalar      *v;
633 
634   PetscFunctionBegin;
635   if (baij->size == 1) {
636     ierr =  MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
637   } else {
638     if (type == NORM_FROBENIUS) {
639       v = amat->a;
640       nz = amat->nz*bs2;
641       for (i=0; i<nz; i++) {
642 #if defined(PETSC_USE_COMPLEX)
643         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
644 #else
645         sum += (*v)*(*v); v++;
646 #endif
647       }
648       v = bmat->a;
649       nz = bmat->nz*bs2;
650       for (i=0; i<nz; i++) {
651 #if defined(PETSC_USE_COMPLEX)
652         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
653 #else
654         sum += (*v)*(*v); v++;
655 #endif
656       }
657       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
658       *nrm = PetscSqrtReal(*nrm);
659     } else if (type == NORM_1) { /* max column sum */
660       PetscReal *tmp,*tmp2;
661       PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
662       ierr = PetscMalloc2(mat->cmap->N,PetscReal,&tmp,mat->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr);
663       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
664       v = amat->a; jj = amat->j;
665       for (i=0; i<amat->nz; i++) {
666         for (j=0; j<bs; j++){
667           col = bs*(cstart + *jj) + j; /* column index */
668           for (row=0; row<bs; row++){
669             tmp[col] += PetscAbsScalar(*v);  v++;
670           }
671         }
672         jj++;
673       }
674       v = bmat->a; jj = bmat->j;
675       for (i=0; i<bmat->nz; i++) {
676         for (j=0; j<bs; j++){
677           col = bs*garray[*jj] + j;
678           for (row=0; row<bs; row++){
679             tmp[col] += PetscAbsScalar(*v); v++;
680           }
681         }
682         jj++;
683       }
684       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
685       *nrm = 0.0;
686       for (j=0; j<mat->cmap->N; j++) {
687         if (tmp2[j] > *nrm) *nrm = tmp2[j];
688       }
689       ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
690     } else if (type == NORM_INFINITY) { /* max row sum */
691       PetscReal *sums;
692       ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr);
693       sum = 0.0;
694       for (j=0; j<amat->mbs; j++) {
695         for (row=0; row<bs; row++) sums[row] = 0.0;
696         v = amat->a + bs2*amat->i[j];
697         nz = amat->i[j+1]-amat->i[j];
698         for (i=0; i<nz; i++) {
699           for (col=0; col<bs; col++){
700             for (row=0; row<bs; row++){
701               sums[row] += PetscAbsScalar(*v); v++;
702             }
703           }
704         }
705         v = bmat->a + bs2*bmat->i[j];
706         nz = bmat->i[j+1]-bmat->i[j];
707         for (i=0; i<nz; i++) {
708           for (col=0; col<bs; col++){
709             for (row=0; row<bs; row++){
710               sums[row] += PetscAbsScalar(*v); v++;
711             }
712           }
713         }
714         for (row=0; row<bs; row++){
715           if (sums[row] > sum) sum = sums[row];
716         }
717       }
718       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
719       ierr = PetscFree(sums);CHKERRQ(ierr);
720     } else SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for this norm yet");
721   }
722   PetscFunctionReturn(0);
723 }
724 
725 /*
726   Creates and fills the hash table.
727   This table is created only once.
728   If new entries need to be added to the matrix
729   then the hash table has to be destroyed and
730   recreated. See the sketch after this routine for the slot layout.
731 */
732 #undef __FUNCT__
733 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
734 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
735 {
736   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
737   Mat            A = baij->A,B=baij->B;
738   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
739   PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
740   PetscErrorCode ierr;
741   PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
742   PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
743   PetscInt       *HT,key;
744   MatScalar      **HD;
745   PetscReal      tmp;
746 #if defined(PETSC_USE_INFO)
747   PetscInt       ct=0,max=0;
748 #endif
749 
750   PetscFunctionBegin;
751   if (baij->ht) PetscFunctionReturn(0);
752 
753   baij->ht_size = (PetscInt)(factor*nz);
754   ht_size       = baij->ht_size;
755 
756   /* Allocate Memory for Hash Table */
757   ierr = PetscMalloc2(ht_size,MatScalar*,&baij->hd,ht_size,PetscInt,&baij->ht);CHKERRQ(ierr);
758   ierr = PetscMemzero(baij->hd,ht_size*sizeof(MatScalar*));CHKERRQ(ierr);
759   ierr = PetscMemzero(baij->ht,ht_size*sizeof(PetscInt));CHKERRQ(ierr);
760   HD   = baij->hd;
761   HT   = baij->ht;
762 
763   /* Loop Over A */
764   for (i=0; i<a->mbs; i++) {
765     for (j=ai[i]; j<ai[i+1]; j++) {
766       row = i+rstart;
767       col = aj[j]+cstart;
768 
769       key = row*Nbs + col + 1;
770       h1  = HASH(ht_size,key,tmp);
771       for (k=0; k<ht_size; k++){
772         if (!HT[(h1+k)%ht_size]) {
773           HT[(h1+k)%ht_size] = key;
774           HD[(h1+k)%ht_size] = a->a + j*bs2;
775           break;
776 #if defined(PETSC_USE_INFO)
777         } else {
778           ct++;
779 #endif
780         }
781       }
782 #if defined(PETSC_USE_INFO)
783       if (k> max) max = k;
784 #endif
785     }
786   }
787   /* Loop Over B */
788   for (i=0; i<b->mbs; i++) {
789     for (j=bi[i]; j<bi[i+1]; j++) {
790       row = i+rstart;
791       col = garray[bj[j]];
792       key = row*Nbs + col + 1;
793       h1  = HASH(ht_size,key,tmp);
794       for (k=0; k<ht_size; k++){
795         if (!HT[(h1+k)%ht_size]) {
796           HT[(h1+k)%ht_size] = key;
797           HD[(h1+k)%ht_size] = b->a + j*bs2;
798           break;
799 #if defined(PETSC_USE_INFO)
800         } else {
801           ct++;
802 #endif
803         }
804       }
805 #if defined(PETSC_USE_INFO)
806       if (k> max) max = k;
807 #endif
808     }
809   }
810 
811   /* Print Summary */
812 #if defined(PETSC_USE_INFO)
813   for (i=0,j=0; i<ht_size; i++) {
814     if (HT[i]) {j++;}
815   }
816   ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
817 #endif
818   PetscFunctionReturn(0);
819 }
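
/*
   Editorial summary of the table built above: HT[idx] holds the key row*Nbs+col+1 for
   an occupied slot (0 marks an empty slot, which is why keys are shifted by one) and
   HD[idx] points at the first scalar of the corresponding bs2 block inside a->a or
   b->a.  Collisions are resolved by linear probing.  A lookup sketch, assuming the key
   is known to be present (MatSetValues_MPIBAIJ_HT() above does the same probe in two
   passes instead of with a modulo):

       key = row*Nbs + col + 1;
       idx = HASH(ht_size,key,tmp);
       while (HT[idx] != key) idx = (idx + 1) % ht_size;
       block = HD[idx];     points at the bs2 scalars of block (row,col)
*/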
820 
821 #undef __FUNCT__
822 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
823 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
824 {
825   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
826   PetscErrorCode ierr;
827   PetscInt       nstash,reallocs;
828   InsertMode     addv;
829 
830   PetscFunctionBegin;
831   if (baij->donotstash || mat->nooffprocentries) {
832     PetscFunctionReturn(0);
833   }
834 
835   /* make sure all processes are using either INSERT_VALUES or ADD_VALUES */
836   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
837   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
838   mat->insertmode = addv; /* in case this processor had no cache */
839 
840   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
841   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
842   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
843   ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
844   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
845   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
846   PetscFunctionReturn(0);
847 }
848 
849 #undef __FUNCT__
850 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
851 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
852 {
853   Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
854   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)baij->A->data;
855   PetscErrorCode ierr;
856   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
857   PetscInt       *row,*col;
858   PetscBool      r1,r2,r3,other_disassembled;
859   MatScalar      *val;
860   InsertMode     addv = mat->insertmode;
861   PetscMPIInt    n;
862 
863   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
864   PetscFunctionBegin;
865   if (!baij->donotstash && !mat->nooffprocentries) {
866     while (1) {
867       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
868       if (!flg) break;
869 
870       for (i=0; i<n;) {
871         /* Now identify the consecutive vals belonging to the same row */
872         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
873         if (j < n) ncols = j-i;
874         else       ncols = n-i;
875         /* Now assemble all these values with a single function call */
876         ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
877         i = j;
878       }
879     }
880     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
881     /* Now process the block-stash. Since the values are stashed column-oriented,
882        clear the roworiented flags before calling MatSetValuesBlocked() and
883        restore the original flags afterwards */
884     r1 = baij->roworiented;
885     r2 = a->roworiented;
886     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
887     baij->roworiented = PETSC_FALSE;
888     a->roworiented    = PETSC_FALSE;
889     (((Mat_SeqBAIJ*)baij->B->data))->roworiented    = PETSC_FALSE; /* b->roworiented */
890     while (1) {
891       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
892       if (!flg) break;
893 
894       for (i=0; i<n;) {
895         /* Now identify the consecutive vals belonging to the same row */
896         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
897         if (j < n) ncols = j-i;
898         else       ncols = n-i;
899         ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
900         i = j;
901       }
902     }
903     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
904     baij->roworiented = r1;
905     a->roworiented    = r2;
906     ((Mat_SeqBAIJ*)baij->B->data)->roworiented    = r3; /* b->roworiented */
907   }
908 
909   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
910   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
911 
912   /* determine if any process has disassembled; if so we must
913      also disassemble ourselves so that we may reassemble. */
914   /*
915      if nonzero structure of submatrix B cannot change then we know that
916      no processor disassembled thus we can skip this stuff
917   */
918   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
919     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
920     if (mat->was_assembled && !other_disassembled) {
921       ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
922     }
923   }
924 
925   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
926     ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
927   }
928   ierr = MatSetOption(baij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_FALSE);CHKERRQ(ierr);
929   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
930   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
931 
932 #if defined(PETSC_USE_INFO)
933   if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
934     ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);
935     baij->ht_total_ct  = 0;
936     baij->ht_insert_ct = 0;
937   }
938 #endif
939   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
940     ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
941     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
942     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
943   }
944 
945   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
946   baij->rowvalues = 0;
947   PetscFunctionReturn(0);
948 }
949 
950 #undef __FUNCT__
951 #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
952 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
953 {
954   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
955   PetscErrorCode    ierr;
956   PetscMPIInt       size = baij->size,rank = baij->rank;
957   PetscInt          bs = mat->rmap->bs;
958   PetscBool         iascii,isdraw;
959   PetscViewer       sviewer;
960   PetscViewerFormat format;
961 
962   PetscFunctionBegin;
963   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
964   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
965   if (iascii) {
966     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
967     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
968       MatInfo info;
969       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
970       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
971       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
972       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
973              rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
974       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
975       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
976       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
977       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
978       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
979       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
980       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
981       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
982       PetscFunctionReturn(0);
983     } else if (format == PETSC_VIEWER_ASCII_INFO) {
984       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
985       PetscFunctionReturn(0);
986     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
987       PetscFunctionReturn(0);
988     }
989   }
990 
991   if (isdraw) {
992     PetscDraw       draw;
993     PetscBool  isnull;
994     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
995     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
996   }
997 
998   if (size == 1) {
999     ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1000     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
1001   } else {
1002     /* assemble the entire matrix onto first processor. */
1003     Mat         A;
1004     Mat_SeqBAIJ *Aloc;
1005     PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
1006     MatScalar   *a;
1007 
1008     /* Here we are creating a temporary matrix, so we will assume MATMPIBAIJ is acceptable */
1009     /* Perhaps this should instead be the type of mat? */
1010     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1011     if (!rank) {
1012       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1013     } else {
1014       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1015     }
1016     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
1017     ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1018     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
1019     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1020 
1021     /* copy over the A part */
1022     Aloc = (Mat_SeqBAIJ*)baij->A->data;
1023     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1024     ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
1025 
1026     for (i=0; i<mbs; i++) {
1027       rvals[0] = bs*(baij->rstartbs + i);
1028       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1029       for (j=ai[i]; j<ai[i+1]; j++) {
1030         col = (baij->cstartbs+aj[j])*bs;
1031         for (k=0; k<bs; k++) {
1032           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1033           col++; a += bs;
1034         }
1035       }
1036     }
1037     /* copy over the B part */
1038     Aloc = (Mat_SeqBAIJ*)baij->B->data;
1039     ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1040     for (i=0; i<mbs; i++) {
1041       rvals[0] = bs*(baij->rstartbs + i);
1042       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1043       for (j=ai[i]; j<ai[i+1]; j++) {
1044         col = baij->garray[aj[j]]*bs;
1045         for (k=0; k<bs; k++) {
1046           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1047           col++; a += bs;
1048         }
1049       }
1050     }
1051     ierr = PetscFree(rvals);CHKERRQ(ierr);
1052     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1053     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1054     /*
1055        Everyone has to call to draw the matrix since the graphics waits are
1056        synchronized across all processors that share the PetscDraw object
1057     */
1058     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1059     if (!rank) {
1060       ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1061       /* Set the type name to MATMPIBAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqBAIJ_ASCII() */
1062       ierr = PetscStrcpy(((PetscObject)((Mat_MPIBAIJ*)(A->data))->A)->type_name,MATMPIBAIJ);CHKERRQ(ierr);
1063       ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1064     }
1065     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1066     ierr = MatDestroy(&A);CHKERRQ(ierr);
1067   }
1068   PetscFunctionReturn(0);
1069 }
1070 
1071 #undef __FUNCT__
1072 #define __FUNCT__ "MatView_MPIBAIJ_Binary"
1073 static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
1074 {
1075   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)mat->data;
1076   Mat_SeqBAIJ*   A = (Mat_SeqBAIJ*)a->A->data;
1077   Mat_SeqBAIJ*   B = (Mat_SeqBAIJ*)a->B->data;
1078   PetscErrorCode ierr;
1079   PetscInt       i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen;
1080   PetscInt       *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll;
1081   int            fd;
1082   PetscScalar    *column_values;
1083   FILE           *file;
1084   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
1085   PetscInt       message_count,flowcontrolcount;
1086 
1087   PetscFunctionBegin;
1088   ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
1089   ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
1090   nz   = bs2*(A->nz + B->nz);
1091   rlen = mat->rmap->n;
1092   if (!rank) {
1093     header[0] = MAT_FILE_CLASSID;
1094     header[1] = mat->rmap->N;
1095     header[2] = mat->cmap->N;
1096     ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1097     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1098     ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1099     /* get largest number of rows any processor has */
1100     range = mat->rmap->range;
1101     for (i=1; i<size; i++) {
1102       rlen = PetscMax(rlen,range[i+1] - range[i]);
1103     }
1104   } else {
1105     ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1106   }
1107 
1108   ierr  = PetscMalloc((rlen/bs)*sizeof(PetscInt),&crow_lens);CHKERRQ(ierr);
1109   /* compute lengths of each row  */
1110   for (i=0; i<a->mbs; i++) {
1111     crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1112   }
1113   /* store the row lengths to the file */
1114   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1115   if (!rank) {
1116     MPI_Status status;
1117     ierr  = PetscMalloc(rlen*sizeof(PetscInt),&row_lens);CHKERRQ(ierr);
1118     rlen  = (range[1] - range[0])/bs;
1119     for (i=0; i<rlen; i++) {
1120       for (j=0; j<bs; j++) {
1121         row_lens[i*bs+j] = bs*crow_lens[i];
1122       }
1123     }
1124     ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1125     for (i=1; i<size; i++) {
1126       rlen = (range[i+1] - range[i])/bs;
1127       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1128       ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1129       for (k=0; k<rlen; k++) {
1130         for (j=0; j<bs; j++) {
1131           row_lens[k*bs+j] = bs*crow_lens[k];
1132         }
1133       }
1134       ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1135     }
1136     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1137     ierr = PetscFree(row_lens);CHKERRQ(ierr);
1138   } else {
1139     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1140     ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1141     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1142   }
1143   ierr = PetscFree(crow_lens);CHKERRQ(ierr);
1144 
1145   /* load up the local column indices. Include them for every row, not just once per block row, since process 0 does not have the
1146      information needed to expand a block row into its individual rows. This does require more communication, but still no more than
1147      the communication needed for the nonzero values */
1148   nzmax = nz; /* space the largest process needs */
1149   ierr = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,((PetscObject)mat)->comm);CHKERRQ(ierr);
1150   ierr = PetscMalloc(nzmax*sizeof(PetscInt),&column_indices);CHKERRQ(ierr);
1151   cnt  = 0;
1152   for (i=0; i<a->mbs; i++) {
1153     pcnt = cnt;
1154     for (j=B->i[i]; j<B->i[i+1]; j++) {
1155       if ( (col = garray[B->j[j]]) > cstart) break;
1156       for (l=0; l<bs; l++) {
1157         column_indices[cnt++] = bs*col+l;
1158       }
1159     }
1160     for (k=A->i[i]; k<A->i[i+1]; k++) {
1161       for (l=0; l<bs; l++) {
1162         column_indices[cnt++] = bs*(A->j[k] + cstart)+l;
1163       }
1164     }
1165     for (; j<B->i[i+1]; j++) {
1166       for (l=0; l<bs; l++) {
1167         column_indices[cnt++] = bs*garray[B->j[j]]+l;
1168       }
1169     }
1170     len = cnt - pcnt;
1171     for (k=1; k<bs; k++) {
1172       ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr);
1173       cnt += len;
1174     }
1175   }
1176   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1177 
1178   /* store the columns to the file */
1179   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1180   if (!rank) {
1181     MPI_Status status;
1182     ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1183     for (i=1; i<size; i++) {
1184       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1185       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1186       ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1187       ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1188     }
1189     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1190   } else {
1191     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1192     ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1193     ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1194     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1195   }
1196   ierr = PetscFree(column_indices);CHKERRQ(ierr);
1197 
1198   /* load up the numerical values */
1199   ierr = PetscMalloc(nzmax*sizeof(PetscScalar),&column_values);CHKERRQ(ierr);
1200   cnt = 0;
1201   for (i=0; i<a->mbs; i++) {
1202     rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]);
1203     for (j=B->i[i]; j<B->i[i+1]; j++) {
1204       if ( garray[B->j[j]] > cstart) break;
1205       for (l=0; l<bs; l++) {
1206         for (ll=0; ll<bs; ll++) {
1207           column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1208         }
1209       }
1210       cnt += bs;
1211     }
1212     for (k=A->i[i]; k<A->i[i+1]; k++) {
1213       for (l=0; l<bs; l++) {
1214         for (ll=0; ll<bs; ll++) {
1215           column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll];
1216         }
1217       }
1218       cnt += bs;
1219     }
1220     for (; j<B->i[i+1]; j++) {
1221       for (l=0; l<bs; l++) {
1222         for (ll=0; ll<bs; ll++) {
1223           column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
1224         }
1225       }
1226       cnt += bs;
1227     }
1228     cnt += (bs-1)*rlen;
1229   }
1230   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1231 
1232   /* store the column values to the file */
1233   ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
1234   if (!rank) {
1235     MPI_Status status;
1236     ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1237     for (i=1; i<size; i++) {
1238       ierr = PetscViewerFlowControlStepMaster(viewer,i,message_count,flowcontrolcount);CHKERRQ(ierr);
1239       ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1240       ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,((PetscObject)mat)->comm,&status);CHKERRQ(ierr);
1241       ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
1242     }
1243     ierr = PetscViewerFlowControlEndMaster(viewer,message_count);CHKERRQ(ierr);
1244   } else {
1245     ierr = PetscViewerFlowControlStepWorker(viewer,rank,message_count);CHKERRQ(ierr);
1246     ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1247     ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,((PetscObject)mat)->comm);CHKERRQ(ierr);
1248     ierr = PetscViewerFlowControlEndWorker(viewer,message_count);CHKERRQ(ierr);
1249   }
1250   ierr = PetscFree(column_values);CHKERRQ(ierr);
1251 
1252   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1253   if (file) {
1254     fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
1255   }
1256   PetscFunctionReturn(0);
1257 }
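
/*
   Editorial summary of the binary layout written above: a four-entry header
   {MAT_FILE_CLASSID, M, N, total number of nonzeros}, then the length of every point
   row, then the global column indices of every nonzero, then the scalar values, each
   section gathered to process 0 and written in rank order.  The -matload_block_size
   option is written to the companion .info file so the block size can be recovered on
   load.  A typical load sequence (standard PETSc usage; the file name is made up):

       ierr = PetscViewerBinaryOpen(comm,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
       ierr = MatCreate(comm,&mat);CHKERRQ(ierr);
       ierr = MatSetType(mat,MATMPIBAIJ);CHKERRQ(ierr);
       ierr = MatLoad(mat,viewer);CHKERRQ(ierr);
*/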
1258 
1259 #undef __FUNCT__
1260 #define __FUNCT__ "MatView_MPIBAIJ"
1261 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
1262 {
1263   PetscErrorCode ierr;
1264   PetscBool      iascii,isdraw,issocket,isbinary;
1265 
1266   PetscFunctionBegin;
1267   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1268   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1269   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1270   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1271   if (iascii || isdraw || issocket) {
1272     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1273   } else if (isbinary) {
1274     ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1275   } else {
1276     SETERRQ1(((PetscObject)mat)->comm,PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
1277   }
1278   PetscFunctionReturn(0);
1279 }
1280 
1281 #undef __FUNCT__
1282 #define __FUNCT__ "MatDestroy_MPIBAIJ"
1283 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
1284 {
1285   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1286   PetscErrorCode ierr;
1287 
1288   PetscFunctionBegin;
1289 #if defined(PETSC_USE_LOG)
1290   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
1291 #endif
1292   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1293   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
1294   ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
1295   ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
1296 #if defined (PETSC_USE_CTABLE)
1297   ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
1298 #else
1299   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
1300 #endif
1301   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
1302   ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
1303   ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
1304   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
1305   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
1306   ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);
1307   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
1308   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1309 
1310   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1311   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1312   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1313   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
1314   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1315   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1316   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
1317   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr);
1318   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1319   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C","",PETSC_NULL);CHKERRQ(ierr);
1320   PetscFunctionReturn(0);
1321 }
1322 
1323 #undef __FUNCT__
1324 #define __FUNCT__ "MatMult_MPIBAIJ"
1325 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1326 {
1327   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1328   PetscErrorCode ierr;
1329   PetscInt       nt;
1330 
1331   PetscFunctionBegin;
1332   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1333   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1334   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1335   if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
1336   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1337   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1338   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1339   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1340   PetscFunctionReturn(0);
1341 }
1342 
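/*
   Note (a sketch, not part of the original source): MatMult_MPIBAIJ() above computes
   y = A_d x + B x_ghost, where A_d is the local "diagonal" block, B the off-diagonal
   block, and x_ghost the ghost values gathered into a->lvec; the scatter is begun before
   the local product so communication overlaps computation.  A caller simply does,
   assuming vectors x and y with layouts compatible with A:

      ierr = MatMult(A,x,y);CHKERRQ(ierr);
*/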
1343 #undef __FUNCT__
1344 #define __FUNCT__ "MatMultAdd_MPIBAIJ"
1345 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1346 {
1347   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1348   PetscErrorCode ierr;
1349 
1350   PetscFunctionBegin;
1351   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1352   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1353   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1354   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1355   PetscFunctionReturn(0);
1356 }
1357 
1358 #undef __FUNCT__
1359 #define __FUNCT__ "MatMultTranspose_MPIBAIJ"
1360 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1361 {
1362   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1363   PetscErrorCode ierr;
1364   PetscBool      merged;
1365 
1366   PetscFunctionBegin;
1367   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1368   /* do nondiagonal part */
1369   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1370   if (!merged) {
1371     /* send it on its way */
1372     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1373     /* do local part */
1374     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1375     /* receive remote parts: note this assumes the values are not actually */
1376     /* inserted in yy until the next line */
1377     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1378   } else {
1379     /* do local part */
1380     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1381     /* send it on its way */
1382     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1383     /* values were actually received in the Begin() but we still need to call this no-op End() */
1384     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1385   }
1386   PetscFunctionReturn(0);
1387 }
1388 
1389 #undef __FUNCT__
1390 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
1391 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1392 {
1393   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1394   PetscErrorCode ierr;
1395 
1396   PetscFunctionBegin;
1397   /* do nondiagonal part */
1398   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1399   /* send it on its way */
1400   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1401   /* do local part */
1402   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1403   /* receive remote parts: note this assumes the values are not actually */
1404   /* inserted in zz until the next line, which is true for this implementation */
1405   /* but may not always hold. */
1406   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1407   PetscFunctionReturn(0);
1408 }
1409 
1410 /*
1411   This only works correctly for square matrices where the subblock A->A is the
1412    diagonal block
1413 */
1414 #undef __FUNCT__
1415 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
1416 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1417 {
1418   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1419   PetscErrorCode ierr;
1420 
1421   PetscFunctionBegin;
1422   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1423   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1424   PetscFunctionReturn(0);
1425 }
1426 
1427 #undef __FUNCT__
1428 #define __FUNCT__ "MatScale_MPIBAIJ"
1429 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
1430 {
1431   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1432   PetscErrorCode ierr;
1433 
1434   PetscFunctionBegin;
1435   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1436   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1437   PetscFunctionReturn(0);
1438 }
1439 
1440 #undef __FUNCT__
1441 #define __FUNCT__ "MatGetRow_MPIBAIJ"
1442 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1443 {
1444   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
1445   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1446   PetscErrorCode ierr;
1447   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1448   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
1449   PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;
1450 
1451   PetscFunctionBegin;
1452   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1453   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1454   mat->getrowactive = PETSC_TRUE;
1455 
1456   if (!mat->rowvalues && (idx || v)) {
1457     /*
1458         allocate enough space to hold information from the longest row.
1459     */
1460     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1461     PetscInt     max = 1,mbs = mat->mbs,tmp;
1462     for (i=0; i<mbs; i++) {
1463       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1464       if (max < tmp) { max = tmp; }
1465     }
1466     ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1467   }
1468   lrow = row - brstart;
1469 
1470   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1471   if (!v)   {pvA = 0; pvB = 0;}
1472   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1473   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1474   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1475   nztot = nzA + nzB;
1476 
1477   cmap  = mat->garray;
1478   if (v  || idx) {
1479     if (nztot) {
1480       /* Sort by increasing column numbers, assuming A and B already sorted */
1481       PetscInt imark = -1;
1482       if (v) {
1483         *v = v_p = mat->rowvalues;
1484         for (i=0; i<nzB; i++) {
1485           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1486           else break;
1487         }
1488         imark = i;
1489         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1490         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1491       }
1492       if (idx) {
1493         *idx = idx_p = mat->rowindices;
1494         if (imark > -1) {
1495           for (i=0; i<imark; i++) {
1496             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1497           }
1498         } else {
1499           for (i=0; i<nzB; i++) {
1500             if (cmap[cworkB[i]/bs] < cstart)
1501               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1502             else break;
1503           }
1504           imark = i;
1505         }
1506         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1507         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1508       }
1509     } else {
1510       if (idx) *idx = 0;
1511       if (v)   *v   = 0;
1512     }
1513   }
1514   *nz = nztot;
1515   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1516   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1517   PetscFunctionReturn(0);
1518 }
1519 
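/*
   Example usage (a sketch, not part of this file): MatGetRow()/MatRestoreRow() may only
   be called for locally owned rows, one row at a time.  Assuming an assembled MPIBAIJ
   matrix A:

      PetscInt          rstart,rend,row,ncols;
      const PetscInt    *cols;
      const PetscScalar *vals;
      ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
      for (row=rstart; row<rend; row++) {
        ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
        (use ncols, cols[], vals[] here)
        ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
      }
*/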
1520 #undef __FUNCT__
1521 #define __FUNCT__ "MatRestoreRow_MPIBAIJ"
1522 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1523 {
1524   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1525 
1526   PetscFunctionBegin;
1527   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1528   baij->getrowactive = PETSC_FALSE;
1529   PetscFunctionReturn(0);
1530 }
1531 
1532 #undef __FUNCT__
1533 #define __FUNCT__ "MatZeroEntries_MPIBAIJ"
1534 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1535 {
1536   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1537   PetscErrorCode ierr;
1538 
1539   PetscFunctionBegin;
1540   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1541   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1542   PetscFunctionReturn(0);
1543 }
1544 
1545 #undef __FUNCT__
1546 #define __FUNCT__ "MatGetInfo_MPIBAIJ"
1547 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1548 {
1549   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
1550   Mat            A = a->A,B = a->B;
1551   PetscErrorCode ierr;
1552   PetscReal      isend[5],irecv[5];
1553 
1554   PetscFunctionBegin;
1555   info->block_size     = (PetscReal)matin->rmap->bs;
1556   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1557   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1558   isend[3] = info->memory;  isend[4] = info->mallocs;
1559   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1560   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1561   isend[3] += info->memory;  isend[4] += info->mallocs;
1562   if (flag == MAT_LOCAL) {
1563     info->nz_used      = isend[0];
1564     info->nz_allocated = isend[1];
1565     info->nz_unneeded  = isend[2];
1566     info->memory       = isend[3];
1567     info->mallocs      = isend[4];
1568   } else if (flag == MAT_GLOBAL_MAX) {
1569     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1570     info->nz_used      = irecv[0];
1571     info->nz_allocated = irecv[1];
1572     info->nz_unneeded  = irecv[2];
1573     info->memory       = irecv[3];
1574     info->mallocs      = irecv[4];
1575   } else if (flag == MAT_GLOBAL_SUM) {
1576     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1577     info->nz_used      = irecv[0];
1578     info->nz_allocated = irecv[1];
1579     info->nz_unneeded  = irecv[2];
1580     info->memory       = irecv[3];
1581     info->mallocs      = irecv[4];
1582   } else {
1583     SETERRQ1(((PetscObject)matin)->comm,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1584   }
1585   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1586   info->fill_ratio_needed = 0;
1587   info->factor_mallocs    = 0;
1588   PetscFunctionReturn(0);
1589 }
1590 
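/*
   Example usage (a sketch, not part of this file): MatGetInfo() with MAT_GLOBAL_SUM
   reduces the five locally computed quantities packed into isend[] above over the
   communicator.  Assuming an assembled matrix A:

      MatInfo info;
      ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
      ierr = PetscPrintf(PETSC_COMM_WORLD,"nz used %g, nz allocated %g\n",info.nz_used,info.nz_allocated);CHKERRQ(ierr);
*/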
1591 #undef __FUNCT__
1592 #define __FUNCT__ "MatSetOption_MPIBAIJ"
1593 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool  flg)
1594 {
1595   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1596   PetscErrorCode ierr;
1597 
1598   PetscFunctionBegin;
1599   switch (op) {
1600   case MAT_NEW_NONZERO_LOCATIONS:
1601   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1602   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1603   case MAT_KEEP_NONZERO_PATTERN:
1604   case MAT_NEW_NONZERO_LOCATION_ERR:
1605     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1606     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1607     break;
1608   case MAT_ROW_ORIENTED:
1609     a->roworiented = flg;
1610     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1611     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1612     break;
1613   case MAT_NEW_DIAGONALS:
1614     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1615     break;
1616   case MAT_IGNORE_OFF_PROC_ENTRIES:
1617     a->donotstash = flg;
1618     break;
1619   case MAT_USE_HASH_TABLE:
1620     a->ht_flag = flg;
1621     break;
1622   case MAT_SYMMETRIC:
1623   case MAT_STRUCTURALLY_SYMMETRIC:
1624   case MAT_HERMITIAN:
1625   case MAT_SYMMETRY_ETERNAL:
1626     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1627     break;
1628   default:
1629     SETERRQ1(((PetscObject)A)->comm,PETSC_ERR_SUP,"unknown option %d",op);
1630   }
1631   PetscFunctionReturn(0);
1632 }
1633 
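/*
   Example usage (a sketch, not part of this file): options set on the parallel matrix are
   forwarded to the sequential blocks as shown above.  For instance, to enable the
   hash-table insertion path (the MAT_USE_HASH_TABLE case) and to drop off-process entries
   before assembly:

      ierr = MatSetOption(A,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
      ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
*/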
1634 #undef __FUNCT__
1635 #define __FUNCT__ "MatTranspose_MPIBAIJ"
1636 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
1637 {
1638   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
1639   Mat_SeqBAIJ    *Aloc;
1640   Mat            B;
1641   PetscErrorCode ierr;
1642   PetscInt       M=A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
1643   PetscInt       bs=A->rmap->bs,mbs=baij->mbs;
1644   MatScalar      *a;
1645 
1646   PetscFunctionBegin;
1647   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1648   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1649     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
1650     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1651     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1652     /* Do not know preallocation information, but must set block size */
1653     ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,PETSC_NULL,PETSC_DECIDE,PETSC_NULL);CHKERRQ(ierr);
1654   } else {
1655     B = *matout;
1656   }
1657 
1658   /* copy over the A part */
1659   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1660   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1661   ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
1662 
1663   for (i=0; i<mbs; i++) {
1664     rvals[0] = bs*(baij->rstartbs + i);
1665     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1666     for (j=ai[i]; j<ai[i+1]; j++) {
1667       col = (baij->cstartbs+aj[j])*bs;
1668       for (k=0; k<bs; k++) {
1669         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1670         col++; a += bs;
1671       }
1672     }
1673   }
1674   /* copy over the B part */
1675   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1676   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1677   for (i=0; i<mbs; i++) {
1678     rvals[0] = bs*(baij->rstartbs + i);
1679     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1680     for (j=ai[i]; j<ai[i+1]; j++) {
1681       col = baij->garray[aj[j]]*bs;
1682       for (k=0; k<bs; k++) {
1683         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1684         col++; a += bs;
1685       }
1686     }
1687   }
1688   ierr = PetscFree(rvals);CHKERRQ(ierr);
1689   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1690   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1691 
1692   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1693     *matout = B;
1694   } else {
1695     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
1696   }
1697   PetscFunctionReturn(0);
1698 }
1699 
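/*
   Example usage (a sketch, not part of this file): both the out-of-place and the in-place
   (square matrices only) transposes reach the code above.  Assuming an assembled MPIBAIJ
   matrix A:

      Mat At;
      ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);    At = A^T
      ierr = MatTranspose(A,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);       in-place, square A only
      ierr = MatDestroy(&At);CHKERRQ(ierr);
*/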
1700 #undef __FUNCT__
1701 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
1702 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1703 {
1704   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1705   Mat            a = baij->A,b = baij->B;
1706   PetscErrorCode ierr;
1707   PetscInt       s1,s2,s3;
1708 
1709   PetscFunctionBegin;
1710   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1711   if (rr) {
1712     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1713     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1714     /* Overlap communication with computation. */
1715     ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1716   }
1717   if (ll) {
1718     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1719     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1720     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1721   }
1722   /* scale  the diagonal block */
1723   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1724 
1725   if (rr) {
1726     /* Do a scatter end and then right scale the off-diagonal block */
1727     ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1728     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1729   }
1730 
1731   PetscFunctionReturn(0);
1732 }
1733 
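/*
   Example usage (a sketch, not part of this file): computes A = diag(ll) A diag(rr); either
   vector may be PETSC_NULL to skip that side.  The left scaling must conform to the row
   layout and the right scaling to the column layout, as checked above:

      ierr = MatDiagonalScale(A,ll,rr);CHKERRQ(ierr);
      ierr = MatDiagonalScale(A,PETSC_NULL,rr);CHKERRQ(ierr);    right scaling only
*/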
1734 #undef __FUNCT__
1735 #define __FUNCT__ "MatZeroRows_MPIBAIJ"
1736 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
1737 {
1738   Mat_MPIBAIJ       *l = (Mat_MPIBAIJ*)A->data;
1739   PetscErrorCode    ierr;
1740   PetscMPIInt       imdex,size = l->size,n,rank = l->rank;
1741   PetscInt          i,*owners = A->rmap->range;
1742   PetscInt          *nprocs,j,idx,nsends,row;
1743   PetscInt          nmax,*svalues,*starts,*owner,nrecvs;
1744   PetscInt          *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source,lastidx = -1;
1745   PetscInt          *lens,*lrows,*values,rstart_bs=A->rmap->rstart;
1746   MPI_Comm          comm = ((PetscObject)A)->comm;
1747   MPI_Request       *send_waits,*recv_waits;
1748   MPI_Status        recv_status,*send_status;
1749   const PetscScalar *xx;
1750   PetscScalar       *bb;
1751 #if defined(PETSC_DEBUG)
1752   PetscBool         found = PETSC_FALSE;
1753 #endif
1754 
1755   PetscFunctionBegin;
1756   /*  first count number of contributors to each processor */
1757   ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
1758   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
1759   ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
1760   j = 0;
1761   for (i=0; i<N; i++) {
1762     if (lastidx > (idx = rows[i])) j = 0;
1763     lastidx = idx;
1764     for (; j<size; j++) {
1765       if (idx >= owners[j] && idx < owners[j+1]) {
1766         nprocs[2*j]++;
1767         nprocs[2*j+1] = 1;
1768         owner[i] = j;
1769 #if defined(PETSC_DEBUG)
1770         found = PETSC_TRUE;
1771 #endif
1772         break;
1773       }
1774     }
1775 #if defined(PETSC_DEBUG)
1776     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
1777     found = PETSC_FALSE;
1778 #endif
1779   }
1780   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
1781 
1782   if (A->nooffproczerorows) {
1783     if (nsends > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"You called MatSetOption(,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE) but set an off process zero row");
1784     nrecvs = nsends;
1785     nmax   = N;
1786   } else {
1787     /* inform other processors of number of messages and max length*/
1788     ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
1789   }
1790 
1791   /* post receives:   */
1792   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
1793   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
1794   for (i=0; i<nrecvs; i++) {
1795     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
1796   }
1797 
1798   /* do sends:
1799      1) starts[i] gives the starting index in svalues for stuff going to
1800      the ith processor
1801   */
1802   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
1803   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
1804   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
1805   starts[0]  = 0;
1806   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1807   for (i=0; i<N; i++) {
1808     svalues[starts[owner[i]]++] = rows[i];
1809   }
1810 
1811   starts[0] = 0;
1812   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1813   count = 0;
1814   for (i=0; i<size; i++) {
1815     if (nprocs[2*i+1]) {
1816       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
1817     }
1818   }
1819   ierr = PetscFree(starts);CHKERRQ(ierr);
1820 
1821   base = owners[rank];
1822 
1823   /*  wait on receives */
1824   ierr   = PetscMalloc2(nrecvs+1,PetscInt,&lens,nrecvs+1,PetscInt,&source);CHKERRQ(ierr);
1825   count  = nrecvs;
1826   slen = 0;
1827   while (count) {
1828     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
1829     /* unpack receives into our local space */
1830     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
1831     source[imdex]  = recv_status.MPI_SOURCE;
1832     lens[imdex]    = n;
1833     slen          += n;
1834     count--;
1835   }
1836   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1837 
1838   /* move the data into the send scatter */
1839   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
1840   count = 0;
1841   for (i=0; i<nrecvs; i++) {
1842     values = rvalues + i*nmax;
1843     for (j=0; j<lens[i]; j++) {
1844       lrows[count++] = values[j] - base;
1845     }
1846   }
1847   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1848   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
1849   ierr = PetscFree(owner);CHKERRQ(ierr);
1850   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1851 
1852   /* fix right hand side if needed */
1853   if (x && b) {
1854     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
1855     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1856     for (i=0; i<slen; i++) {
1857       bb[lrows[i]] = diag*xx[lrows[i]];
1858     }
1859     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
1860     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1861   }
1862 
1863   /* actually zap the local rows */
1864   /*
1865         Zero the required rows. If the "diagonal block" of the matrix
1866      is square and the user wishes to set the diagonal, we use separate
1867      code so that MatSetValues() is not called for each diagonal entry,
1868      which would allocate new memory each time and slow things down.
1869 
1870   */
1871   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1872   ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0,0,0);CHKERRQ(ierr);
1873   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
1874     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag,0,0);CHKERRQ(ierr);
1875   } else if (diag != 0.0) {
1876     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
1877     if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1878        MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1879     for (i=0; i<slen; i++) {
1880       row  = lrows[i] + rstart_bs;
1881       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
1882     }
1883     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1884     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1885   } else {
1886     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0,0,0);CHKERRQ(ierr);
1887   }
1888 
1889   ierr = PetscFree(lrows);CHKERRQ(ierr);
1890 
1891   /* wait on sends */
1892   if (nsends) {
1893     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1894     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1895     ierr = PetscFree(send_status);CHKERRQ(ierr);
1896   }
1897   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1898   ierr = PetscFree(svalues);CHKERRQ(ierr);
1899 
1900   PetscFunctionReturn(0);
1901 }
1902 
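/*
   Example usage (a sketch, not part of this file): rows are given by global index and may
   be owned by any process; the communication above routes each row to its owner.  Typical
   for Dirichlet boundary conditions, assuming an array rows[] of nrows global indices and
   optional solution/right-hand-side vectors x and b:

      ierr = MatZeroRows(A,nrows,rows,1.0,x,b);CHKERRQ(ierr);    1.0 on the diagonal, b fixed from x
*/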
1903 #undef __FUNCT__
1904 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
1905 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1906 {
1907   Mat_MPIBAIJ    *a   = (Mat_MPIBAIJ*)A->data;
1908   PetscErrorCode ierr;
1909 
1910   PetscFunctionBegin;
1911   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1912   PetscFunctionReturn(0);
1913 }
1914 
1915 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
1916 
1917 #undef __FUNCT__
1918 #define __FUNCT__ "MatEqual_MPIBAIJ"
1919 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool  *flag)
1920 {
1921   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1922   Mat            a,b,c,d;
1923   PetscBool      flg;
1924   PetscErrorCode ierr;
1925 
1926   PetscFunctionBegin;
1927   a = matA->A; b = matA->B;
1928   c = matB->A; d = matB->B;
1929 
1930   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1931   if (flg) {
1932     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1933   }
1934   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1935   PetscFunctionReturn(0);
1936 }
1937 
1938 #undef __FUNCT__
1939 #define __FUNCT__ "MatCopy_MPIBAIJ"
1940 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
1941 {
1942   PetscErrorCode ierr;
1943   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ *)A->data;
1944   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ *)B->data;
1945 
1946   PetscFunctionBegin;
1947   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1948   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1949     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1950   } else {
1951     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1952     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1953   }
1954   PetscFunctionReturn(0);
1955 }
1956 
1957 #undef __FUNCT__
1958 #define __FUNCT__ "MatSetUp_MPIBAIJ"
1959 PetscErrorCode MatSetUp_MPIBAIJ(Mat A)
1960 {
1961   PetscErrorCode ierr;
1962 
1963   PetscFunctionBegin;
1964   if (A->rmap->bs < 0) {
1965     ierr = PetscLayoutSetBlockSize(A->rmap,1);CHKERRQ(ierr);
1966     ierr = PetscLayoutSetBlockSize(A->cmap,1);CHKERRQ(ierr);
1967   }
1968   ierr =  MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1969   PetscFunctionReturn(0);
1970 }
1971 
1972 #undef __FUNCT__
1973 #define __FUNCT__ "MatAXPY_MPIBAIJ"
1974 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1975 {
1976   PetscErrorCode ierr;
1977   Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data;
1978   PetscBLASInt   bnz,one=1;
1979   Mat_SeqBAIJ    *x,*y;
1980 
1981   PetscFunctionBegin;
1982   if (str == SAME_NONZERO_PATTERN) {
1983     PetscScalar alpha = a;
1984     x = (Mat_SeqBAIJ *)xx->A->data;
1985     y = (Mat_SeqBAIJ *)yy->A->data;
1986     bnz = PetscBLASIntCast(x->nz);
1987     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1988     x = (Mat_SeqBAIJ *)xx->B->data;
1989     y = (Mat_SeqBAIJ *)yy->B->data;
1990     bnz = PetscBLASIntCast(x->nz);
1991     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1992   } else {
1993     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1994   }
1995   PetscFunctionReturn(0);
1996 }
1997 
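/*
   Example usage (a sketch, not part of this file): when X and Y share the same nonzero
   pattern the fast BLAS path above is taken on both the diagonal and off-diagonal blocks;
   otherwise MatAXPY_Basic() is used.  Computes Y = a*X + Y:

      ierr = MatAXPY(Y,2.0,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
*/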
1998 #undef __FUNCT__
1999 #define __FUNCT__ "MatRealPart_MPIBAIJ"
2000 PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
2001 {
2002   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ*)A->data;
2003   PetscErrorCode ierr;
2004 
2005   PetscFunctionBegin;
2006   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2007   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2008   PetscFunctionReturn(0);
2009 }
2010 
2011 #undef __FUNCT__
2012 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ"
2013 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
2014 {
2015   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ*)A->data;
2016   PetscErrorCode ierr;
2017 
2018   PetscFunctionBegin;
2019   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2020   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2021   PetscFunctionReturn(0);
2022 }
2023 
2024 #undef __FUNCT__
2025 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ"
2026 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
2027 {
2028   PetscErrorCode ierr;
2029   IS             iscol_local;
2030   PetscInt       csize;
2031 
2032   PetscFunctionBegin;
2033   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
2034   if (call == MAT_REUSE_MATRIX) {
2035     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
2036     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2037   } else {
2038     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
2039   }
2040   ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
2041   if (call == MAT_INITIAL_MATRIX) {
2042     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
2043     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
2044   }
2045   PetscFunctionReturn(0);
2046 }
2047 
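/*
   Example usage (a sketch, not part of this file): collective extraction of a parallel
   submatrix.  Assuming index sets isrow and iscol holding the locally owned global rows
   and columns wanted on this process:

      Mat S;
      ierr = MatGetSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&S);CHKERRQ(ierr);
      ierr = MatGetSubMatrix(A,isrow,iscol,MAT_REUSE_MATRIX,&S);CHKERRQ(ierr);    reuse the same pattern
      ierr = MatDestroy(&S);CHKERRQ(ierr);
*/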
2048 #undef __FUNCT__
2049 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ_Private"
2050 /*
2051     Not great since it makes two copies of the submatrix: first a local SeqBAIJ,
2052   and then the final result is formed by concatenating the local matrices.
2053   Writing it directly would be much like MatGetSubMatrices_MPIBAIJ()
2054 */
2055 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2056 {
2057   PetscErrorCode ierr;
2058   PetscMPIInt    rank,size;
2059   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs;
2060   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
2061   Mat            *local,M,Mreuse;
2062   MatScalar      *vwork,*aa;
2063   MPI_Comm       comm = ((PetscObject)mat)->comm;
2064   Mat_SeqBAIJ    *aij;
2065 
2066 
2067   PetscFunctionBegin;
2068   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2069   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2070 
2071   if (call ==  MAT_REUSE_MATRIX) {
2072     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
2073     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2074     local = &Mreuse;
2075     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
2076   } else {
2077     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
2078     Mreuse = *local;
2079     ierr   = PetscFree(local);CHKERRQ(ierr);
2080   }
2081 
2082   /*
2083       m - number of local rows
2084       n - number of columns (same on all processors)
2085       rstart - first row in new global matrix generated
2086   */
2087   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2088   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2089   m    = m/bs;
2090   n    = n/bs;
2091 
2092   if (call == MAT_INITIAL_MATRIX) {
2093     aij = (Mat_SeqBAIJ*)(Mreuse)->data;
2094     ii  = aij->i;
2095     jj  = aij->j;
2096 
2097     /*
2098         Determine the number of non-zeros in the diagonal and off-diagonal
2099         portions of the matrix in order to do correct preallocation
2100     */
2101 
2102     /* first get start and end of "diagonal" columns */
2103     if (csize == PETSC_DECIDE) {
2104       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2105       if (mglobal == n*bs) { /* square matrix */
2106         nlocal = m;
2107       } else {
2108         nlocal = n/size + ((n % size) > rank);
2109       }
2110     } else {
2111       nlocal = csize/bs;
2112     }
2113     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2114     rstart = rend - nlocal;
2115     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2116 
2117     /* next, compute all the lengths */
2118     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2119     olens = dlens + m;
2120     for (i=0; i<m; i++) {
2121       jend = ii[i+1] - ii[i];
2122       olen = 0;
2123       dlen = 0;
2124       for (j=0; j<jend; j++) {
2125         if (*jj < rstart || *jj >= rend) olen++;
2126         else dlen++;
2127         jj++;
2128       }
2129       olens[i] = olen;
2130       dlens[i] = dlen;
2131     }
2132     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
2133     ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr);
2134     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
2135     ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
2136     ierr = PetscFree(dlens);CHKERRQ(ierr);
2137   } else {
2138     PetscInt ml,nl;
2139 
2140     M = *newmat;
2141     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2142     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2143     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2144     /*
2145          The next two lines are needed so we may call MatSetValuesBlocked_MPIBAIJ() below directly,
2146        rather than the slower MatSetValues().
2147     */
2148     M->was_assembled = PETSC_TRUE;
2149     M->assembled     = PETSC_FALSE;
2150   }
2151   ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2152   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2153   aij = (Mat_SeqBAIJ*)(Mreuse)->data;
2154   ii  = aij->i;
2155   jj  = aij->j;
2156   aa  = aij->a;
2157   for (i=0; i<m; i++) {
2158     row   = rstart/bs + i;
2159     nz    = ii[i+1] - ii[i];
2160     cwork = jj;     jj += nz;
2161     vwork = aa;     aa += nz;
2162     ierr = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2163   }
2164 
2165   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2166   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2167   *newmat = M;
2168 
2169   /* save submatrix used in processor for next request */
2170   if (call ==  MAT_INITIAL_MATRIX) {
2171     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2172     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2173   }
2174 
2175   PetscFunctionReturn(0);
2176 }
2177 
2178 #undef __FUNCT__
2179 #define __FUNCT__ "MatPermute_MPIBAIJ"
2180 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B)
2181 {
2182   MPI_Comm       comm,pcomm;
2183   PetscInt       first,local_size,nrows;
2184   const PetscInt *rows;
2185   PetscMPIInt    size;
2186   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
2187   PetscErrorCode ierr;
2188 
2189   PetscFunctionBegin;
2190   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2191   /* make a collective version of 'rowp' */
2192   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
2193   if (pcomm==comm) {
2194     crowp = rowp;
2195   } else {
2196     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
2197     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
2198     ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr);
2199     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
2200   }
2201   /* collect the global row permutation and invert it */
2202   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
2203   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
2204   if (pcomm!=comm) {
2205     ierr = ISDestroy(&crowp);CHKERRQ(ierr);
2206   }
2207   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
2208   /* get the local target indices */
2209   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
2210   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
2211   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
2212   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,PETSC_COPY_VALUES,&lrowp);CHKERRQ(ierr);
2213   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
2214   ierr = ISDestroy(&irowp);CHKERRQ(ierr);
2215   /* the column permutation is so much easier;
2216      make a local version of 'colp' and invert it */
2217   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
2218   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
2219   if (size==1) {
2220     lcolp = colp;
2221   } else {
2222     ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
2223     ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
2224     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,PETSC_COPY_VALUES,&lcolp);CHKERRQ(ierr);
2225   }
2226   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
2227   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
2228   ierr = ISSetPermutation(icolp);CHKERRQ(ierr);
2229   if (size>1) {
2230     ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
2231     ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
2232   }
2233   /* now we just get the submatrix */
2234   ierr = MatGetSubMatrix_MPIBAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
2235   /* clean up */
2236   ierr = ISDestroy(&lrowp);CHKERRQ(ierr);
2237   ierr = ISDestroy(&icolp);CHKERRQ(ierr);
2238   PetscFunctionReturn(0);
2239 }
2240 
2241 #undef __FUNCT__
2242 #define __FUNCT__ "MatGetGhosts_MPIBAIJ"
2243 PetscErrorCode  MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
2244 {
2245   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*) mat->data;
2246   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
2247 
2248   PetscFunctionBegin;
2249   if (nghosts) { *nghosts = B->nbs;}
2250   if (ghosts) {*ghosts = baij->garray;}
2251   PetscFunctionReturn(0);
2252 }
2253 
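/*
   Example usage (a sketch, not part of this file): returns the number of ghost block
   columns and their global block indices (baij->garray) referenced by the off-diagonal
   part B:

      PetscInt       nghosts;
      const PetscInt *ghosts;
      ierr = MatGetGhosts(A,&nghosts,&ghosts);CHKERRQ(ierr);
*/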
2254 extern PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat);
2255 
2256 #undef __FUNCT__
2257 #define __FUNCT__ "MatFDColoringCreate_MPIBAIJ"
2258 /*
2259     This routine is almost identical to MatFDColoringCreate_MPIAIJ()!
2260 */
2261 PetscErrorCode MatFDColoringCreate_MPIBAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c)
2262 {
2263   Mat_MPIBAIJ            *baij = (Mat_MPIBAIJ*)mat->data;
2264   PetscErrorCode        ierr;
2265   PetscMPIInt           size,*ncolsonproc,*disp,nn;
2266   PetscInt              bs,i,n,nrows,j,k,m,*rows = 0,*A_ci,*A_cj,ncols,col;
2267   const PetscInt        *is;
2268   PetscInt              nis = iscoloring->n,nctot,*cols,*B_ci,*B_cj;
2269   PetscInt              *rowhit,M,cstart,cend,colb;
2270   PetscInt              *columnsforrow,l;
2271   IS                    *isa;
2272   PetscBool              done,flg;
2273   ISLocalToGlobalMapping map = mat->cmap->bmapping;
2274   PetscInt               *ltog = (map ? map->indices : (PetscInt*) PETSC_NULL) ,ctype=c->ctype;
2275 
2276   PetscFunctionBegin;
2277   if (!mat->assembled) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled first; MatAssemblyBegin/End();");
2278   if (ctype == IS_COLORING_GHOSTED && !map) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_INCOMP,"When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMappingBlock");
2279 
2280   ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr);
2281   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2282   M                = mat->rmap->n/bs;
2283   cstart           = mat->cmap->rstart/bs;
2284   cend             = mat->cmap->rend/bs;
2285   c->M             = mat->rmap->N/bs;  /* set the global rows and columns and local rows */
2286   c->N             = mat->cmap->N/bs;
2287   c->m             = mat->rmap->n/bs;
2288   c->rstart        = mat->rmap->rstart/bs;
2289 
2290   c->ncolors       = nis;
2291   ierr             = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr);
2292   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr);
2293   ierr             = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr);
2294   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr);
2295   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr);
2296   ierr = PetscLogObjectMemory(c,5*nis*sizeof(PetscInt));CHKERRQ(ierr);
2297 
2298   /* Allow access to data structures of local part of matrix */
2299   if (!baij->colmap) {
2300     ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
2301   }
2302   ierr = MatGetColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr);
2303   ierr = MatGetColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr);
2304 
2305   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr);
2306   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr);
2307 
2308   for (i=0; i<nis; i++) {
2309     ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr);
2310     ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr);
2311     c->ncolumns[i] = n;
2312     if (n) {
2313       ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr);
2314       ierr = PetscLogObjectMemory(c,n*sizeof(PetscInt));CHKERRQ(ierr);
2315       ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr);
2316     } else {
2317       c->columns[i]  = 0;
2318     }
2319 
2320     if (ctype == IS_COLORING_GLOBAL){
2321       /* Determine the total (parallel) number of columns of this color */
2322       ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
2323       ierr = PetscMalloc2(size,PetscMPIInt,&ncolsonproc,size,PetscMPIInt,&disp);CHKERRQ(ierr);
2324 
2325       nn   = PetscMPIIntCast(n);
2326       ierr = MPI_Allgather(&nn,1,MPI_INT,ncolsonproc,1,MPI_INT,((PetscObject)mat)->comm);CHKERRQ(ierr);
2327       nctot = 0; for (j=0; j<size; j++) {nctot += ncolsonproc[j];}
2328       if (!nctot) {
2329         ierr = PetscInfo(mat,"Coloring of matrix has some unneeded colors with no corresponding rows\n");CHKERRQ(ierr);
2330       }
2331 
2332       disp[0] = 0;
2333       for (j=1; j<size; j++) {
2334         disp[j] = disp[j-1] + ncolsonproc[j-1];
2335       }
2336 
2337       /* Get complete list of columns for color on each processor */
2338       ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2339       ierr = MPI_Allgatherv((void*)is,n,MPIU_INT,cols,ncolsonproc,disp,MPIU_INT,((PetscObject)mat)->comm);CHKERRQ(ierr);
2340       ierr = PetscFree2(ncolsonproc,disp);CHKERRQ(ierr);
2341     } else if (ctype == IS_COLORING_GHOSTED){
2342       /* Determine local number of columns of this color on this process, including ghost points */
2343       nctot = n;
2344       ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2345       ierr = PetscMemcpy(cols,is,n*sizeof(PetscInt));CHKERRQ(ierr);
2346     } else {
2347       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not provided for this MatFDColoring type");
2348     }
2349 
2350     /*
2351        Mark all rows affected by these columns
2352     */
2353     /* Temporary option to allow for debugging/testing */
2354     flg  = PETSC_FALSE;
2355     ierr = PetscOptionsGetBool(PETSC_NULL,"-matfdcoloring_slow",&flg,PETSC_NULL);CHKERRQ(ierr);
2356     if (!flg) {/*-----------------------------------------------------------------------------*/
2357       /* crude, fast version */
2358       ierr = PetscMemzero(rowhit,M*sizeof(PetscInt));CHKERRQ(ierr);
2359       /* loop over columns*/
2360       for (j=0; j<nctot; j++) {
2361         if (ctype == IS_COLORING_GHOSTED) {
2362           col = ltog[cols[j]];
2363         } else {
2364           col  = cols[j];
2365         }
2366         if (col >= cstart && col < cend) {
2367           /* column is in diagonal block of matrix */
2368           rows = A_cj + A_ci[col-cstart];
2369           m    = A_ci[col-cstart+1] - A_ci[col-cstart];
2370         } else {
2371 #if defined (PETSC_USE_CTABLE)
2372           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2373           colb --;
2374 #else
2375           colb = baij->colmap[col] - 1;
2376 #endif
2377           if (colb == -1) {
2378             m = 0;
2379           } else {
2380             colb = colb/bs;
2381             rows = B_cj + B_ci[colb];
2382             m    = B_ci[colb+1] - B_ci[colb];
2383           }
2384         }
2385         /* loop over the rows of this column, marking them in rowhit */
2386         for (k=0; k<m; k++) {
2387           rowhit[*rows++] = col + 1;
2388         }
2389       }
2390 
2391       /* count the number of hits */
2392       nrows = 0;
2393       for (j=0; j<M; j++) {
2394         if (rowhit[j]) nrows++;
2395       }
2396       c->nrows[i]         = nrows;
2397       ierr                = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
2398       ierr                = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
2399       ierr = PetscLogObjectMemory(c,2*(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr);
2400       nrows = 0;
2401       for (j=0; j<M; j++) {
2402         if (rowhit[j]) {
2403           c->rows[i][nrows]           = j;
2404           c->columnsforrow[i][nrows] = rowhit[j] - 1;
2405           nrows++;
2406         }
2407       }
2408     } else {/*-------------------------------------------------------------------------------*/
2409       /* slow version, using rowhit as a linked list */
2410       PetscInt currentcol,fm,mfm;
2411       rowhit[M] = M;
2412       nrows     = 0;
2413       /* loop over columns*/
2414       for (j=0; j<nctot; j++) {
2415         if (ctype == IS_COLORING_GHOSTED) {
2416           col = ltog[cols[j]];
2417         } else {
2418           col  = cols[j];
2419         }
2420         if (col >= cstart && col < cend) {
2421           /* column is in diagonal block of matrix */
2422           rows = A_cj + A_ci[col-cstart];
2423           m    = A_ci[col-cstart+1] - A_ci[col-cstart];
2424         } else {
2425 #if defined (PETSC_USE_CTABLE)
2426           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2427           colb --;
2428 #else
2429           colb = baij->colmap[col] - 1;
2430 #endif
2431           if (colb == -1) {
2432             m = 0;
2433           } else {
2434             colb = colb/bs;
2435             rows = B_cj + B_ci[colb];
2436             m    = B_ci[colb+1] - B_ci[colb];
2437           }
2438         }
2439 
2440         /* loop over the rows of this column, marking them in rowhit */
2441         fm    = M; /* fm points to first entry in linked list */
2442         for (k=0; k<m; k++) {
2443           currentcol = *rows++;
2444           /* is it already in the list? */
2445           do {
2446             mfm  = fm;
2447             fm   = rowhit[fm];
2448           } while (fm < currentcol);
2449           /* not in list so add it */
2450           if (fm != currentcol) {
2451             nrows++;
2452             columnsforrow[currentcol] = col;
2453             /* next three lines insert new entry into linked list */
2454             rowhit[mfm]               = currentcol;
2455             rowhit[currentcol]        = fm;
2456             fm                        = currentcol;
2457             /* fm points to present position in list since we know the columns are sorted */
2458           } else {
2459             SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid coloring of matrix detected");
2460           }
2461         }
2462       }
2463       c->nrows[i]         = nrows;
2464       ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
2465       ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
2466       ierr = PetscLogObjectMemory(c,(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr);
2467       /* now store the linked list of rows into c->rows[i] */
2468       nrows = 0;
2469       fm    = rowhit[M];
2470       do {
2471         c->rows[i][nrows]            = fm;
2472         c->columnsforrow[i][nrows++] = columnsforrow[fm];
2473         fm                           = rowhit[fm];
2474       } while (fm < M);
2475     } /* ---------------------------------------------------------------------------------------*/
2476     ierr = PetscFree(cols);CHKERRQ(ierr);
2477   }
2478 
2479   /* Optimize by adding the vscale, and scaleforrow[][] fields */
2480   /*
2481        vscale will contain the "diagonal" on-processor scalings followed by the off-processor ones
2482   */
2483   if (ctype == IS_COLORING_GLOBAL) {
2484     PetscInt *garray;
2485     ierr = PetscMalloc(baij->B->cmap->n*sizeof(PetscInt),&garray);CHKERRQ(ierr);
2486     for (i=0; i<baij->B->cmap->n/bs; i++) {
2487       for (j=0; j<bs; j++) {
2488         garray[i*bs+j] = bs*baij->garray[i]+j;
2489       }
2490     }
2491     ierr = VecCreateGhost(((PetscObject)mat)->comm,baij->A->rmap->n,PETSC_DETERMINE,baij->B->cmap->n,garray,&c->vscale);CHKERRQ(ierr);
2492     ierr = PetscFree(garray);CHKERRQ(ierr);
2493     CHKMEMQ;
2494     ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
2495     for (k=0; k<c->ncolors; k++) {
2496       ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
2497       for (l=0; l<c->nrows[k]; l++) {
2498         col = c->columnsforrow[k][l];
2499         if (col >= cstart && col < cend) {
2500           /* column is in diagonal block of matrix */
2501           colb = col - cstart;
2502         } else {
2503           /* column  is in "off-processor" part */
2504 #if defined (PETSC_USE_CTABLE)
2505           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2506           colb --;
2507 #else
2508           colb = baij->colmap[col] - 1;
2509 #endif
2510           colb = colb/bs;
2511           colb += cend - cstart;
2512         }
2513         c->vscaleforrow[k][l] = colb;
2514       }
2515     }
2516   } else if (ctype == IS_COLORING_GHOSTED) {
2517     /* Get gtol mapping */
2518     PetscInt N = mat->cmap->N, *gtol;
2519     ierr = PetscMalloc((N+1)*sizeof(PetscInt),&gtol);CHKERRQ(ierr);
2520     for (i=0; i<N; i++) gtol[i] = -1;
2521     for (i=0; i<map->n; i++) gtol[ltog[i]] = i;
2522 
2523     c->vscale = 0; /* will be created in MatFDColoringApply() */
2524     ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
2525     for (k=0; k<c->ncolors; k++) {
2526       ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
2527       for (l=0; l<c->nrows[k]; l++) {
2528         col = c->columnsforrow[k][l];      /* global column index */
2529         c->vscaleforrow[k][l] = gtol[col]; /* local column index */
2530       }
2531     }
2532     ierr = PetscFree(gtol);CHKERRQ(ierr);
2533   }
2534   ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr);
2535 
2536   ierr = PetscFree(rowhit);CHKERRQ(ierr);
2537   ierr = PetscFree(columnsforrow);CHKERRQ(ierr);
2538   ierr = MatRestoreColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr);
2539   ierr = MatRestoreColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr);
2540   CHKMEMQ;
2541   PetscFunctionReturn(0);
2542 }
2543 
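/*
   Example usage (a sketch, not part of this file): the routine above is reached through
   MatFDColoringCreate() once a coloring of the matrix has been obtained, e.g. when
   building finite-difference Jacobians.  Assuming a matrix J assembled with the correct
   nonzero pattern:

      ISColoring    iscoloring;
      MatFDColoring fdcoloring;
      ierr = MatGetColoring(J,MATCOLORINGSL,&iscoloring);CHKERRQ(ierr);
      ierr = MatFDColoringCreate(J,iscoloring,&fdcoloring);CHKERRQ(ierr);
      ierr = ISColoringDestroy(&iscoloring);CHKERRQ(ierr);
*/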
2544 #undef __FUNCT__
2545 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIBAIJ"
2546 PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat)
2547 {
2548   Mat            B;
2549   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ *)A->data;
2550   Mat_SeqBAIJ    *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data;
2551   Mat_SeqAIJ     *b;
2552   PetscErrorCode ierr;
2553   PetscMPIInt    size,rank,*recvcounts = 0,*displs = 0;
2554   PetscInt       sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs;
2555   PetscInt       m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf;
2556 
2557   PetscFunctionBegin;
2558   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
2559   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
2560 
2561   /* ----------------------------------------------------------------
2562      Tell every processor the number of nonzeros per row
2563   */
2564   ierr = PetscMalloc((A->rmap->N/bs)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
2565   for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) {
2566     lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs];
2567   }
2568   sendcount = A->rmap->rend/bs - A->rmap->rstart/bs;
2569   ierr = PetscMalloc(2*size*sizeof(PetscMPIInt),&recvcounts);CHKERRQ(ierr);
2570   displs     = recvcounts + size;
2571   for (i=0; i<size; i++) {
2572     recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs;
2573     displs[i]     = A->rmap->range[i]/bs;
2574   }
2575 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2576   ierr  = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2577 #else
2578   ierr  = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2579 #endif
2580   /* ---------------------------------------------------------------
2581      Create the sequential matrix of the same type as the local block diagonal
2582   */
2583   ierr  = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
2584   ierr  = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2585   ierr  = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2586   ierr  = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr);
2587   b = (Mat_SeqAIJ *)B->data;
2588 
2589   /*--------------------------------------------------------------------
2590     Copy my part of matrix column indices over
2591   */
2592   sendcount  = ad->nz + bd->nz;
2593   jsendbuf   = b->j + b->i[rstarts[rank]/bs];
2594   a_jsendbuf = ad->j;
2595   b_jsendbuf = bd->j;
2596   n          = A->rmap->rend/bs - A->rmap->rstart/bs;
2597   cnt        = 0;
2598   for (i=0; i<n; i++) {
2599 
2600     /* put in lower diagonal portion */
2601     m = bd->i[i+1] - bd->i[i];
2602     while (m > 0) {
2603       /* is it above diagonal (in bd (compressed) numbering) */
2604       if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break;
2605       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2606       m--;
2607     }
2608 
2609     /* put in diagonal portion */
2610     for (j=ad->i[i]; j<ad->i[i+1]; j++) {
2611       jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++;
2612     }
2613 
2614     /* put in upper diagonal portion */
2615     while (m-- > 0) {
2616       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2617     }
2618   }
2619   if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt);
2620 
2621   /*--------------------------------------------------------------------
2622     Gather all column indices to all processors
2623   */
2624   for (i=0; i<size; i++) {
2625     recvcounts[i] = 0;
2626     for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) {
2627       recvcounts[i] += lens[j];
2628     }
2629   }
2630   displs[0]  = 0;
2631   for (i=1; i<size; i++) {
2632     displs[i] = displs[i-1] + recvcounts[i-1];
2633   }
2634 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2635   ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2636 #else
2637   ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2638 #endif
2639   /*--------------------------------------------------------------------
2640     Assemble the matrix into usable form (note numerical values not yet set)
2641   */
2642   /* set the b->ilen (length of each row) values */
2643   ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr);
2644   /* set the b->i indices */
2645   b->i[0] = 0;
2646   for (i=1; i<=A->rmap->N/bs; i++) {
2647     b->i[i] = b->i[i-1] + lens[i-1];
2648   }
2649   ierr = PetscFree(lens);CHKERRQ(ierr);
2650   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2651   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2652   ierr = PetscFree(recvcounts);CHKERRQ(ierr);
2653 
2654   if (A->symmetric){
2655     ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2656   } else if (A->hermitian) {
2657     ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr);
2658   } else if (A->structurally_symmetric) {
2659     ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2660   }
2661   *newmat = B;
2662   PetscFunctionReturn(0);
2663 }
2664 
2665 #undef __FUNCT__
2666 #define __FUNCT__ "MatSOR_MPIBAIJ"
2667 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2668 {
2669   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
2670   PetscErrorCode ierr;
2671   Vec            bb1 = 0;
2672 
2673   PetscFunctionBegin;
2674   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) {
2675     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2676   }
2677 
2678   if (flag == SOR_APPLY_UPPER) {
2679     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2680     PetscFunctionReturn(0);
2681   }
2682 
2683   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2684     if (flag & SOR_ZERO_INITIAL_GUESS) {
2685       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2686       its--;
2687     }
2688 
2689     while (its--) {
2690       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2691       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2692 
2693       /* update rhs: bb1 = bb - B*x */
2694       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2695       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2696 
2697       /* local sweep */
2698       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2699     }
2700   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
2701     if (flag & SOR_ZERO_INITIAL_GUESS) {
2702       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2703       its--;
2704     }
2705     while (its--) {
2706       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2707       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2708 
2709       /* update rhs: bb1 = bb - B*x */
2710       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2711       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2712 
2713       /* local sweep */
2714       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2715     }
2716   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
2717     if (flag & SOR_ZERO_INITIAL_GUESS) {
2718       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2719       its--;
2720     }
2721     while (its--) {
2722       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2723       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2724 
2725       /* update rhs: bb1 = bb - B*x */
2726       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2727       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2728 
2729       /* local sweep */
2730       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2731     }
2732   } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel version of SOR requested is not supported");
2733 
2734   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2735   PetscFunctionReturn(0);
2736 }
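
/*
   Each outer iteration of MatSOR_MPIBAIJ above performs a processor-local sweep:
   the ghost values of xx are gathered into mat->lvec, the right-hand side is shifted to
   bb1 = bb - B*lvec (done with VecScale and MatMultAdd), and the requested SOR sweep is
   then applied only to the on-process diagonal block A.  Schematically:

       scatter xx -> lvec
       bb1 = bb - B*lvec
       xx  = SOR(A, bb1, omega, local sweep type, ...)
*/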
2737 
2738 extern PetscErrorCode  MatFDColoringApply_BAIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2739 
2740 #undef __FUNCT__
2741 #define __FUNCT__ "MatInvertBlockDiagonal_MPIBAIJ"
2742 PetscErrorCode  MatInvertBlockDiagonal_MPIBAIJ(Mat A,PetscScalar **values)
2743 {
2744   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*) A->data;
2745   PetscErrorCode ierr;
2746 
2747   PetscFunctionBegin;
2748   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2749   PetscFunctionReturn(0);
2750 }
2751 
2752 
2753 /* -------------------------------------------------------------------*/
2754 static struct _MatOps MatOps_Values = {
2755        MatSetValues_MPIBAIJ,
2756        MatGetRow_MPIBAIJ,
2757        MatRestoreRow_MPIBAIJ,
2758        MatMult_MPIBAIJ,
2759 /* 4*/ MatMultAdd_MPIBAIJ,
2760        MatMultTranspose_MPIBAIJ,
2761        MatMultTransposeAdd_MPIBAIJ,
2762        0,
2763        0,
2764        0,
2765 /*10*/ 0,
2766        0,
2767        0,
2768        MatSOR_MPIBAIJ,
2769        MatTranspose_MPIBAIJ,
2770 /*15*/ MatGetInfo_MPIBAIJ,
2771        MatEqual_MPIBAIJ,
2772        MatGetDiagonal_MPIBAIJ,
2773        MatDiagonalScale_MPIBAIJ,
2774        MatNorm_MPIBAIJ,
2775 /*20*/ MatAssemblyBegin_MPIBAIJ,
2776        MatAssemblyEnd_MPIBAIJ,
2777        MatSetOption_MPIBAIJ,
2778        MatZeroEntries_MPIBAIJ,
2779 /*24*/ MatZeroRows_MPIBAIJ,
2780        0,
2781        0,
2782        0,
2783        0,
2784 /*29*/ MatSetUp_MPIBAIJ,
2785        0,
2786        0,
2787        0,
2788        0,
2789 /*34*/ MatDuplicate_MPIBAIJ,
2790        0,
2791        0,
2792        0,
2793        0,
2794 /*39*/ MatAXPY_MPIBAIJ,
2795        MatGetSubMatrices_MPIBAIJ,
2796        MatIncreaseOverlap_MPIBAIJ,
2797        MatGetValues_MPIBAIJ,
2798        MatCopy_MPIBAIJ,
2799 /*44*/ 0,
2800        MatScale_MPIBAIJ,
2801        0,
2802        0,
2803        0,
2804 /*49*/ 0,
2805        0,
2806        0,
2807        0,
2808        0,
2809 /*54*/ MatFDColoringCreate_MPIBAIJ,
2810        0,
2811        MatSetUnfactored_MPIBAIJ,
2812        MatPermute_MPIBAIJ,
2813        MatSetValuesBlocked_MPIBAIJ,
2814 /*59*/ MatGetSubMatrix_MPIBAIJ,
2815        MatDestroy_MPIBAIJ,
2816        MatView_MPIBAIJ,
2817        0,
2818        0,
2819 /*64*/ 0,
2820        0,
2821        0,
2822        0,
2823        0,
2824 /*69*/ MatGetRowMaxAbs_MPIBAIJ,
2825        0,
2826        0,
2827        0,
2828        0,
2829 /*74*/ 0,
2830        MatFDColoringApply_BAIJ,
2831        0,
2832        0,
2833        0,
2834 /*79*/ 0,
2835        0,
2836        0,
2837        0,
2838        MatLoad_MPIBAIJ,
2839 /*84*/ 0,
2840        0,
2841        0,
2842        0,
2843        0,
2844 /*89*/ 0,
2845        0,
2846        0,
2847        0,
2848        0,
2849 /*94*/ 0,
2850        0,
2851        0,
2852        0,
2853        0,
2854 /*99*/ 0,
2855        0,
2856        0,
2857        0,
2858        0,
2859 /*104*/0,
2860        MatRealPart_MPIBAIJ,
2861        MatImaginaryPart_MPIBAIJ,
2862        0,
2863        0,
2864 /*109*/0,
2865        0,
2866        0,
2867        0,
2868        0,
2869 /*114*/MatGetSeqNonzeroStructure_MPIBAIJ,
2870        0,
2871        MatGetGhosts_MPIBAIJ,
2872        0,
2873        0,
2874 /*119*/0,
2875        0,
2876        0,
2877        0,
2878        0,
2879 /*124*/0,
2880        0,
2881        MatInvertBlockDiagonal_MPIBAIJ
2882 };
2883 
2884 EXTERN_C_BEGIN
2885 #undef __FUNCT__
2886 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
2887 PetscErrorCode  MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a)
2888 {
2889   PetscFunctionBegin;
2890   *a = ((Mat_MPIBAIJ *)A->data)->A;
2891   PetscFunctionReturn(0);
2892 }
2893 EXTERN_C_END
2894 
2895 EXTERN_C_BEGIN
2896 extern PetscErrorCode  MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*);
2897 EXTERN_C_END
2898 
2899 EXTERN_C_BEGIN
2900 #undef __FUNCT__
2901 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ"
2902 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2903 {
2904   PetscInt       m,rstart,cstart,cend;
2905   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
2906   const PetscInt *JJ=0;
2907   PetscScalar    *values=0;
2908   PetscErrorCode ierr;
2909 
2910   PetscFunctionBegin;
2911   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2912   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2913   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2914   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2915   m      = B->rmap->n/bs;
2916   rstart = B->rmap->rstart/bs;
2917   cstart = B->cmap->rstart/bs;
2918   cend   = B->cmap->rend/bs;
2919 
2920   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
2921   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
2922   for (i=0; i<m; i++) {
2923     nz = ii[i+1] - ii[i];
2924     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
2925     nz_max = PetscMax(nz_max,nz);
2926     JJ  = jj + ii[i];
2927     for (j=0; j<nz; j++) {
2928       if (*JJ >= cstart) break;
2929       JJ++;
2930     }
2931     d = 0;
2932     for (; j<nz; j++) {
2933       if (*JJ++ >= cend) break;
2934       d++;
2935     }
2936     d_nnz[i] = d;
2937     o_nnz[i] = nz - d;
2938   }
2939   ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2940   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
2941 
2942   values = (PetscScalar*)V;
2943   if (!values) {
2944     ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2945     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2946   }
2947   for (i=0; i<m; i++) {
2948     PetscInt          row    = i + rstart;
2949     PetscInt          ncols  = ii[i+1] - ii[i];
2950     const PetscInt    *icols = jj + ii[i];
2951     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2952     ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2953   }
2954 
2955   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2956   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2957   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2958   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2959   PetscFunctionReturn(0);
2960 }
2961 EXTERN_C_END
2962 
2963 #undef __FUNCT__
2964 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR"
2965 /*@C
2966    MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format
2967    (block compressed row storage).
2968 
2969    Collective on MPI_Comm
2970 
2971    Input Parameters:
2972 +  A - the matrix
2973 .  bs - the block size
2974 .  i - the indices into j for the start of each local row (starts with zero)
2975 .  j - the column indices for each local row (starts with zero); these must be sorted within each row
2976 -  v - optional values in the matrix
2977 
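   Example usage (a minimal sketch; comm, bs, and the local point sizes m,n are assumed to be
   chosen by the caller, and ii,jj,vv describe the local block rows in block-CSR form):
.vb
   Mat A;
   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
   ierr = MatSetSizes(A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
   ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
   ierr = MatMPIBAIJSetPreallocationCSR(A,bs,ii,jj,vv);CHKERRQ(ierr);
.ve
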
2978    Level: developer
2979 
2980 .keywords: matrix, aij, compressed row, sparse, parallel
2981 
2982 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ
2983 @*/
2984 PetscErrorCode  MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2985 {
2986   PetscErrorCode ierr;
2987 
2988   PetscFunctionBegin;
2989   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2990   PetscValidType(B,1);
2991   PetscValidLogicalCollectiveInt(B,bs,2);
2992   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
2993   PetscFunctionReturn(0);
2994 }
2995 
2996 EXTERN_C_BEGIN
2997 #undef __FUNCT__
2998 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
2999 PetscErrorCode  MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
3000 {
3001   Mat_MPIBAIJ    *b;
3002   PetscErrorCode ierr;
3003   PetscInt       i;
3004   PetscBool      d_realalloc = PETSC_FALSE,o_realalloc = PETSC_FALSE;
3005 
3006   PetscFunctionBegin;
3007   if (d_nz >= 0 || d_nnz) d_realalloc = PETSC_TRUE;
3008   if (o_nz >= 0 || o_nnz) o_realalloc = PETSC_TRUE;
3009 
3010   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
3011   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
3012   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
3013   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
3014 
3015   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
3016   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
3017   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3018   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3019 
3020   if (d_nnz) {
3021     for (i=0; i<B->rmap->n/bs; i++) {
3022       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
3023     }
3024   }
3025   if (o_nnz) {
3026     for (i=0; i<B->rmap->n/bs; i++) {
3027       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
3028     }
3029   }
3030 
3031   b = (Mat_MPIBAIJ*)B->data;
3032   b->bs2 = bs*bs;
3033   b->mbs = B->rmap->n/bs;
3034   b->nbs = B->cmap->n/bs;
3035   b->Mbs = B->rmap->N/bs;
3036   b->Nbs = B->cmap->N/bs;
3037 
3038   for (i=0; i<=b->size; i++) {
3039     b->rangebs[i] = B->rmap->range[i]/bs;
3040   }
3041   b->rstartbs = B->rmap->rstart/bs;
3042   b->rendbs   = B->rmap->rend/bs;
3043   b->cstartbs = B->cmap->rstart/bs;
3044   b->cendbs   = B->cmap->rend/bs;
3045 
3046   if (!B->preallocated) {
3047     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
3048     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
3049     ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
3050     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
3051     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
3052     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
3053     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
3054     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
3055     ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr);
3056   }
3057 
3058   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3059   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
3060   /* Do not error if the user did not give real preallocation information. Ugly because this would overwrite a previous user call to MatSetOption(). */
3061   if (!d_realalloc) {ierr = MatSetOption(b->A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3062   if (!o_realalloc) {ierr = MatSetOption(b->B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);}
3063   B->preallocated = PETSC_TRUE;
3064   PetscFunctionReturn(0);
3065 }
3066 EXTERN_C_END
3067 
3068 EXTERN_C_BEGIN
3069 extern PetscErrorCode  MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
3070 extern PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
3071 EXTERN_C_END
3072 
3073 
3074 EXTERN_C_BEGIN
3075 #undef __FUNCT__
3076 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj"
3077 PetscErrorCode  MatConvert_MPIBAIJ_MPIAdj(Mat B, const MatType newtype,MatReuse reuse,Mat *adj)
3078 {
3079   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
3080   PetscErrorCode ierr;
3081   Mat_SeqBAIJ    *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data;
3082   PetscInt       M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs;
3083   const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;
3084 
3085   PetscFunctionBegin;
3086   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&ii);CHKERRQ(ierr);
3087   ii[0] = 0;
3088   CHKMEMQ;
3089   for (i=0; i<M; i++) {
3090     if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]);
3091     if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]);
3092     ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i];
3093     /* remove one from the count if this row of the matrix contains its diagonal entry */
3094     for (j=id[i]; j<id[i+1]; j++) {
3095       if (jd[j] == i) {ii[i+1]--;break;}
3096     }
3097   CHKMEMQ;
3098   }
3099   ierr = PetscMalloc(ii[M]*sizeof(PetscInt),&jj);CHKERRQ(ierr);
3100   cnt = 0;
3101   for (i=0; i<M; i++) {
3102     for (j=io[i]; j<io[i+1]; j++) {
3103       if (garray[jo[j]] > rstart) break;
3104       jj[cnt++] = garray[jo[j]];
3105   CHKMEMQ;
3106     }
3107     for (k=id[i]; k<id[i+1]; k++) {
3108       if (jd[k] != i) {
3109         jj[cnt++] = rstart + jd[k];
3110   CHKMEMQ;
3111       }
3112     }
3113     for (;j<io[i+1]; j++) {
3114       jj[cnt++] = garray[jo[j]];
3115   CHKMEMQ;
3116     }
3117   }
3118   ierr = MatCreateMPIAdj(((PetscObject)B)->comm,M,B->cmap->N/B->rmap->bs,ii,jj,PETSC_NULL,adj);CHKERRQ(ierr);
3119   PetscFunctionReturn(0);
3120 }
3121 EXTERN_C_END
3122 
3123 #include <../src/mat/impls/aij/mpi/mpiaij.h>
3124 EXTERN_C_BEGIN
3125 PetscErrorCode  MatConvert_SeqBAIJ_SeqAIJ(Mat,const MatType,MatReuse,Mat*);
3126 EXTERN_C_END
3127 
3128 EXTERN_C_BEGIN
3129 #undef __FUNCT__
3130 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAIJ"
3131 PetscErrorCode  MatConvert_MPIBAIJ_MPIAIJ(Mat A,const MatType newtype,MatReuse reuse,Mat *newmat)
3132 {
3133   PetscErrorCode ierr;
3134   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
3135   Mat            B;
3136   Mat_MPIAIJ     *b;
3137 
3138   PetscFunctionBegin;
3139   if (!A->assembled) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_SUP,"Matrix must be assembled");
3140 
3141   ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
3142   ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
3143   ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
3144   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
3145   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
3146   b = (Mat_MPIAIJ*) B->data;
3147 
3148   ierr = MatDestroy(&b->A);CHKERRQ(ierr);
3149   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
3150   ierr = MatDisAssemble_MPIBAIJ(A);CHKERRQ(ierr);
3151   ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr);
3152   ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr);
3153   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3154   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3155   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3156   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3157   if (reuse == MAT_REUSE_MATRIX) {
3158     ierr = MatHeaderReplace(A,B);CHKERRQ(ierr);
3159   } else {
3160    *newmat = B;
3161   }
3162   PetscFunctionReturn(0);
3163 }
3164 EXTERN_C_END
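
/*
   The conversion above is normally reached through the generic interface, e.g.
   (a minimal sketch):

       ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
*/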
3165 
3166 EXTERN_C_BEGIN
3167 #if defined(PETSC_HAVE_MUMPS)
3168 extern PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*);
3169 #endif
3170 EXTERN_C_END
3171 
3172 /*MC
3173    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
3174 
3175    Options Database Keys:
3176 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
3177 . -mat_block_size <bs> - set the blocksize used to store the matrix
3178 - -mat_use_hash_table <fact> - use a hash table during matrix assembly, with <fact> as the hash table size factor
3179 
3180   Level: beginner
3181 
3182 .seealso: MatCreateMPIBAIJ
3183 M*/
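
/*
   For example (illustrative), a code that calls MatSetFromOptions() can select this type
   and a block size of 3 from the command line with:

       -mat_type mpibaij -mat_block_size 3
*/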
3184 
3185 EXTERN_C_BEGIN
3186 extern PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,const MatType,MatReuse,Mat*);
3187 EXTERN_C_END
3188 
3189 EXTERN_C_BEGIN
3190 #undef __FUNCT__
3191 #define __FUNCT__ "MatCreate_MPIBAIJ"
3192 PetscErrorCode  MatCreate_MPIBAIJ(Mat B)
3193 {
3194   Mat_MPIBAIJ    *b;
3195   PetscErrorCode ierr;
3196   PetscBool      flg;
3197 
3198   PetscFunctionBegin;
3199   ierr = PetscNewLog(B,Mat_MPIBAIJ,&b);CHKERRQ(ierr);
3200   B->data = (void*)b;
3201 
3202   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3203   B->assembled  = PETSC_FALSE;
3204 
3205   B->insertmode = NOT_SET_VALUES;
3206   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
3207   ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr);
3208 
3209   /* build local table of row and column ownerships */
3210   ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);
3211 
3212   /* build cache for off array entries formed */
3213   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
3214   b->donotstash  = PETSC_FALSE;
3215   b->colmap      = PETSC_NULL;
3216   b->garray      = PETSC_NULL;
3217   b->roworiented = PETSC_TRUE;
3218 
3219   /* stuff used in block assembly */
3220   b->barray       = 0;
3221 
3222   /* stuff used for matrix vector multiply */
3223   b->lvec         = 0;
3224   b->Mvctx        = 0;
3225 
3226   /* stuff for MatGetRow() */
3227   b->rowindices   = 0;
3228   b->rowvalues    = 0;
3229   b->getrowactive = PETSC_FALSE;
3230 
3231   /* hash table stuff */
3232   b->ht           = 0;
3233   b->hd           = 0;
3234   b->ht_size      = 0;
3235   b->ht_flag      = PETSC_FALSE;
3236   b->ht_fact      = 0;
3237   b->ht_total_ct  = 0;
3238   b->ht_insert_ct = 0;
3239 
3240   /* stuff for MatGetSubMatrices_MPIBAIJ_local() */
3241   b->ijonly       = PETSC_FALSE;
3242 
3243   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr);
3244     ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
3245     if (flg) {
3246       PetscReal fact = 1.39;
3247       ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
3248       ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr);
3249       if (fact <= 1.0) fact = 1.39;
3250       ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
3251       ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
3252     }
3253   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3254 
3255 #if defined(PETSC_HAVE_MUMPS)
3256   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps",MatGetFactor_baij_mumps);CHKERRQ(ierr);
3257 #endif
3258   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",
3259                                      "MatConvert_MPIBAIJ_MPIAdj",
3260                                       MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr);
3261   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",
3262                                      "MatConvert_MPIBAIJ_MPIAIJ",
3263                                       MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr);
3264   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C",
3265                                      "MatConvert_MPIBAIJ_MPISBAIJ",
3266                                       MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr);
3267   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3268                                      "MatStoreValues_MPIBAIJ",
3269                                      MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
3270   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3271                                      "MatRetrieveValues_MPIBAIJ",
3272                                      MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
3273   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
3274                                      "MatGetDiagonalBlock_MPIBAIJ",
3275                                      MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
3276   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C",
3277                                      "MatMPIBAIJSetPreallocation_MPIBAIJ",
3278                                      MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
3279   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",
3280 				     "MatMPIBAIJSetPreallocationCSR_MPIBAIJ",
3281 				     MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
3282   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
3283                                      "MatDiagonalScaleLocal_MPIBAIJ",
3284                                      MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
3285   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C",
3286                                      "MatSetHashTableFactor_MPIBAIJ",
3287                                      MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
3288   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpibstrm_C",
3289                                      "MatConvert_MPIBAIJ_MPIBSTRM",
3290                                       MatConvert_MPIBAIJ_MPIBSTRM);CHKERRQ(ierr);
3291   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);
3292   PetscFunctionReturn(0);
3293 }
3294 EXTERN_C_END
3295 
3296 /*MC
3297    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
3298 
3299    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
3300    and MATMPIBAIJ otherwise.
3301 
3302    Options Database Keys:
3303 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
3304 
3305   Level: beginner
3306 
3307 .seealso: MatCreateBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3308 M*/
3309 
3310 #undef __FUNCT__
3311 #define __FUNCT__ "MatMPIBAIJSetPreallocation"
3312 /*@C
3313    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
3314    (block compressed row).  For good matrix assembly performance
3315    the user should preallocate the matrix storage by setting the parameters
3316    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3317    performance can be increased by more than a factor of 50.
3318 
3319    Collective on Mat
3320 
3321    Input Parameters:
3322 +  A - the matrix
3323 .  bs   - size of block
3324 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
3325            submatrix  (same for all local rows)
3326 .  d_nnz - array containing the number of block nonzeros in the various block rows
3327            in the diagonal portion of the local submatrix (possibly different for each block
3328            row) or PETSC_NULL.  If you plan to factor the matrix you must leave room for the diagonal entry and
3329            set it even if it is zero.
3330 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
3331            submatrix (same for all local rows).
3332 -  o_nnz - array containing the number of nonzeros in the various block rows of the
3333            off-diagonal portion of the local submatrix (possibly different for
3334            each block row) or PETSC_NULL.
3335 
3336    If the *_nnz parameter is given then the *_nz parameter is ignored
3337 
3338    Options Database Keys:
3339 +   -mat_block_size - size of the blocks to use
3340 -   -mat_use_hash_table <fact> - use a hash table during matrix assembly, with <fact> as the hash table size factor
3341 
3342    Notes:
3343    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
3344    then it must be used on all processors that share the object for that argument.
3345 
3346    Storage Information:
3347    For a square global matrix we define each processor's diagonal portion
3348    to be its local rows and the corresponding columns (a square submatrix);
3349    each processor's off-diagonal portion encompasses the remainder of the
3350    local matrix (a rectangular submatrix).
3351 
3352    The user can specify preallocated storage for the diagonal part of
3353    the local submatrix with either d_nz or d_nnz (not both).  Set
3354    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
3355    memory allocation.  Likewise, specify preallocated storage for the
3356    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3357 
3358    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3359    the figure below we depict these three local rows and all columns (0-11).
3360 
3361 .vb
3362            0 1 2 3 4 5 6 7 8 9 10 11
3363           -------------------
3364    row 3  |  o o o d d d o o o o o o
3365    row 4  |  o o o d d d o o o o o o
3366    row 5  |  o o o d d d o o o o o o
3367           -------------------
3368 .ve
3369 
3370    Thus, any entries in the d locations are stored in the d (diagonal)
3371    submatrix, and any entries in the o locations are stored in the
3372    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3373    stored simply in the MATSEQBAIJ format for compressed row storage.
3374 
3375    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3376    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3377    In general, for PDE problems in which most nonzeros are near the diagonal,
3378    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3379    or you will get TERRIBLE performance; see the users' manual chapter on
3380    matrices.
3381 
3382    You can call MatGetInfo() to get information on how effective the preallocation was;
3383    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3384    You can also run with the option -info and look for messages with the string
3385    malloc in them to see if additional memory allocation was needed.
3386 
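   Example usage (a minimal sketch; comm, bs and the sizes m,n,M,N are assumed to already be
   chosen, and the per-block-row counts 3 and 2 are purely illustrative):
.vb
   Mat A;
   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
   ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
   ierr = MatSetType(A,MATBAIJ);CHKERRQ(ierr);
   ierr = MatSetFromOptions(A);CHKERRQ(ierr);
   ierr = MatMPIBAIJSetPreallocation(A,bs,3,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
   ierr = MatSeqBAIJSetPreallocation(A,bs,3,PETSC_NULL);CHKERRQ(ierr);   /* no-op unless A ends up MATSEQBAIJ (one process) */
.ve
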
3387    Level: intermediate
3388 
3389 .keywords: matrix, block, aij, compressed row, sparse, parallel
3390 
3391 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocationCSR()
3392 @*/
3393 PetscErrorCode  MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3394 {
3395   PetscErrorCode ierr;
3396 
3397   PetscFunctionBegin;
3398   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3399   PetscValidType(B,1);
3400   PetscValidLogicalCollectiveInt(B,bs,2);
3401   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3402   PetscFunctionReturn(0);
3403 }
3404 
3405 #undef __FUNCT__
3406 #define __FUNCT__ "MatCreateBAIJ"
3407 /*@C
3408    MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format
3409    (block compressed row).  For good matrix assembly performance
3410    the user should preallocate the matrix storage by setting the parameters
3411    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3412    performance can be increased by more than a factor of 50.
3413 
3414    Collective on MPI_Comm
3415 
3416    Input Parameters:
3417 +  comm - MPI communicator
3418 .  bs   - size of block
3419 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3420            This value should be the same as the local size used in creating the
3421            y vector for the matrix-vector product y = Ax.
3422 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
3423            This value should be the same as the local size used in creating the
3424            x vector for the matrix-vector product y = Ax.
3425 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3426 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3427 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
3428            submatrix  (same for all local rows)
3429 .  d_nnz - array containing the number of nonzero blocks in the various block rows
3430            in the diagonal portion of the local submatrix (possibly different for each block
3431            row) or PETSC_NULL.  If you plan to factor the matrix you must leave room for the diagonal entry
3432            and set it even if it is zero.
3433 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3434            submatrix (same for all local rows).
3435 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
3436            off-diagonal portion of the local submatrix (possibly different for
3437            each block row) or PETSC_NULL.
3438 
3439    Output Parameter:
3440 .  A - the matrix
3441 
3442    Options Database Keys:
3443 +   -mat_block_size - size of the blocks to use
3444 -   -mat_use_hash_table <fact> - use a hash table during matrix assembly, with <fact> as the hash table size factor
3445 
3446    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3447    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3448    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3449 
3450    Notes:
3451    If the *_nnz parameter is given then the *_nz parameter is ignored
3452 
3453    A nonzero block is any block that has 1 or more nonzeros in it
3454 
3455    The user MUST specify either the local or global matrix dimensions
3456    (possibly both).
3457 
3458    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
3459    then it must be used on all processors that share the object for that argument.
3460 
3461    Storage Information:
3462    For a square global matrix we define each processor's diagonal portion
3463    to be its local rows and the corresponding columns (a square submatrix);
3464    each processor's off-diagonal portion encompasses the remainder of the
3465    local matrix (a rectangular submatrix).
3466 
3467    The user can specify preallocated storage for the diagonal part of
3468    the local submatrix with either d_nz or d_nnz (not both).  Set
3469    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
3470    memory allocation.  Likewise, specify preallocated storage for the
3471    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3472 
3473    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3474    the figure below we depict these three local rows and all columns (0-11).
3475 
3476 .vb
3477            0 1 2 3 4 5 6 7 8 9 10 11
3478           -------------------
3479    row 3  |  o o o d d d o o o o o o
3480    row 4  |  o o o d d d o o o o o o
3481    row 5  |  o o o d d d o o o o o o
3482           -------------------
3483 .ve
3484 
3485    Thus, any entries in the d locations are stored in the d (diagonal)
3486    submatrix, and any entries in the o locations are stored in the
3487    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3488    stored simply in the MATSEQBAIJ format for compressed row storage.
3489 
3490    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3491    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3492    In general, for PDE problems in which most nonzeros are near the diagonal,
3493    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3494    or you will get TERRIBLE performance; see the users' manual chapter on
3495    matrices.
3496 
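   Example usage (a minimal sketch; the per-block-row counts 3 and 2 are illustrative only):
.vb
   Mat A;
   ierr = MatCreateBAIJ(comm,bs,m,n,M,N,3,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
.ve
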
3497    Level: intermediate
3498 
3499 .keywords: matrix, block, aij, compressed row, sparse, parallel
3500 
3501 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3502 @*/
3503 PetscErrorCode  MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3504 {
3505   PetscErrorCode ierr;
3506   PetscMPIInt    size;
3507 
3508   PetscFunctionBegin;
3509   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3510   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3511   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3512   if (size > 1) {
3513     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
3514     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3515   } else {
3516     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3517     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3518   }
3519   PetscFunctionReturn(0);
3520 }
3521 
3522 #undef __FUNCT__
3523 #define __FUNCT__ "MatDuplicate_MPIBAIJ"
3524 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3525 {
3526   Mat            mat;
3527   Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
3528   PetscErrorCode ierr;
3529   PetscInt       len=0;
3530 
3531   PetscFunctionBegin;
3532   *newmat       = 0;
3533   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
3534   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3535   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3536   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3537 
3538   mat->factortype   = matin->factortype;
3539   mat->preallocated = PETSC_TRUE;
3540   mat->assembled    = PETSC_TRUE;
3541   mat->insertmode   = NOT_SET_VALUES;
3542 
3543   a      = (Mat_MPIBAIJ*)mat->data;
3544   mat->rmap->bs  = matin->rmap->bs;
3545   a->bs2   = oldmat->bs2;
3546   a->mbs   = oldmat->mbs;
3547   a->nbs   = oldmat->nbs;
3548   a->Mbs   = oldmat->Mbs;
3549   a->Nbs   = oldmat->Nbs;
3550 
3551   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3552   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3553 
3554   a->size         = oldmat->size;
3555   a->rank         = oldmat->rank;
3556   a->donotstash   = oldmat->donotstash;
3557   a->roworiented  = oldmat->roworiented;
3558   a->rowindices   = 0;
3559   a->rowvalues    = 0;
3560   a->getrowactive = PETSC_FALSE;
3561   a->barray       = 0;
3562   a->rstartbs     = oldmat->rstartbs;
3563   a->rendbs       = oldmat->rendbs;
3564   a->cstartbs     = oldmat->cstartbs;
3565   a->cendbs       = oldmat->cendbs;
3566 
3567   /* hash table stuff */
3568   a->ht           = 0;
3569   a->hd           = 0;
3570   a->ht_size      = 0;
3571   a->ht_flag      = oldmat->ht_flag;
3572   a->ht_fact      = oldmat->ht_fact;
3573   a->ht_total_ct  = 0;
3574   a->ht_insert_ct = 0;
3575 
3576   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
3577   if (oldmat->colmap) {
3578 #if defined (PETSC_USE_CTABLE)
3579   ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3580 #else
3581   ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3582   ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3583   ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3584 #endif
3585   } else a->colmap = 0;
3586 
3587   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
3588     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3589     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3590     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
3591   } else a->garray = 0;
3592 
3593   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
3594   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3595   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3596   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3597   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3598 
3599   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3600   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3601   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3602   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3603   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3604   *newmat = mat;
3605 
3606   PetscFunctionReturn(0);
3607 }
3608 
3609 #undef __FUNCT__
3610 #define __FUNCT__ "MatLoad_MPIBAIJ"
3611 PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer)
3612 {
3613   PetscErrorCode ierr;
3614   int            fd;
3615   PetscInt       i,nz,j,rstart,rend;
3616   PetscScalar    *vals,*buf;
3617   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3618   MPI_Status     status;
3619   PetscMPIInt    rank,size,maxnz;
3620   PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
3621   PetscInt       *locrowlens = PETSC_NULL,*procsnz = PETSC_NULL,*browners = PETSC_NULL;
3622   PetscInt       jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax;
3623   PetscMPIInt    tag = ((PetscObject)viewer)->tag;
3624   PetscInt       *dlens = PETSC_NULL,*odlens = PETSC_NULL,*mask = PETSC_NULL,*masked1 = PETSC_NULL,*masked2 = PETSC_NULL,rowcount,odcount;
3625   PetscInt       dcount,kmax,k,nzcount,tmp,mend,sizesset=1,grows,gcols;
3626 
3627   PetscFunctionBegin;
3628   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr);
3629     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
3630   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3631 
3632   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3633   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3634   if (!rank) {
3635     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3636     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
3637     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3638   }
3639 
3640   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
3641 
3642   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3643   M = header[1]; N = header[2];
3644 
3645   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3646   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
3647   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
3648 
3649   /* If global sizes are set, check if they are consistent with that given in the file */
3650   if (sizesset) {
3651     ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
3652   }
3653   if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",M,grows);
3654   if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",N,gcols);
3655 
3656   if (M != N) SETERRQ(((PetscObject)viewer)->comm,PETSC_ERR_SUP,"Can only do square matrices");
3657 
3658   /*
3659      This code adds extra rows to make sure the number of rows is
3660      divisible by the blocksize
3661   */
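  /*
     Illustrative example: with M = 10 and bs = 3, Mbs = 10/3 = 3 and
     extra_rows = 3 - 10 + 9 = 2, so Mbs is bumped to 4 and the matrix is padded to
     12 rows; each extra row receives a single unit entry on its diagonal below.
  */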
3662   Mbs        = M/bs;
3663   extra_rows = bs - M + bs*Mbs;
3664   if (extra_rows == bs) extra_rows = 0;
3665   else                  Mbs++;
3666   if (extra_rows && !rank) {
3667     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3668   }
3669 
3670   /* determine ownership of all rows */
3671   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
3672     mbs        = Mbs/size + ((Mbs % size) > rank);
3673     m          = mbs*bs;
3674   } else { /* User set */
3675     m          = newmat->rmap->n;
3676     mbs        = m/bs;
3677   }
3678   ierr       = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr);
3679   ierr       = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3680 
3681   /* process 0 needs enough room for process with most rows */
3682   if (!rank) {
3683     mmax = rowners[1];
3684     for (i=2; i<size; i++) {
3685       mmax = PetscMax(mmax,rowners[i]);
3686     }
3687     mmax*=bs;
3688   } else mmax = m;
3689 
3690   rowners[0] = 0;
3691   for (i=2; i<=size; i++)  rowners[i] += rowners[i-1];
3692   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
3693   rstart = rowners[rank];
3694   rend   = rowners[rank+1];
3695 
3696   /* distribute row lengths to all processors */
3697   ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr);
3698   if (!rank) {
3699     mend = m;
3700     if (size == 1) mend = mend - extra_rows;
3701     ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr);
3702     for (j=mend; j<m; j++) locrowlens[j] = 1;
3703     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3704     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3705     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3706     for (j=0; j<m; j++) {
3707       procsnz[0] += locrowlens[j];
3708     }
3709     for (i=1; i<size; i++) {
3710       mend = browners[i+1] - browners[i];
3711       if (i == size-1) mend = mend - extra_rows;
3712       ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr);
3713       for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
3714       /* calculate the number of nonzeros on each processor */
3715       for (j=0; j<browners[i+1]-browners[i]; j++) {
3716         procsnz[i] += rowlengths[j];
3717       }
3718       ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3719     }
3720     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3721   } else {
3722     ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3723   }
3724 
3725   if (!rank) {
3726     /* determine max buffer needed and allocate it */
3727     maxnz = procsnz[0];
3728     for (i=1; i<size; i++) {
3729       maxnz = PetscMax(maxnz,procsnz[i]);
3730     }
3731     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3732 
3733     /* read in my part of the matrix column indices  */
3734     nz     = procsnz[0];
3735     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
3736     mycols = ibuf;
3737     if (size == 1)  nz -= extra_rows;
3738     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3739     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
3740 
3741     /* read in everyone else's column indices (except the last proc's) and ship them off */
3742     for (i=1; i<size-1; i++) {
3743       nz   = procsnz[i];
3744       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3745       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3746     }
3747     /* read in the stuff for the last proc */
3748     if (size != 1) {
3749       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
3750       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3751       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
3752       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
3753     }
3754     ierr = PetscFree(cols);CHKERRQ(ierr);
3755   } else {
3756     /* determine buffer space needed for message */
3757     nz = 0;
3758     for (i=0; i<m; i++) {
3759       nz += locrowlens[i];
3760     }
3761     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
3762     mycols = ibuf;
3763     /* receive message of column indices */
3764     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3765     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
3766     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
3767   }
3768 
3769   /* loop over local rows, determining number of off diagonal entries */
3770   ierr     = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
3771   ierr     = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
3772   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3773   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3774   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3775   rowcount = 0; nzcount = 0;
3776   for (i=0; i<mbs; i++) {
3777     dcount  = 0;
3778     odcount = 0;
3779     for (j=0; j<bs; j++) {
3780       kmax = locrowlens[rowcount];
3781       for (k=0; k<kmax; k++) {
3782         tmp = mycols[nzcount++]/bs;
3783         if (!mask[tmp]) {
3784           mask[tmp] = 1;
3785           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
3786           else masked1[dcount++] = tmp;
3787         }
3788       }
3789       rowcount++;
3790     }
3791 
3792     dlens[i]  = dcount;
3793     odlens[i] = odcount;
3794 
3795     /* zero out the mask elements we set */
3796     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
3797     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
3798   }
3799 
3800 
3801   if (!sizesset) {
3802     ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3803   }
3804   ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
3805 
3806   if (!rank) {
3807     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
3808     /* read in my part of the matrix numerical values  */
3809     nz = procsnz[0];
3810     vals = buf;
3811     mycols = ibuf;
3812     if (size == 1)  nz -= extra_rows;
3813     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3814     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
3815 
3816     /* insert into matrix */
3817     jj      = rstart*bs;
3818     for (i=0; i<m; i++) {
3819       ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3820       mycols += locrowlens[i];
3821       vals   += locrowlens[i];
3822       jj++;
3823     }
3824     /* read in other processors (except the last one) and ship out */
3825     for (i=1; i<size-1; i++) {
3826       nz   = procsnz[i];
3827       vals = buf;
3828       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3829       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3830     }
3831     /* the last proc */
3832     if (size != 1){
3833       nz   = procsnz[i] - extra_rows;
3834       vals = buf;
3835       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3836       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
3837       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3838     }
3839     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3840   } else {
3841     /* receive numeric values */
3842     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
3843 
3844     /* receive message of values*/
3845     vals   = buf;
3846     mycols = ibuf;
3847     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
3848     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
3849     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
3850 
3851     /* insert into matrix */
3852     jj      = rstart*bs;
3853     for (i=0; i<m; i++) {
3854       ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3855       mycols += locrowlens[i];
3856       vals   += locrowlens[i];
3857       jj++;
3858     }
3859   }
3860   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
3861   ierr = PetscFree(buf);CHKERRQ(ierr);
3862   ierr = PetscFree(ibuf);CHKERRQ(ierr);
3863   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
3864   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
3865   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
3866   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3867   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3868 
3869   PetscFunctionReturn(0);
3870 }
3871 
3872 #undef __FUNCT__
3873 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
3874 /*@
3875    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
3876 
3877    Input Parameters:
3878 +  mat  - the matrix
3879 -  fact - factor
3880 
3881    Not Collective, each process can use a different factor
3882 
3883    Level: advanced
3884 
3885   Notes:
3886    This can also be set by the command line option: -mat_use_hash_table <fact>
3887 
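   Example usage (a minimal sketch; the value 1.39 is illustrative and matches the default used by -mat_use_hash_table):
.vb
   ierr = MatSetOption(mat,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
   ierr = MatMPIBAIJSetHashTableFactor(mat,1.39);CHKERRQ(ierr);
.ve
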
3888 .keywords: matrix, hashtable, factor, HT
3889 
3890 .seealso: MatSetOption()
3891 @*/
3892 PetscErrorCode  MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
3893 {
3894   PetscErrorCode ierr;
3895 
3896   PetscFunctionBegin;
3897   ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr);
3898   PetscFunctionReturn(0);
3899 }
3900 
3901 EXTERN_C_BEGIN
3902 #undef __FUNCT__
3903 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
3904 PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
3905 {
3906   Mat_MPIBAIJ *baij;
3907 
3908   PetscFunctionBegin;
3909   baij = (Mat_MPIBAIJ*)mat->data;
3910   baij->ht_fact = fact;
3911   PetscFunctionReturn(0);
3912 }
3913 EXTERN_C_END
3914 
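/*
   MatMPIBAIJGetSeqBAIJ - gives access to the two sequential matrices that make up the
   parallel one: Ad holds the block rows and block columns owned by this process, Ao holds
   the remaining (off-diagonal) part with compressed block column indices, and colmap
   (the garray) translates those compressed block columns back to global block columns.
*/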
3915 #undef __FUNCT__
3916 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
3917 PetscErrorCode  MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3918 {
3919   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
3920   PetscFunctionBegin;
3921   *Ad     = a->A;
3922   *Ao     = a->B;
3923   *colmap = a->garray;
3924   PetscFunctionReturn(0);
3925 }
3926 
3927 /*
3928     Special version for direct calls from Fortran (to eliminate two function call overheads
3929 */
3930 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3931 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3932 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3933 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3934 #endif
3935 
3936 #undef __FUNCT__
3937 #define __FUNCT__ "matmpibaijsetvaluesblocked"
3938 /*@C
3939   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked()
3940 
3941   Collective on Mat
3942 
3943   Input Parameters:
3944 + mat - the matrix
3945 . min - number of input rows
3946 . im - input rows
3947 . nin - number of input columns
3948 . in - input columns
3949 . v - numerical values input
3950 - addvin - INSERT_VALUES or ADD_VALUES
3951 
3952   Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse.
3953 
3954   Level: advanced
3955 
3956 .seealso:   MatSetValuesBlocked()
3957 @*/
3958 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
3959 {
3960   /* convert input arguments to C version */
3961   Mat             mat = *matin;
3962   PetscInt        m = *min, n = *nin;
3963   InsertMode      addv = *addvin;
3964 
3965   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
3966   const MatScalar *value;
3967   MatScalar       *barray=baij->barray;
3968   PetscBool       roworiented = baij->roworiented;
3969   PetscErrorCode  ierr;
3970   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
3971   PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
3972   PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
3973 
3974   PetscFunctionBegin;
3975   /* tasks normally handled by MatSetValuesBlocked() */
3976   if (mat->insertmode == NOT_SET_VALUES) {
3977     mat->insertmode = addv;
3978   }
3979 #if defined(PETSC_USE_DEBUG)
3980   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
3981   if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
3982 #endif
3983   if (mat->assembled) {
3984     mat->was_assembled = PETSC_TRUE;
3985     mat->assembled     = PETSC_FALSE;
3986   }
3987   ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
3988 
3989 
3990   if (!barray) {
3991     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
3992     baij->barray = barray;
3993   }
3994 
3995   if (roworiented) {
3996     stepval = (n-1)*bs;
3997   } else {
3998     stepval = (m-1)*bs;
3999   }
4000   for (i=0; i<m; i++) {
4001     if (im[i] < 0) continue;
4002 #if defined(PETSC_USE_DEBUG)
4003     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
4004 #endif
4005     if (im[i] >= rstart && im[i] < rend) {
4006       row = im[i] - rstart;
4007       for (j=0; j<n; j++) {
4008         /* If NumCol = 1 then a copy is not required */
4009         if ((roworiented) && (n == 1)) {
4010           barray = (MatScalar*)v + i*bs2;
4011         } else if((!roworiented) && (m == 1)) {
4012           barray = (MatScalar*)v + j*bs2;
4013         } else { /* Here a copy is required */
4014           if (roworiented) {
4015             value = v + i*(stepval+bs)*bs + j*bs;
4016           } else {
4017             value = v + j*(stepval+bs)*bs + i*bs;
4018           }
4019           for (ii=0; ii<bs; ii++,value+=stepval) {
4020             for (jj=0; jj<bs; jj++) {
4021               *barray++  = *value++;
4022             }
4023           }
4024           barray -=bs2;
4025         }
4026 
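        /* block columns in [cstart,cend) are stored in the diagonal part baij->A with a local
           block column number; other valid columns go to the off-diagonal part baij->B, and once
           the matrix has previously been assembled the global block column is translated to B's
           compressed local numbering through colmap */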
        if (in[j] >= cstart && in[j] < cend) {
4028           col  = in[j] - cstart;
4029           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
4030         }
4031         else if (in[j] < 0) continue;
4032 #if defined(PETSC_USE_DEBUG)
4033         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
4034 #endif
4035         else {
4036           if (mat->was_assembled) {
4037             if (!baij->colmap) {
4038               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
4039             }
4040 
4041 #if defined(PETSC_USE_DEBUG)
4042 #if defined (PETSC_USE_CTABLE)
4043             { PetscInt data;
4044               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
4045               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
4046             }
4047 #else
4048             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
4049 #endif
4050 #endif
4051 #if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
4053             col  = (col - 1)/bs;
4054 #else
4055             col = (baij->colmap[in[j]] - 1)/bs;
4056 #endif
4057             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
4058               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
4059               col =  in[j];
4060             }
4061           }
4062           else col = in[j];
4063           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
4064         }
4065       }
4066     } else {
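      /* the block row is owned by another process: stash the values locally; they are
         communicated to the owning process during matrix assembly */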
4067       if (!baij->donotstash) {
4068         if (roworiented) {
4069           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
4070         } else {
4071           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
4072         }
4073       }
4074     }
4075   }
4076 
4077   /* task normally handled by MatSetValuesBlocked() */
4078   ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
4079   PetscFunctionReturn(0);
4080 }
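
/*
   A minimal usage sketch (not part of the original source) of the standard C-side call that this
   Fortran shortcut replicates; the matrix A, the block indices, the block size bs = 2, and the
   values below are hypothetical:

     PetscInt    row = 0,col = 2;                        block row/column indices (in blocks)
     PetscScalar vals[4] = {1.0,2.0,3.0,4.0};            one 2x2 block, row-oriented
     ierr = MatSetValuesBlocked(A,1,&row,1,&col,vals,INSERT_VALUES);CHKERRQ(ierr);
*/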
4081 
4082 #undef __FUNCT__
4083 #define __FUNCT__ "MatCreateMPIBAIJWithArrays"
4084 /*@
     MatCreateMPIBAIJWithArrays - creates an MPI BAIJ matrix using arrays that contain
         the local rows in standard CSR format.
4087 
4088    Collective on MPI_Comm
4089 
4090    Input Parameters:
4091 +  comm - MPI communicator
4092 .  bs - the block size, only a block size of 1 is supported
4093 .  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
       calculated if N is given). For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
4099 .   i - row indices
4100 .   j - column indices
4101 -   a - matrix values
4102 
4103    Output Parameter:
4104 .   mat - the matrix
4105 
4106    Level: intermediate
4107 
4108    Notes:
4109        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4110      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4111      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4112 
       The i and j indices are 0 based; the entries of i are offsets into the local j (and a) arrays.
4114 
.keywords: matrix, baij, compressed row, sparse, parallel
4116 
4117 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4118           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
4119 @*/
4120 PetscErrorCode  MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4121 {
4122   PetscErrorCode ierr;
4123 
  PetscFunctionBegin;
4126   if (i[0]) {
4127     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4128   }
4129   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4130   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4131   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPIBAIJ);CHKERRQ(ierr);
4133   ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
4134   PetscFunctionReturn(0);
4135 }
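
/*
   A minimal usage sketch (not part of the original source) of MatCreateMPIBAIJWithArrays on a
   single process; the 3x3 CSR data below is hypothetical and uses the only supported block size, 1:

     PetscInt    i[] = {0,2,3,5};                        row offsets into j and a
     PetscInt    j[] = {0,2,1,0,2};                      column indices
     PetscScalar a[] = {1.0,2.0,3.0,4.0,5.0};            values
     Mat         A;
     ierr = MatCreateMPIBAIJWithArrays(PETSC_COMM_WORLD,1,3,3,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,&A);CHKERRQ(ierr);
*/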
4136