
#include <../src/mat/impls/baij/mpi/mpibaij.h>   /*I  "petscmat.h"  I*/

#include <petscblaslapack.h>
#include <petscsf.h>

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ"
PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {ierr = PetscMalloc1(A->rmap->n,&idxb);CHKERRQ(ierr);}
  ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++) {
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      va[i] = vb[i];
      if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  ierr = PetscFree(idxb);CHKERRQ(ierr);
  ierr = VecDestroy(&vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
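
/*
   Illustrative note (not used by the library): garray[] maps a local block
   column of the off-diagonal part B to its global block column, so for a
   local (point) column index lcol of B with block size bs the conversion
   used above is, as a minimal sketch,

       gcol = bs*garray[lcol/bs] + (lcol % bs);

   i.e. translate the block index and keep the offset within the block.
*/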

#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPIBAIJ"
PetscErrorCode  MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
PetscErrorCode  MatRetrieveValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
     Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  This is done in a non-scalable way since the
   length of colmap equals the global matrix length.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateColmap_MPIBAIJ_Private"
PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *B    = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;

  PetscFunctionBegin;
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableCreate(baij->nbs,baij->Nbs+1,&baij->colmap);CHKERRQ(ierr);
  for (i=0; i<nbs; i++) {
    ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1,INSERT_VALUES);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc1((baij->Nbs+1),&baij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory((PetscObject)mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}
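
/*
   Usage sketch (illustrative, not part of the library): entries of colmap
   are stored shifted by one so that zero can mean "block column not
   present".  Assuming the non-CTABLE branch, a lookup of a hypothetical
   global block column gbcol therefore looks like:
*/
#if 0
  lcol = baij->colmap[gbcol] - 1;  /* point offset of the block in B, or -1 if absent */
  if (lcol < 0) { /* gbcol has no allocated block in the off-diagonal part */ }
#endif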

#define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
  { \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap +  bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value;  \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
a_noinsert:; \
    ailen[brow] = nrow; \
  }

#define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
  { \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap +  bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value;  \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
b_noinsert:; \
    bilen[brow] = nrow; \
  }
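
/*
   Note (illustrative): both macros above locate block column bcol inside a
   block row by binary search until the window is at most three entries wide,
   then finish with a short linear scan.  A new block is inserted in sorted
   order: later entries are shifted up, the fresh bs*bs block is zeroed, and
   the single value is stored at offset bs*cidx+ridx within it.
*/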

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ"
PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  MatScalar      value;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;

  /* Some variables required by the macros above */
  Mat         A     = baij->A;
  Mat_SeqBAIJ *a    = (Mat_SeqBAIJ*)(A)->data;
  PetscInt    *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar   *aa   =a->a;

  Mat         B     = baij->B;
  Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
  PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar   *ba   =b->a;

  PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar *ap,*bap;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      row = im[i] - rstart_orig;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig) {
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B    = baij->B;
              b    = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba   =b->a;
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", im[i], in[j]);
            else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
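
/*
   Usage sketch (illustrative, not part of the library): rows owned by this
   process are inserted directly into the diagonal (A) or off-diagonal (B)
   block above; off-process rows are stashed and communicated during
   assembly, so callers always bracket insertion with the assembly pair
   (row, col, val here are hypothetical locals):
*/
#if 0
  ierr = MatSetValues(mat,1,&row,1,&col,&val,ADD_VALUES);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
#endif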

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  const PetscScalar *value;
  MatScalar         *barray     = baij->barray;
  PetscBool         roworiented = baij->roworiented;
  PetscErrorCode    ierr;
  PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
  PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) stepval = (n-1)*bs;
  else stepval = (m-1)*bs;

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If there is a single block column (row oriented) or a single block row (column oriented) the block is already contiguous, so no copy is required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + (i*(stepval+bs) + j)*bs;
          } else {
            value = v + (j*(stepval+bs) + i)*bs;
          }
          for (ii=0; ii<bs; ii++,value+=bs+stepval) {
            for (jj=0; jj<bs; jj++) barray[jj] = value[jj];
            barray += bs;
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend) {
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined(PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->B->data))->nonew) {
              ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  =  in[j];
            } else if (col < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", bs*im[i], bs*in[j]);
          } else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
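
/*
   Usage sketch (illustrative, not part of the library): the indices passed
   to the blocked routine are block indices, and v points to an m*n array of
   bs*bs blocks.  For bs = 2, inserting one block at block row i, block
   column j (i and j hypothetical locals):
*/
#if 0
  PetscScalar vals[4] = {1.0,2.0,3.0,4.0};  /* one 2x2 block, row oriented */
  ierr = MatSetValuesBlocked(mat,1,&i,1,&j,vals,INSERT_VALUES);CHKERRQ(ierr);
#endif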

#define HASH_KEY 0.6180339887
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
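/*
   Worked example (illustrative): HASH is multiplicative hashing with the
   golden-ratio constant; it scales the fractional part of key*HASH_KEY to
   the table size.  For size = 11 and key = 7:
       tmp  = 7*0.6180339887 = 4.3262379209
       HASH = (PetscInt)(11*0.3262379209) = 3
   Collisions are resolved by the linear probing seen in the routines below.
*/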
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij       = (Mat_MPIBAIJ*)mat->data;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig  =mat->rmap->rend,Nbs=baij->Nbs;
  PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
  PetscReal      tmp;
  MatScalar      **HD = baij->hd,value;
#if defined(PETSC_USE_DEBUG)
  PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j=0; j<n; j++) {
        col = in[j];
        if (roworiented) value = v[i*n+j];
        else             value = v[i+j*m];
        /* Look up in the hash table */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
#endif
        /* A hash table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else if (!baij->donotstash) {
      if (roworiented) {
        ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
      } else {
        ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct  = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ       *baij       = (Mat_MPIBAIJ*)mat->data;
  PetscBool         roworiented = baij->roworiented;
  PetscErrorCode    ierr;
  PetscInt          i,j,ii,jj,row,col;
  PetscInt          rstart=baij->rstartbs;
  PetscInt          rend  =baij->rendbs,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
  PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  PetscReal         tmp;
  MatScalar         **HD = baij->hd,*baij_a;
  const PetscScalar *v_t,*value;
#if defined(PETSC_USE_DEBUG)
  PetscInt total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;
  if (roworiented) stepval = (n-1)*bs;
  else stepval = (m-1)*bs;

  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
#endif
    row = im[i];
    v_t = v + i*nbs2;
    if (row >= rstart && row < rend) {
      for (j=0; j<n; j++) {
        col = in[j];

        /* Look up in the hash table */
        key = row*Nbs+col+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        total_ct++;
        insert_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++) ;
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++) ;
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++) ;
            if (idx == h1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
          }
        }
#endif
        baij_a = HD[idx];
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t  += bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] = *value++;
              }
            }
          }
        } else {
          value = v + j*(stepval+bs)*bs + i*bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] = *value++;
              }
            }
          }
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct  = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIBAIJ"
PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
  PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          col  = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIBAIJ"
PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr =  MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v  = amat->a;
      nz = amat->nz*bs2;
      for (i=0; i<nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      v  = bmat->a;
      nz = bmat->nz*bs2;
      for (i=0; i<nz; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *nrm = PetscSqrtReal(*nrm);
    } else if (type == NORM_1) { /* max column sum */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
      ierr = PetscMalloc2(mat->cmap->N,&tmp,mat->cmap->N,&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      v    = amat->a; jj = amat->j;
      for (i=0; i<amat->nz; i++) {
        for (j=0; j<bs; j++) {
          col = bs*(cstart + *jj) + j; /* column index */
          for (row=0; row<bs; row++) {
            tmp[col] += PetscAbsScalar(*v);  v++;
          }
        }
        jj++;
      }
      v = bmat->a; jj = bmat->j;
      for (i=0; i<bmat->nz; i++) {
        for (j=0; j<bs; j++) {
          col = bs*garray[*jj] + j;
          for (row=0; row<bs; row++) {
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *nrm = 0.0;
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row sum */
      PetscReal *sums;
      ierr = PetscMalloc1(bs,&sums);CHKERRQ(ierr);
      sum  = 0.0;
      for (j=0; j<amat->mbs; j++) {
        for (row=0; row<bs; row++) sums[row] = 0.0;
        v  = amat->a + bs2*amat->i[j];
        nz = amat->i[j+1]-amat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        v  = bmat->a + bs2*bmat->i[j];
        nz = bmat->i[j+1]-bmat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        for (row=0; row<bs; row++) {
          if (sums[row] > sum) sum = sums[row];
        }
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      ierr = PetscFree(sums);CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for this norm yet");
  }
  PetscFunctionReturn(0);
}
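
/*
   Note (illustrative): the Frobenius case above computes the local sum of
   |a_ij|^2 over both the diagonal (A) and off-diagonal (B) blocks, reduces
   with MPIU_SUM, and takes the square root once, i.e.
       ||A||_F = sqrt( sum_ij |a_ij|^2 )
   The 1-norm and infinity-norm cases reduce per-column and per-row absolute
   sums with MPIU_SUM and MPIU_MAX respectively before taking the maximum.
*/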

/*
  Creates and fills the hash table.
  The table is created only once; if new entries
  need to be added to the matrix then the hash
  table has to be destroyed and recreated.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            A     = baij->A,B=baij->B;
  Mat_SeqBAIJ    *a    = (Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)B->data;
  PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
  PetscErrorCode ierr;
  PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
  PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
  PetscInt       *HT,key;
  MatScalar      **HD;
  PetscReal      tmp;
#if defined(PETSC_USE_INFO)
  PetscInt ct=0,max=0;
#endif

  PetscFunctionBegin;
  if (baij->ht) PetscFunctionReturn(0);

  baij->ht_size = (PetscInt)(factor*nz);
  ht_size       = baij->ht_size;

  /* Allocate Memory for Hash Table */
  ierr = PetscCalloc2(ht_size,&baij->hd,ht_size,&baij->ht);CHKERRQ(ierr);
  HD   = baij->hd;
  HT   = baij->ht;

  /* Loop Over A */
  for (i=0; i<a->mbs; i++) {
    for (j=ai[i]; j<ai[i+1]; j++) {
      row = i+rstart;
      col = aj[j]+cstart;

      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++) {
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = a->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k> max) max = k;
#endif
    }
  }
  /* Loop Over B */
  for (i=0; i<b->mbs; i++) {
    for (j=bi[i]; j<bi[i+1]; j++) {
      row = i+rstart;
      col = garray[bj[j]];
      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++) {
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = b->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k> max) max = k;
#endif
    }
  }

  /* Print Summary */
#if defined(PETSC_USE_INFO)
  for (i=0,j=0; i<ht_size; i++) {
    if (HT[i]) j++;
  }
  ierr = PetscInfo2(mat,"Average Search = %5.2f, max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;

  PetscFunctionBegin;
  if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *a   =(Mat_SeqBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscBool      r1,r2,r3,other_disassembled;
  MatScalar      *val;
  PetscMPIInt    n;

  PetscFunctionBegin;
  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  if (!baij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       temporarily set the roworiented flags to PETSC_FALSE and restore the
       original flags after MatSetValues() */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;

    (((Mat_SeqBAIJ*)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,mat->insertmode);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);

    baij->roworiented = r1;
    a->roworiented    = r2;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);

    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);

    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);

  baij->rowvalues = 0;

  /* if no new nonzero locations are allowed in the matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatView_SeqBAIJ(Mat,PetscViewer);
#include <petscdraw.h>
#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       rank = baij->rank;
  PetscInt          bs   = mat->rmap->bs;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
                                                rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto the first processor. */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar   *a;
    const char  *matname;

    /* Here we are creating a temporary matrix, so we will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
    ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr      = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ*)baij->B->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr      = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,matname);CHKERRQ(ierr);
      ierr = MatView_SeqBAIJ(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_Binary"
static PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *A = (Mat_SeqBAIJ*)a->A->data;
  Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)a->B->data;
  PetscErrorCode ierr;
  PetscInt       i,*row_lens,*crow_lens,bs = mat->rmap->bs,j,k,bs2=a->bs2,header[4],nz,rlen;
  PetscInt       *range=0,nzmax,*column_indices,cnt,col,*garray = a->garray,cstart = mat->cmap->rstart/bs,len,pcnt,l,ll;
  int            fd;
  PetscScalar    *column_values;
  FILE           *file;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag;
  PetscInt       message_count,flowcontrolcount;

  PetscFunctionBegin;
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr);
  nz   = bs2*(A->nz + B->nz);
  rlen = mat->rmap->n;
  if (!rank) {
    header[0] = MAT_FILE_CLASSID;
    header[1] = mat->rmap->N;
    header[2] = mat->cmap->N;

    ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryWrite(fd,header,4,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    /* get largest number of rows any processor has */
    range = mat->rmap->range;
    for (i=1; i<size; i++) {
      rlen = PetscMax(rlen,range[i+1] - range[i]);
    }
  } else {
    ierr = MPI_Reduce(&nz,0,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }

  ierr = PetscMalloc1((rlen/bs),&crow_lens);CHKERRQ(ierr);
  /* compute lengths of each row  */
  for (i=0; i<a->mbs; i++) {
    crow_lens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
  }
  /* store the row lengths to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscMalloc1(rlen,&row_lens);CHKERRQ(ierr);
    rlen = (range[1] - range[0])/bs;
    for (i=0; i<rlen; i++) {
      for (j=0; j<bs; j++) {
        row_lens[i*bs+j] = bs*crow_lens[i];
      }
    }
    ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      rlen = (range[i+1] - range[i])/bs;
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(crow_lens,rlen,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      for (k=0; k<rlen; k++) {
        for (j=0; j<bs; j++) {
          row_lens[k*bs+j] = bs*crow_lens[k];
        }
      }
      ierr = PetscBinaryWrite(fd,row_lens,bs*rlen,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
    ierr = PetscFree(row_lens);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(crow_lens,mat->rmap->n/bs,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(crow_lens);CHKERRQ(ierr);

  /* load up the local column indices. Indices are included for every row, not just
     once per block row, since process 0 does not have the information needed to
     expand a block row into individual rows. This requires more communication, but
     still no more than the communication needed for the nonzero values  */
  nzmax = nz; /* space the largest processor needs */
  ierr  = MPI_Reduce(&nz,&nzmax,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  ierr  = PetscMalloc1(nzmax,&column_indices);CHKERRQ(ierr);
  cnt   = 0;
  for (i=0; i<a->mbs; i++) {
    pcnt = cnt;
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if ((col = garray[B->j[j]]) > cstart) break;
      for (l=0; l<bs; l++) {
        column_indices[cnt++] = bs*col+l;
      }
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      for (l=0; l<bs; l++) {
        column_indices[cnt++] = bs*(A->j[k] + cstart)+l;
      }
    }
    for (; j<B->i[i+1]; j++) {
      for (l=0; l<bs; l++) {
        column_indices[cnt++] = bs*garray[B->j[j]]+l;
      }
    }
    len = cnt - pcnt;
    for (k=1; k<bs; k++) {
      ierr = PetscMemcpy(&column_indices[cnt],&column_indices[pcnt],len*sizeof(PetscInt));CHKERRQ(ierr);
      cnt += len;
    }
  }
  if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);

  /* store the columns to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_indices,nz,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      ierr = MPI_Recv(column_indices,cnt,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_indices,cnt,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&cnt,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPI_Send(column_indices,cnt,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_indices);CHKERRQ(ierr);

  /* load up the numerical values */
  ierr = PetscMalloc1(nzmax,&column_values);CHKERRQ(ierr);
  cnt  = 0;
  for (i=0; i<a->mbs; i++) {
    rlen = bs*(B->i[i+1] - B->i[i] + A->i[i+1] - A->i[i]);
    for (j=B->i[i]; j<B->i[i+1]; j++) {
      if (garray[B->j[j]] > cstart) break;
      for (l=0; l<bs; l++) {
        for (ll=0; ll<bs; ll++) {
          column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
        }
      }
      cnt += bs;
    }
    for (k=A->i[i]; k<A->i[i+1]; k++) {
      for (l=0; l<bs; l++) {
        for (ll=0; ll<bs; ll++) {
          column_values[cnt + l*rlen + ll] = A->a[bs2*k+l+bs*ll];
        }
      }
      cnt += bs;
    }
    for (; j<B->i[i+1]; j++) {
      for (l=0; l<bs; l++) {
        for (ll=0; ll<bs; ll++) {
          column_values[cnt + l*rlen + ll] = B->a[bs2*j+l+bs*ll];
        }
      }
      cnt += bs;
    }
    cnt += (bs-1)*rlen;
  }
  if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);

  /* store the column values to the file */
  ierr = PetscViewerFlowControlStart(viewer,&message_count,&flowcontrolcount);CHKERRQ(ierr);
  if (!rank) {
    MPI_Status status;
    ierr = PetscBinaryWrite(fd,column_values,nz,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    for (i=1; i<size; i++) {
      ierr = PetscViewerFlowControlStepMaster(viewer,i,&message_count,flowcontrolcount);CHKERRQ(ierr);
      ierr = MPI_Recv(&cnt,1,MPIU_INT,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      ierr = MPI_Recv(column_values,cnt,MPIU_SCALAR,i,tag,PetscObjectComm((PetscObject)mat),&status);CHKERRQ(ierr);
      ierr = PetscBinaryWrite(fd,column_values,cnt,PETSC_SCALAR,PETSC_TRUE);CHKERRQ(ierr);
    }
    ierr = PetscViewerFlowControlEndMaster(viewer,&message_count);CHKERRQ(ierr);
  } else {
    ierr = PetscViewerFlowControlStepWorker(viewer,rank,&message_count);CHKERRQ(ierr);
    ierr = MPI_Send(&nz,1,MPIU_INT,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = MPI_Send(column_values,nz,MPIU_SCALAR,0,tag,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    ierr = PetscViewerFlowControlEndWorker(viewer,&message_count);CHKERRQ(ierr);
  }
  ierr = PetscFree(column_values);CHKERRQ(ierr);

  ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
  if (file) {
    fprintf(file,"-matload_block_size %d\n",(int)mat->rmap->bs);
  }
  PetscFunctionReturn(0);
}
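
/*
   Note (illustrative): the on-disk layout written above is the PETSc binary
   matrix format:
       header[4]       {MAT_FILE_CLASSID, M, N, total nz}
       row lengths     (M values of PETSC_INT, in rank order)
       column indices  (nz values of PETSC_INT, row by row)
       values          (nz values of PETSC_SCALAR, row by row)
   plus a "-matload_block_size <bs>" hint in the .info file so a later
   MatLoad() can restore the block size.
*/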

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ"
PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii || isdraw || issocket) {
    ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else if (isbinary) {
    ierr = MatView_MPIBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIBAIJ"
PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpisbaij_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpibaij_mpibstrm_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIBAIJ"
PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
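
/*
   Note (illustrative): MatMult above overlaps communication with
   computation: the scatter of ghost values needed by the off-diagonal
   block B is started, the purely local product y = A_d*x is computed
   while messages are in flight, and only then is the scatter completed
   and the off-diagonal contribution y += B*x_ghost added.
*/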

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIBAIJ"
PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1326 
1327 #undef __FUNCT__
1328 #define __FUNCT__ "MatMultTranspose_MPIBAIJ"
1329 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1330 {
1331   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1332   PetscErrorCode ierr;
1333   PetscBool      merged;
1334 
1335   PetscFunctionBegin;
1336   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1337   /* do nondiagonal part */
1338   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1339   if (!merged) {
1340     /* send it on its way */
1341     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1342     /* do local part */
1343     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1344     /* receive remote parts: note this assumes the values are not actually */
1345     /* inserted in yy until the next line */
1346     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1347   } else {
1348     /* do local part */
1349     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1350     /* send it on its way */
1351     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1352     /* the values were actually received in the Begin() but we must still call this End() as a no-op */
1353     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1354   }
1355   PetscFunctionReturn(0);
1356 }
1357 
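/*
   Pattern sketch (illustrative): the two branches above differ only in where
   the local product may legally overlap the reverse scatter. The generic form
   for accumulating ghost contributions back to their owning process is

     ierr = VecScatterBegin(scat,ghost,global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
     .... purely local work that does not touch global ....
     ierr = VecScatterEnd(scat,ghost,global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

   where scat, ghost, and global stand for any scatter context and its vectors.
   When the scatter is "merged" the data already arrives during the Begin(), so
   the local work must come first, exactly as coded above.
*/
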
1358 #undef __FUNCT__
1359 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
1360 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1361 {
1362   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1363   PetscErrorCode ierr;
1364 
1365   PetscFunctionBegin;
1366   /* do nondiagonal part */
1367   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1368   /* send it on its way */
1369   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1370   /* do local part */
1371   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1372   /* receive remote parts: note this assumes the values are not actually */
1373   /* inserted in zz until the VecScatterEnd() below, which is true for the */
1374   /* current implementation but is perhaps not always true. */
1375   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1376   PetscFunctionReturn(0);
1377 }
1378 
1379 /*
1380   This only works correctly for square matrices where the subblock A->A is the
1381    diagonal block
1382 */
1383 #undef __FUNCT__
1384 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
1385 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1386 {
1387   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1388   PetscErrorCode ierr;
1389 
1390   PetscFunctionBegin;
1391   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1392   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1393   PetscFunctionReturn(0);
1394 }
1395 
1396 #undef __FUNCT__
1397 #define __FUNCT__ "MatScale_MPIBAIJ"
1398 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
1399 {
1400   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1401   PetscErrorCode ierr;
1402 
1403   PetscFunctionBegin;
1404   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1405   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1406   PetscFunctionReturn(0);
1407 }
1408 
1409 #undef __FUNCT__
1410 #define __FUNCT__ "MatGetRow_MPIBAIJ"
1411 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1412 {
1413   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
1414   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1415   PetscErrorCode ierr;
1416   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1417   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
1418   PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;
1419 
1420   PetscFunctionBegin;
1421   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1422   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1423   mat->getrowactive = PETSC_TRUE;
1424 
1425   if (!mat->rowvalues && (idx || v)) {
1426     /*
1427         allocate enough space to hold information from the longest row.
1428     */
1429     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1430     PetscInt    max = 1,mbs = mat->mbs,tmp;
1431     for (i=0; i<mbs; i++) {
1432       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1433       if (max < tmp) max = tmp;
1434     }
1435     ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr);
1436   }
1437   lrow = row - brstart;
1438 
1439   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1440   if (!v)   {pvA = 0; pvB = 0;}
1441   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1442   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1443   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1444   nztot = nzA + nzB;
1445 
1446   cmap = mat->garray;
1447   if (v  || idx) {
1448     if (nztot) {
1449       /* Sort by increasing column numbers, assuming A and B already sorted */
1450       PetscInt imark = -1;
1451       if (v) {
1452         *v = v_p = mat->rowvalues;
1453         for (i=0; i<nzB; i++) {
1454           if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
1455           else break;
1456         }
1457         imark = i;
1458         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1459         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1460       }
1461       if (idx) {
1462         *idx = idx_p = mat->rowindices;
1463         if (imark > -1) {
1464           for (i=0; i<imark; i++) {
1465             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1466           }
1467         } else {
1468           for (i=0; i<nzB; i++) {
1469             if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1470             else break;
1471           }
1472           imark = i;
1473         }
1474         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1475         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1476       }
1477     } else {
1478       if (idx) *idx = 0;
1479       if (v)   *v   = 0;
1480     }
1481   }
1482   *nz  = nztot;
1483   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1484   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1485   PetscFunctionReturn(0);
1486 }
1487 
1488 #undef __FUNCT__
1489 #define __FUNCT__ "MatRestoreRow_MPIBAIJ"
1490 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1491 {
1492   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1493 
1494   PetscFunctionBegin;
1495   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1496   baij->getrowactive = PETSC_FALSE;
1497   PetscFunctionReturn(0);
1498 }
1499 
1500 #undef __FUNCT__
1501 #define __FUNCT__ "MatZeroEntries_MPIBAIJ"
1502 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1503 {
1504   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1505   PetscErrorCode ierr;
1506 
1507   PetscFunctionBegin;
1508   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1509   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1510   PetscFunctionReturn(0);
1511 }
1512 
1513 #undef __FUNCT__
1514 #define __FUNCT__ "MatGetInfo_MPIBAIJ"
1515 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1516 {
1517   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
1518   Mat            A  = a->A,B = a->B;
1519   PetscErrorCode ierr;
1520   PetscReal      isend[5],irecv[5];
1521 
1522   PetscFunctionBegin;
1523   info->block_size = (PetscReal)matin->rmap->bs;
1524 
1525   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1526 
1527   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1528   isend[3] = info->memory;  isend[4] = info->mallocs;
1529 
1530   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1531 
1532   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1533   isend[3] += info->memory;  isend[4] += info->mallocs;
1534 
1535   if (flag == MAT_LOCAL) {
1536     info->nz_used      = isend[0];
1537     info->nz_allocated = isend[1];
1538     info->nz_unneeded  = isend[2];
1539     info->memory       = isend[3];
1540     info->mallocs      = isend[4];
1541   } else if (flag == MAT_GLOBAL_MAX) {
1542     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1543 
1544     info->nz_used      = irecv[0];
1545     info->nz_allocated = irecv[1];
1546     info->nz_unneeded  = irecv[2];
1547     info->memory       = irecv[3];
1548     info->mallocs      = irecv[4];
1549   } else if (flag == MAT_GLOBAL_SUM) {
1550     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1551 
1552     info->nz_used      = irecv[0];
1553     info->nz_allocated = irecv[1];
1554     info->nz_unneeded  = irecv[2];
1555     info->memory       = irecv[3];
1556     info->mallocs      = irecv[4];
1557   } else SETERRQ1(PetscObjectComm((PetscObject)matin),PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1558   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1559   info->fill_ratio_needed = 0;
1560   info->factor_mallocs    = 0;
1561   PetscFunctionReturn(0);
1562 }
1563 
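/*
   Usage sketch (illustrative): a caller selects how the per-process numbers
   gathered above are combined via the MatInfoType argument; A stands for any
   assembled MATMPIBAIJ matrix.

     MatInfo info;
     ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(PETSC_COMM_WORLD,"nonzeros used %g allocated %g\n",(double)info.nz_used,(double)info.nz_allocated);CHKERRQ(ierr);

   MAT_LOCAL reports this process only, MAT_GLOBAL_MAX the maximum over all
   processes, and MAT_GLOBAL_SUM the sum, matching the three branches above.
*/
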
1564 #undef __FUNCT__
1565 #define __FUNCT__ "MatSetOption_MPIBAIJ"
1566 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscBool flg)
1567 {
1568   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1569   PetscErrorCode ierr;
1570 
1571   PetscFunctionBegin;
1572   switch (op) {
1573   case MAT_NEW_NONZERO_LOCATIONS:
1574   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1575   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1576   case MAT_KEEP_NONZERO_PATTERN:
1577   case MAT_NEW_NONZERO_LOCATION_ERR:
1578     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1579     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1580     break;
1581   case MAT_ROW_ORIENTED:
1582     a->roworiented = flg;
1583 
1584     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1585     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1586     break;
1587   case MAT_NEW_DIAGONALS:
1588     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1589     break;
1590   case MAT_IGNORE_OFF_PROC_ENTRIES:
1591     a->donotstash = flg;
1592     break;
1593   case MAT_USE_HASH_TABLE:
1594     a->ht_flag = flg;
1595     break;
1596   case MAT_SYMMETRIC:
1597   case MAT_STRUCTURALLY_SYMMETRIC:
1598   case MAT_HERMITIAN:
1599   case MAT_SYMMETRY_ETERNAL:
1600     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1601     break;
1602   default:
1603     SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Unknown option %d",op);
1604   }
1605   PetscFunctionReturn(0);
1606 }
1607 
1608 #undef __FUNCT__
1609 #define __FUNCT__ "MatTranspose_MPIBAIJ"
1610 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
1611 {
1612   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
1613   Mat_SeqBAIJ    *Aloc;
1614   Mat            B;
1615   PetscErrorCode ierr;
1616   PetscInt       M =A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
1617   PetscInt       bs=A->rmap->bs,mbs=baij->mbs;
1618   MatScalar      *a;
1619 
1620   PetscFunctionBegin;
1621   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1622   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1623     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1624     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1625     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1626     /* Do not know preallocation information, but must set block size */
1627     ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,PETSC_DECIDE,NULL,PETSC_DECIDE,NULL);CHKERRQ(ierr);
1628   } else {
1629     B = *matout;
1630   }
1631 
1632   /* copy over the A part */
1633   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1634   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1635   ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);
1636 
1637   for (i=0; i<mbs; i++) {
1638     rvals[0] = bs*(baij->rstartbs + i);
1639     for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1640     for (j=ai[i]; j<ai[i+1]; j++) {
1641       col = (baij->cstartbs+aj[j])*bs;
1642       for (k=0; k<bs; k++) {
1643         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1644 
1645         col++; a += bs;
1646       }
1647     }
1648   }
1649   /* copy over the B part */
1650   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1651   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1652   for (i=0; i<mbs; i++) {
1653     rvals[0] = bs*(baij->rstartbs + i);
1654     for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
1655     for (j=ai[i]; j<ai[i+1]; j++) {
1656       col = baij->garray[aj[j]]*bs;
1657       for (k=0; k<bs; k++) {
1658         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1659         col++;
1660         a += bs;
1661       }
1662     }
1663   }
1664   ierr = PetscFree(rvals);CHKERRQ(ierr);
1665   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1666   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1667 
1668   if (reuse == MAT_INITIAL_MATRIX || *matout != A) *matout = B;
1669   else {
1670     ierr = MatHeaderMerge(A,B);CHKERRQ(ierr);
1671   }
1672   PetscFunctionReturn(0);
1673 }
1674 
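/*
   Usage sketch (illustrative): both calling modes handled above, with A
   standing for any assembled MATMPIBAIJ matrix,

     Mat At;
     ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);   ( new matrix, preallocation not known in advance )
     ierr = MatTranspose(A,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);      ( in place, square matrices only )

   The in-place form goes through MatHeaderMerge() so A keeps its identity.
*/
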
1675 #undef __FUNCT__
1676 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
1677 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1678 {
1679   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1680   Mat            a     = baij->A,b = baij->B;
1681   PetscErrorCode ierr;
1682   PetscInt       s1,s2,s3;
1683 
1684   PetscFunctionBegin;
1685   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1686   if (rr) {
1687     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1688     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1689     /* Overlap communication with computation. */
1690     ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1691   }
1692   if (ll) {
1693     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1694     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1695     ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
1696   }
1697   /* scale the diagonal block */
1698   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1699 
1700   if (rr) {
1701     /* Do a scatter end and then right scale the off-diagonal block */
1702     ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1703     ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
1704   }
1705   PetscFunctionReturn(0);
1706 }
1707 
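/*
   Usage sketch (illustrative): ll must conform to the row layout and rr to
   the column layout of the matrix, as the checks above enforce; mat stands
   for any assembled MATMPIBAIJ matrix.

     Vec l,r;
     ierr = MatGetVecs(mat,&r,&l);CHKERRQ(ierr);
     ierr = VecSet(l,2.0);CHKERRQ(ierr);
     ierr = VecSet(r,0.5);CHKERRQ(ierr);
     ierr = MatDiagonalScale(mat,l,r);CHKERRQ(ierr);   ( mat = diag(l)*mat*diag(r) )

   Passing NULL for either vector skips that side of the scaling.
*/
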
1708 #undef __FUNCT__
1709 #define __FUNCT__ "MatZeroRows_MPIBAIJ"
1710 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
1711 {
1712   Mat_MPIBAIJ   *l      = (Mat_MPIBAIJ *) A->data;
1713   PetscInt      *owners = A->rmap->range;
1714   PetscInt       n      = A->rmap->n;
1715   PetscSF        sf;
1716   PetscInt      *lrows;
1717   PetscSFNode   *rrows;
1718   PetscInt       r, p = 0, len = 0;
1719   PetscErrorCode ierr;
1720 
1721   PetscFunctionBegin;
1722   /* Create SF where leaves are input rows and roots are owned rows */
1723   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
1724   for (r = 0; r < n; ++r) lrows[r] = -1;
1725   if (!A->nooffproczerorows) {ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);}
1726   for (r = 0; r < N; ++r) {
1727     const PetscInt idx   = rows[r];
1728     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
1729     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
1730       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
1731     }
1732     if (A->nooffproczerorows) {
1733       if (p != l->rank) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"MAT_NO_OFF_PROC_ZERO_ROWS set, but row %D is not owned by rank %d",idx,l->rank);
1734       lrows[len++] = idx - owners[p];
1735     } else {
1736       rrows[r].rank = p;
1737       rrows[r].index = rows[r] - owners[p];
1738     }
1739   }
1740   if (!A->nooffproczerorows) {
1741     ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
1742     ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
1743     /* Collect flags for rows to be zeroed */
1744     ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1745     ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1746     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1747     /* Compress and put in row numbers */
1748     for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
1749   }
1750   /* fix right hand side if needed */
1751   if (x && b) {
1752     const PetscScalar *xx;
1753     PetscScalar       *bb;
1754 
1755     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
1756     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1757     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
1758     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
1759     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1760   }
1761 
1762   /* actually zap the local rows */
1763   /*
1764      Zero the required rows. If the "diagonal block" of the matrix
1765      is square and the user wishes to set the diagonal, we use separate
1766      code so that MatSetValues() is not called for each diagonal entry,
1767      which would allocate new memory, triggering many mallocs and slowing
1768      things down.
1769   */
1770   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
1771   ierr = MatZeroRows_SeqBAIJ(l->B,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
1772   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
1773     ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,diag,NULL,NULL);CHKERRQ(ierr);
1774   } else if (diag != 0.0) {
1775     ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,0,0);CHKERRQ(ierr);
1776     if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1777        MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1778     for (r = 0; r < len; ++r) {
1779       const PetscInt row = lrows[r] + A->rmap->rstart;
1780       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
1781     }
1782     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1783     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1784   } else {
1785     ierr = MatZeroRows_SeqBAIJ(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr);
1786   }
1787   ierr = PetscFree(lrows);CHKERRQ(ierr);
1788 
1789   /* only change matrix nonzero state if pattern was allowed to be changed */
1790   if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) {
1791     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1792     ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1793   }
1794   PetscFunctionReturn(0);
1795 }
1796 
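/*
   Usage sketch (illustrative): zero two (possibly off-process) rows, put
   diag on the diagonal of each zeroed row, and adjust the right-hand side so
   the solution keeps the values given in x for those rows. Here A, x, and b
   are hypothetical: an assembled MATMPIBAIJ matrix with conforming vectors.

     PetscInt rows[2] = {0,5};          ( hypothetical global row numbers )
     ierr = MatZeroRows(A,2,rows,1.0,x,b);CHKERRQ(ierr);

   When x and b are given, the routine above sets b[row] = diag*x[row] for
   each locally owned zeroed row before modifying the matrix.
*/
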
1797 #undef __FUNCT__
1798 #define __FUNCT__ "MatZeroRowsColumns_MPIBAIJ"
1799 PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
1800 {
1801   Mat_MPIBAIJ       *l = (Mat_MPIBAIJ*)A->data;
1802   PetscErrorCode    ierr;
1803   PetscMPIInt       n = A->rmap->n;
1804   PetscInt          i,j,k,r,p = 0,len = 0,row,col,count;
1805   PetscInt          *lrows,*owners = A->rmap->range;
1806   PetscSFNode       *rrows;
1807   PetscSF           sf;
1808   const PetscScalar *xx;
1809   PetscScalar       *bb,*mask;
1810   Vec               xmask,lmask;
1811   Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ*)l->B->data;
1812   PetscInt           bs = A->rmap->bs, bs2 = baij->bs2;
1813   PetscScalar       *aa;
1814 
1815   PetscFunctionBegin;
1816   /* Create SF where leaves are input rows and roots are owned rows */
1817   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
1818   for (r = 0; r < n; ++r) lrows[r] = -1;
1819   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
1820   for (r = 0; r < N; ++r) {
1821     const PetscInt idx   = rows[r];
1822     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
1823     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
1824       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
1825     }
1826     rrows[r].rank  = p;
1827     rrows[r].index = rows[r] - owners[p];
1828   }
1829   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
1830   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
1831   /* Collect flags for rows to be zeroed */
1832   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1833   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
1834   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1835   /* Compress and put in row numbers */
1836   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
1837   /* zero diagonal part of matrix */
1838   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
1839   /* handle off diagonal part of matrix */
1840   ierr = MatGetVecs(A,&xmask,NULL);CHKERRQ(ierr);
1841   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
1842   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
1843   for (i=0; i<len; i++) bb[lrows[i]] = 1;
1844   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
1845   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1846   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1847   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
1848   if (x) {
1849     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1850     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1851     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1852     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1853   }
1854   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
1855   /* remove zeroed rows of off diagonal matrix */
1856   for (i = 0; i < len; ++i) {
1857     row   = lrows[i];
1858     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
1859     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
1860     for (k = 0; k < count; ++k) {
1861       aa[0] = 0.0;
1862       aa   += bs;
1863     }
1864   }
1865   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
1866   for (i = 0; i < l->B->rmap->N; ++i) {
1867     row = i/bs;
1868     for (j = baij->i[row]; j < baij->i[row+1]; ++j) {
1869       for (k = 0; k < bs; ++k) {
1870         col = bs*baij->j[j] + k;
1871         if (PetscAbsScalar(mask[col])) {
1872           aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
1873           if (b) bb[i] -= aa[0]*xx[col];
1874           aa[0] = 0.0;
1875         }
1876       }
1877     }
1878   }
1879   if (x) {
1880     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
1881     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
1882   }
1883   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
1884   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
1885   ierr = PetscFree(lrows);CHKERRQ(ierr);
1886 
1887   /* only change matrix nonzero state if pattern was allowed to be changed */
1888   if (!((Mat_SeqBAIJ*)(l->A->data))->keepnonzeropattern) {
1889     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
1890     ierr = MPI_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1891   }
1892   PetscFunctionReturn(0);
1893 }
1894 
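/*
   Usage sketch (illustrative): the column part of the elimination above works
   by scattering a 0/1 mask of the zeroed rows into the ghost ordering, so each
   process can find the off-diagonal columns it must clear and, when b is
   given, fold the eliminated entries into the right-hand side. A, x, and b
   are hypothetical, as in the MatZeroRows() sketch above.

     PetscInt rows[1] = {3};            ( hypothetical global row number )
     ierr = MatZeroRowsColumns(A,1,rows,1.0,x,b);CHKERRQ(ierr);

   Unlike MatZeroRows(), this preserves symmetry of a symmetric matrix.
*/
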
1895 #undef __FUNCT__
1896 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
1897 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1898 {
1899   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1900   PetscErrorCode ierr;
1901 
1902   PetscFunctionBegin;
1903   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1904   PetscFunctionReturn(0);
1905 }
1906 
1907 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat*);
1908 
1909 #undef __FUNCT__
1910 #define __FUNCT__ "MatEqual_MPIBAIJ"
1911 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscBool  *flag)
1912 {
1913   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1914   Mat            a,b,c,d;
1915   PetscBool      flg;
1916   PetscErrorCode ierr;
1917 
1918   PetscFunctionBegin;
1919   a = matA->A; b = matA->B;
1920   c = matB->A; d = matB->B;
1921 
1922   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1923   if (flg) {
1924     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1925   }
1926   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1927   PetscFunctionReturn(0);
1928 }
1929 
1930 #undef __FUNCT__
1931 #define __FUNCT__ "MatCopy_MPIBAIJ"
1932 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
1933 {
1934   PetscErrorCode ierr;
1935   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1936   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
1937 
1938   PetscFunctionBegin;
1939   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1940   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1941     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1942   } else {
1943     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1944     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1945   }
1946   PetscFunctionReturn(0);
1947 }
1948 
1949 #undef __FUNCT__
1950 #define __FUNCT__ "MatSetUp_MPIBAIJ"
1951 PetscErrorCode MatSetUp_MPIBAIJ(Mat A)
1952 {
1953   PetscErrorCode ierr;
1954 
1955   PetscFunctionBegin;
1956   ierr = MatMPIBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1957   PetscFunctionReturn(0);
1958 }
1959 
1960 #undef __FUNCT__
1961 #define __FUNCT__ "MatAXPYGetPreallocation_MPIBAIJ"
1962 PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
1963 {
1964   PetscErrorCode ierr;
1965   PetscInt       bs = Y->rmap->bs,m = Y->rmap->N/bs;
1966   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data;
1967   Mat_SeqBAIJ    *y = (Mat_SeqBAIJ*)Y->data;
1968 
1969   PetscFunctionBegin;
1970   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
1971   PetscFunctionReturn(0);
1972 }
1973 
1974 #undef __FUNCT__
1975 #define __FUNCT__ "MatAXPY_MPIBAIJ"
1976 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1977 {
1978   PetscErrorCode ierr;
1979   Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ*)X->data,*yy=(Mat_MPIBAIJ*)Y->data;
1980   PetscBLASInt   bnz,one=1;
1981   Mat_SeqBAIJ    *x,*y;
1982 
1983   PetscFunctionBegin;
1984   if (str == SAME_NONZERO_PATTERN) {
1985     PetscScalar alpha = a;
1986     x    = (Mat_SeqBAIJ*)xx->A->data;
1987     y    = (Mat_SeqBAIJ*)yy->A->data;
1988     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
1989     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
1990     x    = (Mat_SeqBAIJ*)xx->B->data;
1991     y    = (Mat_SeqBAIJ*)yy->B->data;
1992     ierr = PetscBLASIntCast(x->nz,&bnz);CHKERRQ(ierr);
1993     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
1994     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
1995   } else {
1996     Mat      B;
1997     PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs;
1998     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
1999     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2000     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2001     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2002     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2003     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2004     ierr = MatSetType(B,MATMPIBAIJ);CHKERRQ(ierr);
2005     ierr = MatAXPYGetPreallocation_SeqBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2006     ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2007     ierr = MatMPIBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2008     /* MatAXPY_BasicWithPreallocation() for a BAIJ matrix is much slower than for AIJ, even for bs=1! */
2009     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2010     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2011     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2012     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2013   }
2014   PetscFunctionReturn(0);
2015 }
2016 
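/*
   Usage sketch (illustrative): the fast path above applies only when the
   caller can promise matching nonzero patterns; X and Y stand for any two
   conforming assembled MATMPIBAIJ matrices.

     ierr = MatAXPY(Y,2.0,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);       ( two BLAS axpy calls )
     ierr = MatAXPY(Y,2.0,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);  ( repreallocates Y as computed above )

   An incorrect SAME_NONZERO_PATTERN promise silently corrupts Y, since the
   values are combined positionally in the underlying a arrays.
*/
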
2017 #undef __FUNCT__
2018 #define __FUNCT__ "MatRealPart_MPIBAIJ"
2019 PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
2020 {
2021   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
2022   PetscErrorCode ierr;
2023 
2024   PetscFunctionBegin;
2025   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2026   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2027   PetscFunctionReturn(0);
2028 }
2029 
2030 #undef __FUNCT__
2031 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ"
2032 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
2033 {
2034   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
2035   PetscErrorCode ierr;
2036 
2037   PetscFunctionBegin;
2038   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2039   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2040   PetscFunctionReturn(0);
2041 }
2042 
2043 #undef __FUNCT__
2044 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ"
2045 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
2046 {
2047   PetscErrorCode ierr;
2048   IS             iscol_local;
2049   PetscInt       csize;
2050 
2051   PetscFunctionBegin;
2052   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
2053   if (call == MAT_REUSE_MATRIX) {
2054     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
2055     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2056   } else {
2057     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
2058   }
2059   ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
2060   if (call == MAT_INITIAL_MATRIX) {
2061     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
2062     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
2063   }
2064   PetscFunctionReturn(0);
2065 }
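
/*
   Usage sketch (illustrative): the "ISAllGather" object composed above is
   what makes the reuse path work, so the same newmat must be passed back in.
   Here mat, isrow, and iscol stand for any assembled MATMPIBAIJ matrix and
   parallel index sets of global row and column numbers.

     Mat sub;
     ierr = MatGetSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);
     .... change values (not the nonzero structure) in mat ....
     ierr = MatGetSubMatrix(mat,isrow,iscol,MAT_REUSE_MATRIX,&sub);CHKERRQ(ierr);
*/
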
2066 extern PetscErrorCode MatGetSubMatrices_MPIBAIJ_local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool*,PetscBool*,Mat*);
2067 #undef __FUNCT__
2068 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ_Private"
2069 /*
2070   Not great since it makes two copies of the submatrix: first a local SeqBAIJ,
2071   and then the end result obtained by concatenating the local matrices.
2072   Writing it directly would be much like MatGetSubMatrices_MPIBAIJ().
2073 */
2074 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
2075 {
2076   PetscErrorCode ierr;
2077   PetscMPIInt    rank,size;
2078   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs;
2079   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal,ncol,nrow;
2080   Mat            M,Mreuse;
2081   MatScalar      *vwork,*aa;
2082   MPI_Comm       comm;
2083   IS             isrow_new, iscol_new;
2084   PetscBool      idflag,allrows, allcols;
2085   Mat_SeqBAIJ    *aij;
2086 
2087   PetscFunctionBegin;
2088   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
2089   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2090   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2091   /* The compression and expansion should be avoided. It does not point
2092      out errors and might change the indices, hence it is buggy */
2093   ierr = ISCompressIndicesGeneral(mat->rmap->N,mat->rmap->n,mat->rmap->bs,1,&isrow,&isrow_new);CHKERRQ(ierr);
2094   ierr = ISCompressIndicesGeneral(mat->cmap->N,mat->cmap->n,mat->cmap->bs,1,&iscol,&iscol_new);CHKERRQ(ierr);
2095 
2096   /* Check for special case: each processor gets entire matrix columns */
2097   ierr = ISIdentity(iscol,&idflag);CHKERRQ(ierr);
2098   ierr = ISGetLocalSize(iscol,&ncol);CHKERRQ(ierr);
2099   if (idflag && ncol == mat->cmap->N) allcols = PETSC_TRUE;
2100   else allcols = PETSC_FALSE;
2101 
2102   ierr = ISIdentity(isrow,&idflag);CHKERRQ(ierr);
2103   ierr = ISGetLocalSize(isrow,&nrow);CHKERRQ(ierr);
2104   if (idflag && nrow == mat->rmap->N) allrows = PETSC_TRUE;
2105   else allrows = PETSC_FALSE;
2106 
2107   if (call ==  MAT_REUSE_MATRIX) {
2108     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
2109     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
2110     ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_REUSE_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr);
2111   } else {
2112     ierr = MatGetSubMatrices_MPIBAIJ_local(mat,1,&isrow_new,&iscol_new,MAT_INITIAL_MATRIX,&allrows,&allcols,&Mreuse);CHKERRQ(ierr);
2113   }
2114   ierr = ISDestroy(&isrow_new);CHKERRQ(ierr);
2115   ierr = ISDestroy(&iscol_new);CHKERRQ(ierr);
2116   /*
2117       m - number of local rows
2118       n - number of columns (same on all processors)
2119       rstart - first row in new global matrix generated
2120   */
2121   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2122   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
2123   m    = m/bs;
2124   n    = n/bs;
2125 
2126   if (call == MAT_INITIAL_MATRIX) {
2127     aij = (Mat_SeqBAIJ*)(Mreuse)->data;
2128     ii  = aij->i;
2129     jj  = aij->j;
2130 
2131     /*
2132         Determine the number of non-zeros in the diagonal and off-diagonal
2133         portions of the matrix in order to do correct preallocation
2134     */
2135 
2136     /* first get start and end of "diagonal" columns */
2137     if (csize == PETSC_DECIDE) {
2138       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
2139       if (mglobal == n*bs) { /* square matrix */
2140         nlocal = m;
2141       } else {
2142         nlocal = n/size + ((n % size) > rank);
2143       }
2144     } else {
2145       nlocal = csize/bs;
2146     }
2147     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
2148     rstart = rend - nlocal;
2149     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
2150 
2151     /* next, compute all the lengths */
2152     ierr  = PetscMalloc2(m+1,&dlens,m+1,&olens);CHKERRQ(ierr);
2153     for (i=0; i<m; i++) {
2154       jend = ii[i+1] - ii[i];
2155       olen = 0;
2156       dlen = 0;
2157       for (j=0; j<jend; j++) {
2158         if (*jj < rstart || *jj >= rend) olen++;
2159         else dlen++;
2160         jj++;
2161       }
2162       olens[i] = olen;
2163       dlens[i] = dlen;
2164     }
2165     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
2166     ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr);
2167     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
2168     ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
2169     ierr = PetscFree2(dlens,olens);CHKERRQ(ierr);
2170   } else {
2171     PetscInt ml,nl;
2172 
2173     M    = *newmat;
2174     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
2175     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
2176     ierr = MatZeroEntries(M);CHKERRQ(ierr);
2177     /*
2178          The next two lines are needed so we may call MatSetValuesBlocked_MPIBAIJ() below directly,
2179        rather than the slower MatSetValues().
2180     */
2181     M->was_assembled = PETSC_TRUE;
2182     M->assembled     = PETSC_FALSE;
2183   }
2184   ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
2185   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
2186   aij  = (Mat_SeqBAIJ*)(Mreuse)->data;
2187   ii   = aij->i;
2188   jj   = aij->j;
2189   aa   = aij->a;
2190   for (i=0; i<m; i++) {
2191     row   = rstart/bs + i;
2192     nz    = ii[i+1] - ii[i];
2193     cwork = jj;     jj += nz;
2194     vwork = aa;     aa += nz*bs*bs;
2195     ierr  = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
2196   }
2197 
2198   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2199   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2200   *newmat = M;
2201 
2202   /* save submatrix used in processor for next request */
2203   if (call ==  MAT_INITIAL_MATRIX) {
2204     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
2205     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
2206   }
2207   PetscFunctionReturn(0);
2208 }
2209 
2210 #undef __FUNCT__
2211 #define __FUNCT__ "MatPermute_MPIBAIJ"
2212 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B)
2213 {
2214   MPI_Comm       comm,pcomm;
2215   PetscInt       clocal_size,nrows;
2216   const PetscInt *rows;
2217   PetscMPIInt    size;
2218   IS             crowp,lcolp;
2219   PetscErrorCode ierr;
2220 
2221   PetscFunctionBegin;
2222   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2223   /* make a collective version of 'rowp' */
2224   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
2225   if (pcomm==comm) {
2226     crowp = rowp;
2227   } else {
2228     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
2229     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
2230     ierr = ISCreateGeneral(comm,nrows,rows,PETSC_COPY_VALUES,&crowp);CHKERRQ(ierr);
2231     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
2232   }
2233   ierr = ISSetPermutation(crowp);CHKERRQ(ierr);
2234   /* make a local version of 'colp' */
2235   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
2236   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
2237   if (size==1) {
2238     lcolp = colp;
2239   } else {
2240     ierr = ISAllGather(colp,&lcolp);CHKERRQ(ierr);
2241   }
2242   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
2243   /* now we just get the submatrix */
2244   ierr = MatGetLocalSize(A,NULL,&clocal_size);CHKERRQ(ierr);
2245   ierr = MatGetSubMatrix_MPIBAIJ_Private(A,crowp,lcolp,clocal_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
2246   /* clean up */
2247   if (pcomm!=comm) {
2248     ierr = ISDestroy(&crowp);CHKERRQ(ierr);
2249   }
2250   if (size>1) {
2251     ierr = ISDestroy(&lcolp);CHKERRQ(ierr);
2252   }
2253   PetscFunctionReturn(0);
2254 }
2255 
2256 #undef __FUNCT__
2257 #define __FUNCT__ "MatGetGhosts_MPIBAIJ"
2258 PetscErrorCode  MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
2259 {
2260   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*) mat->data;
2261   Mat_SeqBAIJ *B    = (Mat_SeqBAIJ*)baij->B->data;
2262 
2263   PetscFunctionBegin;
2264   if (nghosts) *nghosts = B->nbs;
2265   if (ghosts) *ghosts = baij->garray;
2266   PetscFunctionReturn(0);
2267 }
2268 
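/*
   Usage sketch (illustrative): the ghosts returned here are the global block
   indices of the off-diagonal columns, useful for building ghosted vectors;
   mat stands for any assembled MATMPIBAIJ matrix.

     PetscInt       nghost;
     const PetscInt *ghost;
     ierr = MatGetGhosts(mat,&nghost,&ghost);CHKERRQ(ierr);

   The array belongs to the matrix and must not be freed by the caller.
*/
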
2269 #undef __FUNCT__
2270 #define __FUNCT__ "MatGetSeqNonzeroStructure_MPIBAIJ"
2271 PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A,Mat *newmat)
2272 {
2273   Mat            B;
2274   Mat_MPIBAIJ    *a  = (Mat_MPIBAIJ*)A->data;
2275   Mat_SeqBAIJ    *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data;
2276   Mat_SeqAIJ     *b;
2277   PetscErrorCode ierr;
2278   PetscMPIInt    size,rank,*recvcounts = 0,*displs = 0;
2279   PetscInt       sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs;
2280   PetscInt       m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf;
2281 
2282   PetscFunctionBegin;
2283   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
2284   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
2285 
2286   /* ----------------------------------------------------------------
2287      Tell every processor the number of nonzeros per row
2288   */
2289   ierr = PetscMalloc1((A->rmap->N/bs),&lens);CHKERRQ(ierr);
2290   for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) {
2291     lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs];
2292   }
2293   sendcount = A->rmap->rend/bs - A->rmap->rstart/bs;
2294   ierr      = PetscMalloc1(2*size,&recvcounts);CHKERRQ(ierr);
2295   displs    = recvcounts + size;
2296   for (i=0; i<size; i++) {
2297     recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs;
2298     displs[i]     = A->rmap->range[i]/bs;
2299   }
2300 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2301   ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2302 #else
2303   ierr = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2304 #endif
2305   /* ---------------------------------------------------------------
2306      Create the sequential matrix of the same type as the local block diagonal
2307   */
2308   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
2309   ierr = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2310   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2311   ierr = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr);
2312   b    = (Mat_SeqAIJ*)B->data;
2313 
2314   /*--------------------------------------------------------------------
2315     Copy my part of matrix column indices over
2316   */
2317   sendcount  = ad->nz + bd->nz;
2318   jsendbuf   = b->j + b->i[rstarts[rank]/bs];
2319   a_jsendbuf = ad->j;
2320   b_jsendbuf = bd->j;
2321   n          = A->rmap->rend/bs - A->rmap->rstart/bs;
2322   cnt        = 0;
2323   for (i=0; i<n; i++) {
2324 
2325     /* put in lower diagonal portion */
2326     m = bd->i[i+1] - bd->i[i];
2327     while (m > 0) {
2328       /* is it above diagonal (in bd (compressed) numbering) */
2329       if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break;
2330       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2331       m--;
2332     }
2333 
2334     /* put in diagonal portion */
2335     for (j=ad->i[i]; j<ad->i[i+1]; j++) {
2336       jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++;
2337     }
2338 
2339     /* put in upper diagonal portion */
2340     while (m-- > 0) {
2341       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2342     }
2343   }
2344   if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt);
2345 
2346   /*--------------------------------------------------------------------
2347     Gather all column indices to all processors
2348   */
2349   for (i=0; i<size; i++) {
2350     recvcounts[i] = 0;
2351     for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) {
2352       recvcounts[i] += lens[j];
2353     }
2354   }
2355   displs[0] = 0;
2356   for (i=1; i<size; i++) {
2357     displs[i] = displs[i-1] + recvcounts[i-1];
2358   }
2359 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2360   ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2361 #else
2362   ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2363 #endif
2364   /*--------------------------------------------------------------------
2365     Assemble the matrix into usable form (note that the numerical values are not yet set)
2366   */
2367   /* set the b->ilen (length of each row) values */
2368   ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr);
2369   /* set the b->i indices */
2370   b->i[0] = 0;
2371   for (i=1; i<=A->rmap->N/bs; i++) {
2372     b->i[i] = b->i[i-1] + lens[i-1];
2373   }
2374   ierr = PetscFree(lens);CHKERRQ(ierr);
2375   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2376   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2377   ierr = PetscFree(recvcounts);CHKERRQ(ierr);
2378 
2379   if (A->symmetric) {
2380     ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2381   } else if (A->hermitian) {
2382     ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr);
2383   } else if (A->structurally_symmetric) {
2384     ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2385   }
2386   *newmat = B;
2387   PetscFunctionReturn(0);
2388 }
2389 
2390 #undef __FUNCT__
2391 #define __FUNCT__ "MatSOR_MPIBAIJ"
2392 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2393 {
2394   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
2395   PetscErrorCode ierr;
2396   Vec            bb1 = 0;
2397 
2398   PetscFunctionBegin;
2399   if (flag == SOR_APPLY_UPPER) {
2400     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2401     PetscFunctionReturn(0);
2402   }
2403 
2404   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) {
2405     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2406   }
2407 
2408   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2409     if (flag & SOR_ZERO_INITIAL_GUESS) {
2410       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2411       its--;
2412     }
2413 
2414     while (its--) {
2415       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2416       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2417 
2418       /* update rhs: bb1 = bb - B*x */
2419       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2420       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2421 
2422       /* local sweep */
2423       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2424     }
2425   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
2426     if (flag & SOR_ZERO_INITIAL_GUESS) {
2427       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2428       its--;
2429     }
2430     while (its--) {
2431       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2432       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2433 
2434       /* update rhs: bb1 = bb - B*x */
2435       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2436       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2437 
2438       /* local sweep */
2439       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2440     }
2441   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
2442     if (flag & SOR_ZERO_INITIAL_GUESS) {
2443       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2444       its--;
2445     }
2446     while (its--) {
2447       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2448       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2449 
2450       /* update rhs: bb1 = bb - B*x */
2451       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2452       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2453 
2454       /* local sweep */
2455       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2456     }
2457   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Requested parallel SOR version not supported");
2458 
2459   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2460   PetscFunctionReturn(0);
2461 }
2462 
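/*
   Usage sketch (illustrative): only process-local sweeps are supported above;
   the off-process coupling enters through the rhs update bb1 = bb - B*x. A
   direct call doing one symmetric local sweep from a zero initial guess, with
   A, b, and x standing for a conforming matrix and vectors, is

     ierr = MatSOR(A,b,1.0,(MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS),0.0,1,1,x);CHKERRQ(ierr);

   which is the kind of call PCSOR issues when used as a parallel smoother.
*/
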
2463 #undef __FUNCT__
2464 #define __FUNCT__ "MatGetColumnNorms_MPIBAIJ"
2465 PetscErrorCode MatGetColumnNorms_MPIBAIJ(Mat A,NormType type,PetscReal *norms)
2466 {
2467   PetscErrorCode ierr;
2468   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)A->data;
2469   PetscInt       N,i,*garray = aij->garray;
2470   PetscInt       ib,jb,bs = A->rmap->bs;
2471   Mat_SeqBAIJ    *a_aij = (Mat_SeqBAIJ*) aij->A->data;
2472   MatScalar      *a_val = a_aij->a;
2473   Mat_SeqBAIJ    *b_aij = (Mat_SeqBAIJ*) aij->B->data;
2474   MatScalar      *b_val = b_aij->a;
2475   PetscReal      *work;
2476 
2477   PetscFunctionBegin;
2478   ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
2479   ierr = PetscCalloc1(N,&work);CHKERRQ(ierr);
2480   if (type == NORM_2) {
2481     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2482       for (jb=0; jb<bs; jb++) {
2483         for (ib=0; ib<bs; ib++) {
2484           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
2485           a_val++;
2486         }
2487       }
2488     }
2489     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2490       for (jb=0; jb<bs; jb++) {
2491         for (ib=0; ib<bs; ib++) {
2492           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val);
2493           b_val++;
2494         }
2495       }
2496     }
2497   } else if (type == NORM_1) {
2498     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2499       for (jb=0; jb<bs; jb++) {
2500         for (ib=0; ib<bs; ib++) {
2501           work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
2502           a_val++;
2503         }
2504       }
2505     }
2506     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2507       for (jb=0; jb<bs; jb++) {
2508         for (ib=0; ib<bs; ib++) {
2509           work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val);
2510           b_val++;
2511         }
2512       }
2513     }
2514   } else if (type == NORM_INFINITY) {
2515     for (i=a_aij->i[0]; i<a_aij->i[aij->A->rmap->n/bs]; i++) {
2516       for (jb=0; jb<bs; jb++) {
2517         for (ib=0; ib<bs; ib++) {
2518           PetscInt col = A->cmap->rstart + a_aij->j[i] * bs + jb;
2519           work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]);
2520           a_val++;
2521         }
2522       }
2523     }
2524     for (i=b_aij->i[0]; i<b_aij->i[aij->B->rmap->n/bs]; i++) {
2525       for (jb=0; jb<bs; jb++) {
2526         for (ib=0; ib<bs; ib++) {
2527           PetscInt col = garray[b_aij->j[i]] * bs + jb;
2528           work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]);
2529           b_val++;
2530         }
2531       }
2532     }
2533   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
2534   if (type == NORM_INFINITY) {
2535     ierr = MPI_Allreduce(work,norms,N,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2536   } else {
2537     ierr = MPI_Allreduce(work,norms,N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2538   }
2539   ierr = PetscFree(work);CHKERRQ(ierr);
2540   if (type == NORM_2) {
2541     for (i=0; i<N; i++) norms[i] = PetscSqrtReal(norms[i]);
2542   }
2543   PetscFunctionReturn(0);
2544 }
2545 
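/*
   Usage sketch (illustrative): norms must hold one entry per global column;
   A stands for any assembled MATMPIBAIJ matrix.

     PetscReal *norms;
     PetscInt  N;
     ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
     ierr = PetscMalloc1(N,&norms);CHKERRQ(ierr);
     ierr = MatGetColumnNorms(A,NORM_2,norms);CHKERRQ(ierr);
     ierr = PetscFree(norms);CHKERRQ(ierr);

   Note the result is not scalable in memory, since every process receives
   all N column norms via the Allreduce above.
*/
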
2546 #undef __FUNCT__
2547 #define __FUNCT__ "MatInvertBlockDiagonal_MPIBAIJ"
2548 PetscErrorCode  MatInvertBlockDiagonal_MPIBAIJ(Mat A,const PetscScalar **values)
2549 {
2550   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*) A->data;
2551   PetscErrorCode ierr;
2552 
2553   PetscFunctionBegin;
2554   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2555   PetscFunctionReturn(0);
2556 }
2557 
2558 
2559 /* -------------------------------------------------------------------*/
2560 static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ,
2561                                        MatGetRow_MPIBAIJ,
2562                                        MatRestoreRow_MPIBAIJ,
2563                                        MatMult_MPIBAIJ,
2564                                 /* 4*/ MatMultAdd_MPIBAIJ,
2565                                        MatMultTranspose_MPIBAIJ,
2566                                        MatMultTransposeAdd_MPIBAIJ,
2567                                        0,
2568                                        0,
2569                                        0,
2570                                 /*10*/ 0,
2571                                        0,
2572                                        0,
2573                                        MatSOR_MPIBAIJ,
2574                                        MatTranspose_MPIBAIJ,
2575                                 /*15*/ MatGetInfo_MPIBAIJ,
2576                                        MatEqual_MPIBAIJ,
2577                                        MatGetDiagonal_MPIBAIJ,
2578                                        MatDiagonalScale_MPIBAIJ,
2579                                        MatNorm_MPIBAIJ,
2580                                 /*20*/ MatAssemblyBegin_MPIBAIJ,
2581                                        MatAssemblyEnd_MPIBAIJ,
2582                                        MatSetOption_MPIBAIJ,
2583                                        MatZeroEntries_MPIBAIJ,
2584                                 /*24*/ MatZeroRows_MPIBAIJ,
2585                                        0,
2586                                        0,
2587                                        0,
2588                                        0,
2589                                 /*29*/ MatSetUp_MPIBAIJ,
2590                                        0,
2591                                        0,
2592                                        0,
2593                                        0,
2594                                 /*34*/ MatDuplicate_MPIBAIJ,
2595                                        0,
2596                                        0,
2597                                        0,
2598                                        0,
2599                                 /*39*/ MatAXPY_MPIBAIJ,
2600                                        MatGetSubMatrices_MPIBAIJ,
2601                                        MatIncreaseOverlap_MPIBAIJ,
2602                                        MatGetValues_MPIBAIJ,
2603                                        MatCopy_MPIBAIJ,
2604                                 /*44*/ 0,
2605                                        MatScale_MPIBAIJ,
2606                                        0,
2607                                        0,
2608                                        MatZeroRowsColumns_MPIBAIJ,
2609                                 /*49*/ 0,
2610                                        0,
2611                                        0,
2612                                        0,
2613                                        0,
2614                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2615                                        0,
2616                                        MatSetUnfactored_MPIBAIJ,
2617                                        MatPermute_MPIBAIJ,
2618                                        MatSetValuesBlocked_MPIBAIJ,
2619                                 /*59*/ MatGetSubMatrix_MPIBAIJ,
2620                                        MatDestroy_MPIBAIJ,
2621                                        MatView_MPIBAIJ,
2622                                        0,
2623                                        0,
2624                                 /*64*/ 0,
2625                                        0,
2626                                        0,
2627                                        0,
2628                                        0,
2629                                 /*69*/ MatGetRowMaxAbs_MPIBAIJ,
2630                                        0,
2631                                        0,
2632                                        0,
2633                                        0,
2634                                 /*74*/ 0,
2635                                        MatFDColoringApply_BAIJ,
2636                                        0,
2637                                        0,
2638                                        0,
2639                                 /*79*/ 0,
2640                                        0,
2641                                        0,
2642                                        0,
2643                                        MatLoad_MPIBAIJ,
2644                                 /*84*/ 0,
2645                                        0,
2646                                        0,
2647                                        0,
2648                                        0,
2649                                 /*89*/ 0,
2650                                        0,
2651                                        0,
2652                                        0,
2653                                        0,
2654                                 /*94*/ 0,
2655                                        0,
2656                                        0,
2657                                        0,
2658                                        0,
2659                                 /*99*/ 0,
2660                                        0,
2661                                        0,
2662                                        0,
2663                                        0,
2664                                 /*104*/0,
2665                                        MatRealPart_MPIBAIJ,
2666                                        MatImaginaryPart_MPIBAIJ,
2667                                        0,
2668                                        0,
2669                                 /*109*/0,
2670                                        0,
2671                                        0,
2672                                        0,
2673                                        0,
2674                                 /*114*/MatGetSeqNonzeroStructure_MPIBAIJ,
2675                                        0,
2676                                        MatGetGhosts_MPIBAIJ,
2677                                        0,
2678                                        0,
2679                                 /*119*/0,
2680                                        0,
2681                                        0,
2682                                        0,
2683                                        MatGetMultiProcBlock_MPIBAIJ,
2684                                 /*124*/0,
2685                                        MatGetColumnNorms_MPIBAIJ,
2686                                        MatInvertBlockDiagonal_MPIBAIJ,
2687                                        0,
2688                                        0,
2689                                /*129*/ 0,
2690                                        0,
2691                                        0,
2692                                        0,
2693                                        0,
2694                                /*134*/ 0,
2695                                        0,
2696                                        0,
2697                                        0,
2698                                        0,
2699                                /*139*/ 0,
2700                                        0,
2701                                        0,
2702                                        MatFDColoringSetUp_MPIXAIJ
2703 };
2704 
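/* Returns, without copying, the sequential "diagonal" block (local rows and the corresponding columns) of the parallel matrix */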
2705 #undef __FUNCT__
2706 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
2707 PetscErrorCode  MatGetDiagonalBlock_MPIBAIJ(Mat A,Mat *a)
2708 {
2709   PetscFunctionBegin;
2710   *a = ((Mat_MPIBAIJ*)A->data)->A;
2711   PetscFunctionReturn(0);
2712 }
2713 
2714 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*);
2715 
2716 #undef __FUNCT__
2717 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ"
2718 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2719 {
2720   PetscInt       m,rstart,cstart,cend;
2721   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
2722   const PetscInt *JJ    =0;
2723   PetscScalar    *values=0;
2724   PetscBool      roworiented = ((Mat_MPIBAIJ*)B->data)->roworiented;
2725   PetscErrorCode ierr;
2726 
2727   PetscFunctionBegin;
2728   ierr   = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2729   ierr   = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2730   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2731   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2732   ierr   = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2733   m      = B->rmap->n/bs;
2734   rstart = B->rmap->rstart/bs;
2735   cstart = B->cmap->rstart/bs;
2736   cend   = B->cmap->rend/bs;
2737 
2738   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
2739   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
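  /* For each local block row, count how many blocks fall inside the diagonal portion
     [cstart,cend) and how many fall outside it; jj is assumed sorted within each row */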
2740   for (i=0; i<m; i++) {
2741     nz = ii[i+1] - ii[i];
2742     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
2743     nz_max = PetscMax(nz_max,nz);
2744     JJ     = jj + ii[i];
2745     for (j=0; j<nz; j++) {
2746       if (*JJ >= cstart) break;
2747       JJ++;
2748     }
2749     d = 0;
2750     for (; j<nz; j++) {
2751       if (*JJ++ >= cend) break;
2752       d++;
2753     }
2754     d_nnz[i] = d;
2755     o_nnz[i] = nz - d;
2756   }
2757   ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2758   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
2759 
2760   values = (PetscScalar*)V;
2761   if (!values) {
2762     ierr = PetscMalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr);
2763     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2764   }
2765   for (i=0; i<m; i++) {
2766     PetscInt          row    = i + rstart;
2767     PetscInt          ncols  = ii[i+1] - ii[i];
2768     const PetscInt    *icols = jj + ii[i];
2769     if (!roworiented) {         /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
2770       const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2771       ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2772     } else {                    /* block ordering does not match so we can only insert one block at a time. */
2773       PetscInt j;
2774       for (j=0; j<ncols; j++) {
2775         const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
2776         ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
2777       }
2778     }
2779   }
2780 
2781   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2782   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2783   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2784   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2785   PetscFunctionReturn(0);
2786 }
2787 
2788 #undef __FUNCT__
2789 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR"
2790 /*@C
2791    MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format
2792    (block compressed row).
2793 
2794    Collective on MPI_Comm
2795 
2796    Input Parameters:
2797 +  B - the matrix
2798 .  bs - the block size
2799 .  i - the indices into j for the start of each local block row (starts with zero)
2800 .  j - the block column indices for each local block row (starts with zero); these must be sorted for each block row
2801 -  v - optional values in the matrix
2802 
2803    Level: developer
2804 
2805    Notes: The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED.  For example, C programs
2806    may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
2807    over rows within a block and the last index is over columns within a block.  Fortran programs will likely set
2808    MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
2809    block and the second index is over columns within a block.
2810 
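   A sketch of the input arrays (sizes and values are illustrative, and B is assumed to be
   the matrix being preallocated), for bs=2 and two local block rows where block row 0
   holds blocks in global block columns 0 and 1 and block row 1 holds one block in global
   block column 1:
.vb
   PetscInt    i[3] = {0,2,3};   /* i[row+1]-i[row] = number of blocks in block row */
   PetscInt    j[3] = {0,1,1};   /* global block column of each block               */
   PetscScalar v[3*2*2];         /* bs*bs values per block; may be passed as NULL   */
   ierr = MatMPIBAIJSetPreallocationCSR(B,2,i,j,v);CHKERRQ(ierr);
.ve
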
2811 .keywords: matrix, aij, compressed row, sparse, parallel
2812 
2813 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ, MatCreateMPIBAIJWithArrays(), MPIBAIJ
2814 @*/
2815 PetscErrorCode  MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2816 {
2817   PetscErrorCode ierr;
2818 
2819   PetscFunctionBegin;
2820   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2821   PetscValidType(B,1);
2822   PetscValidLogicalCollectiveInt(B,bs,2);
2823   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
2824   PetscFunctionReturn(0);
2825 }
2826 
2827 #undef __FUNCT__
2828 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
2829 PetscErrorCode  MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
2830 {
2831   Mat_MPIBAIJ    *b;
2832   PetscErrorCode ierr;
2833   PetscInt       i;
2834 
2835   PetscFunctionBegin;
2836   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
2837   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2838   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2839   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2840 
2841   if (d_nnz) {
2842     for (i=0; i<B->rmap->n/bs; i++) {
2843       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
2844     }
2845   }
2846   if (o_nnz) {
2847     for (i=0; i<B->rmap->n/bs; i++) {
2848       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
2849     }
2850   }
2851 
2852   b      = (Mat_MPIBAIJ*)B->data;
2853   b->bs2 = bs*bs;
2854   b->mbs = B->rmap->n/bs;
2855   b->nbs = B->cmap->n/bs;
2856   b->Mbs = B->rmap->N/bs;
2857   b->Nbs = B->cmap->N/bs;
2858 
2859   for (i=0; i<=b->size; i++) {
2860     b->rangebs[i] = B->rmap->range[i]/bs;
2861   }
2862   b->rstartbs = B->rmap->rstart/bs;
2863   b->rendbs   = B->rmap->rend/bs;
2864   b->cstartbs = B->cmap->rstart/bs;
2865   b->cendbs   = B->cmap->rend/bs;
2866 
2867   if (!B->preallocated) {
2868     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2869     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2870     ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
2871     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2872     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2873     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2874     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
2875     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2876     ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr);
2877   }
2878 
2879   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2880   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
2881   B->preallocated = PETSC_TRUE;
2882   PetscFunctionReturn(0);
2883 }
2884 
2885 extern PetscErrorCode  MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
2886 extern PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
2887 
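/*
   Builds a MATMPIADJ adjacency structure from the block structure of the BAIJ matrix:
   one vertex per block row, with the diagonal block dropped and the local and ghost
   block columns merged in increasing global order.
*/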
2888 #undef __FUNCT__
2889 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj"
2890 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype,MatReuse reuse,Mat *adj)
2891 {
2892   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
2893   PetscErrorCode ierr;
2894   Mat_SeqBAIJ    *d  = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data;
2895   PetscInt       M   = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs;
2896   const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;
2897 
2898   PetscFunctionBegin;
2899   ierr  = PetscMalloc1((M+1),&ii);CHKERRQ(ierr);
2900   ii[0] = 0;
2901   for (i=0; i<M; i++) {
2902     if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]);
2903     if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]);
2904     ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i];
2905     /* remove one from the count if this block row of the matrix has a diagonal block */
2906     for (j=id[i]; j<id[i+1]; j++) {
2907       if (jd[j] == i) {ii[i+1]--;break;}
2908     }
2909   }
2910   ierr = PetscMalloc1(ii[M],&jj);CHKERRQ(ierr);
2911   cnt  = 0;
2912   for (i=0; i<M; i++) {
2913     for (j=io[i]; j<io[i+1]; j++) {
2914       if (garray[jo[j]] > rstart) break;
2915       jj[cnt++] = garray[jo[j]];
2916     }
2917     for (k=id[i]; k<id[i+1]; k++) {
2918       if (jd[k] != i) {
2919         jj[cnt++] = rstart + jd[k];
2920       }
2921     }
2922     for (; j<io[i+1]; j++) {
2923       jj[cnt++] = garray[jo[j]];
2924     }
2925   }
2926   ierr = MatCreateMPIAdj(PetscObjectComm((PetscObject)B),M,B->cmap->N/B->rmap->bs,ii,jj,NULL,adj);CHKERRQ(ierr);
2927   PetscFunctionReturn(0);
2928 }
2929 
2930 #include <../src/mat/impls/aij/mpi/mpiaij.h>
2931 
2932 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat,MatType,MatReuse,Mat*);
2933 
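/*
   Converts an assembled MATMPIBAIJ matrix to MATMPIAIJ by disassembling the parallel
   matrix and converting its two sequential pieces (the diagonal and off-diagonal
   SeqBAIJ blocks) to SeqAIJ inside a freshly created MPIAIJ matrix.
*/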
2934 #undef __FUNCT__
2935 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAIJ"
2936 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
2937 {
2938   PetscErrorCode ierr;
2939   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
2940   Mat            B;
2941   Mat_MPIAIJ     *b;
2942 
2943   PetscFunctionBegin;
2944   if (!A->assembled) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Matrix must be assembled");
2945 
2946   ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2947   ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
2948   ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
2949   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
2950   ierr = MatMPIAIJSetPreallocation(B,0,NULL,0,NULL);CHKERRQ(ierr);
2951   b    = (Mat_MPIAIJ*) B->data;
2952 
2953   ierr = MatDestroy(&b->A);CHKERRQ(ierr);
2954   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2955   ierr = MatDisAssemble_MPIBAIJ(A);CHKERRQ(ierr);
2956   ierr = MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A);CHKERRQ(ierr);
2957   ierr = MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B);CHKERRQ(ierr);
2958   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2959   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2960   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2961   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2962   if (reuse == MAT_REUSE_MATRIX) {
2963     ierr = MatHeaderReplace(A,B);CHKERRQ(ierr);
2964   } else {
2965    *newmat = B;
2966   }
2967   PetscFunctionReturn(0);
2968 }
2969 
2970 #if defined(PETSC_HAVE_MUMPS)
2971 PETSC_EXTERN PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*);
2972 #endif
2973 
2974 /*MC
2975    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
2976 
2977    Options Database Keys:
2978 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
2979 . -mat_block_size <bs> - set the blocksize used to store the matrix
2980 - -mat_use_hash_table <fact> - set hash table factor used during matrix assembly
2981 
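  For example, to select this type and a block size of 3 from the command line (the
  program name is illustrative):
.vb
   ./myapp -mat_type mpibaij -mat_block_size 3
.ve
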
2982   Level: beginner
2983 
2984 .seealso: MatCreateBAIJ(), MATSEQBAIJ, MATBAIJ, MatMPIBAIJSetPreallocation()
2985 M*/
2986 
2987 PETSC_EXTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat,MatType,MatReuse,Mat*);
2988 
2989 #undef __FUNCT__
2990 #define __FUNCT__ "MatCreate_MPIBAIJ"
2991 PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B)
2992 {
2993   Mat_MPIBAIJ    *b;
2994   PetscErrorCode ierr;
2995   PetscBool      flg;
2996 
2997   PetscFunctionBegin;
2998   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
2999   B->data = (void*)b;
3000 
3001   ierr         = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3002   B->assembled = PETSC_FALSE;
3003 
3004   B->insertmode = NOT_SET_VALUES;
3005   ierr          = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
3006   ierr          = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr);
3007 
3008   /* build local table of row and column ownerships */
3009   ierr = PetscMalloc1((b->size+1),&b->rangebs);CHKERRQ(ierr);
3010 
3011   /* build cache for off array entries formed */
3012   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
3013 
3014   b->donotstash  = PETSC_FALSE;
3015   b->colmap      = NULL;
3016   b->garray      = NULL;
3017   b->roworiented = PETSC_TRUE;
3018 
3019   /* stuff used in block assembly */
3020   b->barray = 0;
3021 
3022   /* stuff used for matrix vector multiply */
3023   b->lvec  = 0;
3024   b->Mvctx = 0;
3025 
3026   /* stuff for MatGetRow() */
3027   b->rowindices   = 0;
3028   b->rowvalues    = 0;
3029   b->getrowactive = PETSC_FALSE;
3030 
3031   /* hash table stuff */
3032   b->ht           = 0;
3033   b->hd           = 0;
3034   b->ht_size      = 0;
3035   b->ht_flag      = PETSC_FALSE;
3036   b->ht_fact      = 0;
3037   b->ht_total_ct  = 0;
3038   b->ht_insert_ct = 0;
3039 
3040   /* stuff for MatGetSubMatrices_MPIBAIJ_local() */
3041   b->ijonly = PETSC_FALSE;
3042 
3043   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for MPIBAIJ matrix","Mat");CHKERRQ(ierr);
3044   ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,NULL);CHKERRQ(ierr);
3045   if (flg) {
3046     PetscReal fact = 1.39;
3047     ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
3048     ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
3049     if (fact <= 1.0) fact = 1.39;
3050     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
3051     ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
3052   }
3053   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3054 
3055 #if defined(PETSC_HAVE_MUMPS)
3056   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_baij_mumps);CHKERRQ(ierr);
3057 #endif
3058   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr);
3059   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpiaij_C",MatConvert_MPIBAIJ_MPIAIJ);CHKERRQ(ierr);
3060   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpisbaij_C",MatConvert_MPIBAIJ_MPISBAIJ);CHKERRQ(ierr);
3061   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
3062   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
3063   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
3064   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
3065   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
3066   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
3067   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSetHashTableFactor_C",MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
3068   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpibaij_mpibstrm_C",MatConvert_MPIBAIJ_MPIBSTRM);CHKERRQ(ierr);
3069   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);
3070   PetscFunctionReturn(0);
3071 }
3072 
3073 /*MC
3074    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
3075 
3076    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
3077    and MATMPIBAIJ otherwise.
3078 
3079    Options Database Keys:
3080 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
3081 
3082   Level: beginner
3083 
3084 .seealso: MatCreateBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3085 M*/
3086 
3087 #undef __FUNCT__
3088 #define __FUNCT__ "MatMPIBAIJSetPreallocation"
3089 /*@C
3090    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
3091    (block compressed row).  For good matrix assembly performance
3092    the user should preallocate the matrix storage by setting the parameters
3093    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3094    performance can be increased by more than a factor of 50.
3095 
3096    Collective on Mat
3097 
3098    Input Parameters:
3099 +  B - the matrix
3100 .  bs   - size of block
3101 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
3102            submatrix  (same for all local rows)
3103 .  d_nnz - array containing the number of block nonzeros in the various block rows
3104            in the diagonal portion of the local submatrix (possibly different for each block
3105            row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry and
3106            set it even if it is zero.
3107 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
3108            submatrix (same for all local rows).
3109 -  o_nnz - array containing the number of block nonzeros in the various block rows of the
3110            off-diagonal portion of the local submatrix (possibly different for
3111            each block row) or NULL.
3112 
3113    If the *_nnz parameter is given then the *_nz parameter is ignored
3114 
3115    Options Database Keys:
3116 +   -mat_block_size - size of the blocks to use
3117 -   -mat_use_hash_table <fact> - set hash table factor
3118 
3119    Notes:
3120    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
3121    then it must be used on all processors that share the object for that argument.
3122 
3123    Storage Information:
3124    For a square global matrix we define each processor's diagonal portion
3125    to be its local rows and the corresponding columns (a square submatrix);
3126    each processor's off-diagonal portion encompasses the remainder of the
3127    local matrix (a rectangular submatrix).
3128 
3129    The user can specify preallocated storage for the diagonal part of
3130    the local submatrix with either d_nz or d_nnz (not both).  Set
3131    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3132    memory allocation.  Likewise, specify preallocated storage for the
3133    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3134 
3135    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3136    the figure below we depict these three local rows and all columns (0-11).
3137 
3138 .vb
3139            0 1 2 3 4 5 6 7 8 9 10 11
3140           --------------------------
3141    row 3  |o o o d d d o o o o  o  o
3142    row 4  |o o o d d d o o o o  o  o
3143    row 5  |o o o d d d o o o o  o  o
3144           --------------------------
3145 .ve
3146 
3147    Thus, any entries in the d locations are stored in the d (diagonal)
3148    submatrix, and any entries in the o locations are stored in the
3149    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3150    stored simply in the MATSEQBAIJ format for compressed row storage.
3151 
3152    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3153    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3154    In general, for PDE problems in which most nonzeros are near the diagonal,
3155    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3156    or you will get TERRIBLE performance; see the users' manual chapter on
3157    matrices.
3158 
3159    You can call MatGetInfo() to get information on how effective the preallocation was,
3160    for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
3161    You can also run with the option -info and look for messages with the string
3162    malloc in them to see if additional memory allocation was needed.
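
   For example, a sketch (sizes are illustrative) for bs=3 with two local block rows,
   where block row 0 has 2 diagonal-portion blocks and 1 off-diagonal block, and block
   row 1 has 1 and 2 respectively:
.vb
   PetscInt d_nnz[2] = {2,1};
   PetscInt o_nnz[2] = {1,2};
   ierr = MatMPIBAIJSetPreallocation(B,3,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve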
3163 
3164    Level: intermediate
3165 
3166 .keywords: matrix, block, aij, compressed row, sparse, parallel
3167 
3168 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocationCSR(), PetscSplitOwnership()
3169 @*/
3170 PetscErrorCode  MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3171 {
3172   PetscErrorCode ierr;
3173 
3174   PetscFunctionBegin;
3175   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3176   PetscValidType(B,1);
3177   PetscValidLogicalCollectiveInt(B,bs,2);
3178   ierr = PetscTryMethod(B,"MatMPIBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
3179   PetscFunctionReturn(0);
3180 }
3181 
3182 #undef __FUNCT__
3183 #define __FUNCT__ "MatCreateBAIJ"
3184 /*@C
3185    MatCreateBAIJ - Creates a sparse parallel matrix in block AIJ format
3186    (block compressed row).  For good matrix assembly performance
3187    the user should preallocate the matrix storage by setting the parameters
3188    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3189    performance can be increased by more than a factor of 50.
3190 
3191    Collective on MPI_Comm
3192 
3193    Input Parameters:
3194 +  comm - MPI communicator
3195 .  bs   - size of block
3196 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3197            This value should be the same as the local size used in creating the
3198            y vector for the matrix-vector product y = Ax.
3199 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
3200            This value should be the same as the local size used in creating the
3201            x vector for the matrix-vector product y = Ax.
3202 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3203 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3204 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
3205            submatrix  (same for all local rows)
3206 .  d_nnz - array containing the number of nonzero blocks in the various block rows
3207            in the diagonal portion of the local submatrix (possibly different for each block
3208            row) or NULL.  If you plan to factor the matrix you must leave room for the diagonal entry
3209            and set it even if it is zero.
3210 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3211            submatrix (same for all local rows).
3212 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
3213            off-diagonal portion of the local submatrix (possibly different for
3214            each block row) or NULL.
3215 
3216    Output Parameter:
3217 .  A - the matrix
3218 
3219    Options Database Keys:
3220 +   -mat_block_size - size of the blocks to use
3221 -   -mat_use_hash_table <fact> - set hash table factor
3222 
3223    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3224    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3225    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3226 
3227    Notes:
3228    If the *_nnz parameter is given then the *_nz parameter is ignored
3229 
3230    A nonzero block is any block that has 1 or more nonzeros in it
3231 
3232    The user MUST specify either the local or global matrix dimensions
3233    (possibly both).
3234 
3235    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
3236    then it must be used on all processors that share the object for that argument.
3237 
3238    Storage Information:
3239    For a square global matrix we define each processor's diagonal portion
3240    to be its local rows and the corresponding columns (a square submatrix);
3241    each processor's off-diagonal portion encompasses the remainder of the
3242    local matrix (a rectangular submatrix).
3243 
3244    The user can specify preallocated storage for the diagonal part of
3245    the local submatrix with either d_nz or d_nnz (not both).  Set
3246    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
3247    memory allocation.  Likewise, specify preallocated storage for the
3248    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3249 
3250    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3251    the figure below we depict these three local rows and all columns (0-11).
3252 
3253 .vb
3254            0 1 2 3 4 5 6 7 8 9 10 11
3255           --------------------------
3256    row 3  |o o o d d d o o o o  o  o
3257    row 4  |o o o d d d o o o o  o  o
3258    row 5  |o o o d d d o o o o  o  o
3259           --------------------------
3260 .ve
3261 
3262    Thus, any entries in the d locations are stored in the d (diagonal)
3263    submatrix, and any entries in the o locations are stored in the
3264    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3265    stored simply in the MATSEQBAIJ format for compressed row storage.
3266 
3267    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3268    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3269    In general, for PDE problems in which most nonzeros are near the diagonal,
3270    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3271    or you will get TERRIBLE performance; see the users' manual chapter on
3272    matrices.
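
   A minimal call sketch (the global sizes M and N and the preallocation guesses are
   illustrative):
.vb
   Mat A;
   ierr = MatCreateBAIJ(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DECIDE,M,N,5,NULL,2,NULL,&A);CHKERRQ(ierr);
.ve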
3273 
3274    Level: intermediate
3275 
3276 .keywords: matrix, block, aij, compressed row, sparse, parallel
3277 
3278 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3279 @*/
3280 PetscErrorCode  MatCreateBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3281 {
3282   PetscErrorCode ierr;
3283   PetscMPIInt    size;
3284 
3285   PetscFunctionBegin;
3286   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3287   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3288   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3289   if (size > 1) {
3290     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
3291     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3292   } else {
3293     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3294     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3295   }
3296   PetscFunctionReturn(0);
3297 }
3298 
3299 #undef __FUNCT__
3300 #define __FUNCT__ "MatDuplicate_MPIBAIJ"
3301 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3302 {
3303   Mat            mat;
3304   Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
3305   PetscErrorCode ierr;
3306   PetscInt       len=0;
3307 
3308   PetscFunctionBegin;
3309   *newmat = 0;
3310   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
3311   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3312   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3313   ierr    = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3314 
3315   mat->factortype   = matin->factortype;
3316   mat->preallocated = PETSC_TRUE;
3317   mat->assembled    = PETSC_TRUE;
3318   mat->insertmode   = NOT_SET_VALUES;
3319 
3320   a             = (Mat_MPIBAIJ*)mat->data;
3321   mat->rmap->bs = matin->rmap->bs;
3322   a->bs2        = oldmat->bs2;
3323   a->mbs        = oldmat->mbs;
3324   a->nbs        = oldmat->nbs;
3325   a->Mbs        = oldmat->Mbs;
3326   a->Nbs        = oldmat->Nbs;
3327 
3328   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3329   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3330 
3331   a->size         = oldmat->size;
3332   a->rank         = oldmat->rank;
3333   a->donotstash   = oldmat->donotstash;
3334   a->roworiented  = oldmat->roworiented;
3335   a->rowindices   = 0;
3336   a->rowvalues    = 0;
3337   a->getrowactive = PETSC_FALSE;
3338   a->barray       = 0;
3339   a->rstartbs     = oldmat->rstartbs;
3340   a->rendbs       = oldmat->rendbs;
3341   a->cstartbs     = oldmat->cstartbs;
3342   a->cendbs       = oldmat->cendbs;
3343 
3344   /* hash table stuff */
3345   a->ht           = 0;
3346   a->hd           = 0;
3347   a->ht_size      = 0;
3348   a->ht_flag      = oldmat->ht_flag;
3349   a->ht_fact      = oldmat->ht_fact;
3350   a->ht_total_ct  = 0;
3351   a->ht_insert_ct = 0;
3352 
3353   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
3354   if (oldmat->colmap) {
3355 #if defined(PETSC_USE_CTABLE)
3356     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3357 #else
3358     ierr = PetscMalloc1((a->Nbs),&a->colmap);CHKERRQ(ierr);
3359     ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3360     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3361 #endif
3362   } else a->colmap = 0;
3363 
3364   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
3365     ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr);
3366     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3367     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
3368   } else a->garray = 0;
3369 
3370   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
3371   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3372   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
3373   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3374   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
3375 
3376   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3377   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
3378   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3379   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
3380   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3381   *newmat = mat;
3382   PetscFunctionReturn(0);
3383 }
3384 
3385 #undef __FUNCT__
3386 #define __FUNCT__ "MatLoad_MPIBAIJ"
3387 PetscErrorCode MatLoad_MPIBAIJ(Mat newmat,PetscViewer viewer)
3388 {
3389   PetscErrorCode ierr;
3390   int            fd;
3391   PetscInt       i,nz,j,rstart,rend;
3392   PetscScalar    *vals,*buf;
3393   MPI_Comm       comm;
3394   MPI_Status     status;
3395   PetscMPIInt    rank,size,maxnz;
3396   PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
3397   PetscInt       *locrowlens = NULL,*procsnz = NULL,*browners = NULL;
3398   PetscInt       jj,*mycols,*ibuf,bs = newmat->rmap->bs,Mbs,mbs,extra_rows,mmax;
3399   PetscMPIInt    tag    = ((PetscObject)viewer)->tag;
3400   PetscInt       *dlens = NULL,*odlens = NULL,*mask = NULL,*masked1 = NULL,*masked2 = NULL,rowcount,odcount;
3401   PetscInt       dcount,kmax,k,nzcount,tmp,mend,sizesset=1,grows,gcols;
3402 
3403   PetscFunctionBegin;
3404   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3405   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPIBAIJ matrix","Mat");CHKERRQ(ierr);
3406   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3407   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3408   if (bs < 0) bs = 1;
3409 
3410   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3411   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3412   if (!rank) {
3413     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3414     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
3415     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
3416   }
3417 
3418   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
3419 
3420   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3421   M    = header[1]; N = header[2];
3422 
3423   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3424   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
3425   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
3426 
3427   /* If global sizes are set, check if they are consistent with that given in the file */
3428   if (sizesset) {
3429     ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
3430   }
3431   if (sizesset && M != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",M,grows);
3432   if (sizesset && N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",N,gcols);
3433 
3434   if (M != N) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Can only do square matrices");
3435 
3436   /*
3437      This code adds extra rows to make sure the number of rows is
3438      divisible by the blocksize
3439   */
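  /* e.g. M=10 with bs=4 gives Mbs=3 and extra_rows=2, i.e. a padded global size of 12 */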
3440   Mbs        = M/bs;
3441   extra_rows = bs - M + bs*Mbs;
3442   if (extra_rows == bs) extra_rows = 0;
3443   else                  Mbs++;
3444   if (extra_rows && !rank) {
3445     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3446   }
3447 
3448   /* determine ownership of all rows */
3449   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
3450     mbs = Mbs/size + ((Mbs % size) > rank);
3451     m   = mbs*bs;
3452   } else { /* User set */
3453     m   = newmat->rmap->n;
3454     mbs = m/bs;
3455   }
3456   ierr = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr);
3457   ierr = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3458 
3459   /* process 0 needs enough room for process with most rows */
3460   if (!rank) {
3461     mmax = rowners[1];
3462     for (i=2; i<=size; i++) {
3463       mmax = PetscMax(mmax,rowners[i]);
3464     }
3465     mmax*=bs;
3466   } else mmax = -1;             /* unused, but compiler warns anyway */
3467 
3468   rowners[0] = 0;
3469   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
3470   for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
3471   rstart = rowners[rank];
3472   rend   = rowners[rank+1];
3473 
3474   /* distribute row lengths to all processors */
3475   ierr = PetscMalloc1(m,&locrowlens);CHKERRQ(ierr);
3476   if (!rank) {
3477     mend = m;
3478     if (size == 1) mend = mend - extra_rows;
3479     ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr);
3480     for (j=mend; j<m; j++) locrowlens[j] = 1;
3481     ierr = PetscMalloc1(mmax,&rowlengths);CHKERRQ(ierr);
3482     ierr = PetscCalloc1(size,&procsnz);CHKERRQ(ierr);
3483     for (j=0; j<m; j++) {
3484       procsnz[0] += locrowlens[j];
3485     }
3486     for (i=1; i<size; i++) {
3487       mend = browners[i+1] - browners[i];
3488       if (i == size-1) mend = mend - extra_rows;
3489       ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr);
3490       for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
3491       /* calculate the number of nonzeros on each processor */
3492       for (j=0; j<browners[i+1]-browners[i]; j++) {
3493         procsnz[i] += rowlengths[j];
3494       }
3495       ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3496     }
3497     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3498   } else {
3499     ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3500   }
3501 
3502   if (!rank) {
3503     /* determine max buffer needed and allocate it */
3504     maxnz = procsnz[0];
3505     for (i=1; i<size; i++) {
3506       maxnz = PetscMax(maxnz,procsnz[i]);
3507     }
3508     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
3509 
3510     /* read in my part of the matrix column indices  */
3511     nz     = procsnz[0];
3512     ierr   = PetscMalloc1((nz+1),&ibuf);CHKERRQ(ierr);
3513     mycols = ibuf;
3514     if (size == 1) nz -= extra_rows;
3515     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3516     if (size == 1) {
3517       for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i;
3518     }
3519 
3520     /* read in the parts for every other processor (except the last) and ship them off */
3521     for (i=1; i<size-1; i++) {
3522       nz   = procsnz[i];
3523       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3524       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3525     }
3526     /* read in the stuff for the last proc */
3527     if (size != 1) {
3528       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
3529       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3530       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
3531       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
3532     }
3533     ierr = PetscFree(cols);CHKERRQ(ierr);
3534   } else {
3535     /* determine buffer space needed for message */
3536     nz = 0;
3537     for (i=0; i<m; i++) {
3538       nz += locrowlens[i];
3539     }
3540     ierr   = PetscMalloc1((nz+1),&ibuf);CHKERRQ(ierr);
3541     mycols = ibuf;
3542     /* receive message of column indices */
3543     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3544     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
3545     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
3546   }
3547 
3548   /* loop over local rows, determining number of off diagonal entries */
3549   ierr     = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr);
3550   ierr     = PetscCalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr);
3551   rowcount = 0; nzcount = 0;
3552   for (i=0; i<mbs; i++) {
3553     dcount  = 0;
3554     odcount = 0;
3555     for (j=0; j<bs; j++) {
3556       kmax = locrowlens[rowcount];
3557       for (k=0; k<kmax; k++) {
3558         tmp = mycols[nzcount++]/bs;
3559         if (!mask[tmp]) {
3560           mask[tmp] = 1;
3561           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
3562           else masked1[dcount++] = tmp;
3563         }
3564       }
3565       rowcount++;
3566     }
3567 
3568     dlens[i]  = dcount;
3569     odlens[i] = odcount;
3570 
3571     /* zero out the mask elements we set */
3572     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
3573     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
3574   }
3575 
3576 
3577   if (!sizesset) {
3578     ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3579   }
3580   ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
3581 
3582   if (!rank) {
3583     ierr = PetscMalloc1((maxnz+1),&buf);CHKERRQ(ierr);
3584     /* read in my part of the matrix numerical values  */
3585     nz     = procsnz[0];
3586     vals   = buf;
3587     mycols = ibuf;
3588     if (size == 1) nz -= extra_rows;
3589     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3590     if (size == 1) {
3591       for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0;
3592     }
3593 
3594     /* insert into matrix */
3595     jj = rstart*bs;
3596     for (i=0; i<m; i++) {
3597       ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3598       mycols += locrowlens[i];
3599       vals   += locrowlens[i];
3600       jj++;
3601     }
3602     /* read in other processors (except the last one) and ship out */
3603     for (i=1; i<size-1; i++) {
3604       nz   = procsnz[i];
3605       vals = buf;
3606       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3607       ierr = MPIULong_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3608     }
3609     /* the last proc */
3610     if (size != 1) {
3611       nz   = procsnz[i] - extra_rows;
3612       vals = buf;
3613       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3614       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
3615       ierr = MPIULong_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3616     }
3617     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3618   } else {
3619     /* receive numeric values */
3620     ierr = PetscMalloc1((nz+1),&buf);CHKERRQ(ierr);
3621 
3622     /* receive message of values */
3623     vals   = buf;
3624     mycols = ibuf;
3625     ierr   = MPIULong_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3626 
3627     /* insert into matrix */
3628     jj = rstart*bs;
3629     for (i=0; i<m; i++) {
3630       ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3631       mycols += locrowlens[i];
3632       vals   += locrowlens[i];
3633       jj++;
3634     }
3635   }
3636   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
3637   ierr = PetscFree(buf);CHKERRQ(ierr);
3638   ierr = PetscFree(ibuf);CHKERRQ(ierr);
3639   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
3640   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
3641   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
3642   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3643   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3644   PetscFunctionReturn(0);
3645 }
3646 
3647 #undef __FUNCT__
3648 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
3649 /*@
3650    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
3651 
3652    Input Parameters:
3653 +  mat  - the matrix
3654 -  fact - the factor used to compute the size of the hash table
3655 
3656    Not Collective, each process can use a different factor
3657 
3658    Level: advanced
3659 
3660   Notes:
3661    This can also be set by the command line option: -mat_use_hash_table <fact>
3662 
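   A usage sketch (the factor value is illustrative):
.vb
   ierr = MatSetOption(mat,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
   ierr = MatMPIBAIJSetHashTableFactor(mat,1.39);CHKERRQ(ierr);
.ve
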
3663 .keywords: matrix, hashtable, factor, HT
3664 
3665 .seealso: MatSetOption()
3666 @*/
3667 PetscErrorCode  MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
3668 {
3669   PetscErrorCode ierr;
3670 
3671   PetscFunctionBegin;
3672   ierr = PetscTryMethod(mat,"MatSetHashTableFactor_C",(Mat,PetscReal),(mat,fact));CHKERRQ(ierr);
3673   PetscFunctionReturn(0);
3674 }
3675 
3676 #undef __FUNCT__
3677 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
3678 PetscErrorCode  MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
3679 {
3680   Mat_MPIBAIJ *baij;
3681 
3682   PetscFunctionBegin;
3683   baij          = (Mat_MPIBAIJ*)mat->data;
3684   baij->ht_fact = fact;
3685   PetscFunctionReturn(0);
3686 }
3687 
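/*
   Gives access to the pieces of the parallel matrix: Ad is the sequential "diagonal"
   block (local rows and the corresponding columns), Ao the "off-diagonal" block (local
   rows, compressed nonlocal columns), and colmap translates the compressed block
   columns of Ao back to global block column numbers.
*/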
3688 #undef __FUNCT__
3689 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
3690 PetscErrorCode  MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
3691 {
3692   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
3693 
3694   PetscFunctionBegin;
3695   if (Ad)     *Ad     = a->A;
3696   if (Ao)     *Ao     = a->B;
3697   if (colmap) *colmap = a->garray;
3698   PetscFunctionReturn(0);
3699 }
3700 
3701 /*
3702     Special version for direct calls from Fortran (to eliminate two function call overheads
3703 */
3704 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3705 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3706 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3707 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3708 #endif
3709 
3710 #undef __FUNCT__
3711 #define __FUNCT__ "matmpibaijsetvaluesblocked_"
3712 /*@C
3713   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked()
3714 
3715   Collective on Mat
3716 
3717   Input Parameters:
3718 + mat - the matrix
3719 . min - number of input rows
3720 . im - input rows
3721 . nin - number of input columns
3722 . in - input columns
3723 . v - numerical values input
3724 - addvin - INSERT_VALUES or ADD_VALUES
3725 
3726   Notes: This contains a complete copy of MatSetValuesBlocked_MPIBAIJ(), which is an unfortunate duplication of code.
3727 
3728   Level: advanced
3729 
3730 .seealso:   MatSetValuesBlocked()
3731 @*/
3732 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
3733 {
3734   /* convert input arguments to C version */
3735   Mat        mat  = *matin;
3736   PetscInt   m    = *min, n = *nin;
3737   InsertMode addv = *addvin;
3738 
3739   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
3740   const MatScalar *value;
3741   MatScalar       *barray     = baij->barray;
3742   PetscBool       roworiented = baij->roworiented;
3743   PetscErrorCode  ierr;
3744   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
3745   PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
3746   PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
3747 
3748   PetscFunctionBegin;
3749   /* tasks normally handled by MatSetValuesBlocked() */
3750   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
3751 #if defined(PETSC_USE_DEBUG)
3752   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
3753   if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
3754 #endif
3755   if (mat->assembled) {
3756     mat->was_assembled = PETSC_TRUE;
3757     mat->assembled     = PETSC_FALSE;
3758   }
3759   ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
3760 
3761 
3762   if (!barray) {
3763     ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
3764     baij->barray = barray;
3765   }
3766 
3767   if (roworiented) stepval = (n-1)*bs;
3768   else stepval = (m-1)*bs;
3769 
3770   for (i=0; i<m; i++) {
3771     if (im[i] < 0) continue;
3772 #if defined(PETSC_USE_DEBUG)
3773     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
3774 #endif
3775     if (im[i] >= rstart && im[i] < rend) {
3776       row = im[i] - rstart;
3777       for (j=0; j<n; j++) {
3778         /* if the values for this block are already contiguous in v then no copy is required */
3779         if ((roworiented) && (n == 1)) {
3780           barray = (MatScalar*)v + i*bs2;
3781         } else if ((!roworiented) && (m == 1)) {
3782           barray = (MatScalar*)v + j*bs2;
3783         } else { /* Here a copy is required */
3784           if (roworiented) {
3785             value = v + i*(stepval+bs)*bs + j*bs;
3786           } else {
3787             value = v + j*(stepval+bs)*bs + i*bs;
3788           }
3789           for (ii=0; ii<bs; ii++,value+=stepval) {
3790             for (jj=0; jj<bs; jj++) {
3791               *barray++ = *value++;
3792             }
3793           }
3794           barray -=bs2;
3795         }
3796 
3797         if (in[j] >= cstart && in[j] < cend) {
3798           col  = in[j] - cstart;
3799           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
3800         } else if (in[j] < 0) continue;
3801 #if defined(PETSC_USE_DEBUG)
3802         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
3803 #endif
3804         else {
3805           if (mat->was_assembled) {
3806             if (!baij->colmap) {
3807               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
3808             }
3809 
3810 #if defined(PETSC_USE_DEBUG)
3811 #if defined(PETSC_USE_CTABLE)
3812             { PetscInt data;
3813               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
3814               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
3815             }
3816 #else
3817             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
3818 #endif
3819 #endif
3820 #if defined(PETSC_USE_CTABLE)
3821             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
3822             col  = (col - 1)/bs;
3823 #else
3824             col = (baij->colmap[in[j]] - 1)/bs;
3825 #endif
3826             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
3827               ierr = MatDisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
3828               col  =  in[j];
3829             }
3830           } else col = in[j];
3831           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
3832         }
3833       }
3834     } else {
3835       if (!baij->donotstash) {
3836         if (roworiented) {
3837           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
3838         } else {
3839           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
3840         }
3841       }
3842     }
3843   }
3844 
3845   /* task normally handled by MatSetValuesBlocked() */
3846   ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
3847   PetscFunctionReturn(0);
3848 }
3849 
3850 #undef __FUNCT__
3851 #define __FUNCT__ "MatCreateMPIBAIJWithArrays"
3852 /*@
3853      MatCreateMPIBAIJWithArrays - creates a MPI BAIJ matrix using arrays that contain in standard
3854          CSR format the local rows.
3855 
3856    Collective on MPI_Comm
3857 
3858    Input Parameters:
3859 +  comm - MPI communicator
3860 .  bs - the block size, only a block size of 1 is supported
3861 .  m - number of local rows (Cannot be PETSC_DECIDE)
3862 .  n - This value should be the same as the local size used in creating the
3863        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
3864        calculated if N is given) For square matrices n is almost always m.
3865 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3866 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3867 .   i - row indices
3868 .   j - column indices
3869 -   a - matrix values
3870 
3871    Output Parameter:
3872 .   mat - the matrix
3873 
3874    Level: intermediate
3875 
3876    Notes:
3877        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
3878      thus you CANNOT change the matrix entries by changing the values of a[] after you have
3879      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3880 
3881      The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3882      the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3883      block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3884      with column-major ordering within blocks.
3885 
3886        The i and j indices are 0 based, and the entries of i are offsets into the local j array (one per local row, plus a final entry equal to the local number of nonzeros).
3887 
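     A sketch for bs=1 with two local rows holding the entries (0,0)=2.0 and (1,1)=3.0
     (all sizes and values are illustrative):
.vb
   PetscInt    i[3] = {0,1,2};
   PetscInt    j[2] = {0,1};
   PetscScalar a[2] = {2.0,3.0};
   ierr = MatCreateMPIBAIJWithArrays(PETSC_COMM_WORLD,1,2,2,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,&mat);CHKERRQ(ierr);
.ve
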
3888 .keywords: matrix, aij, compressed row, sparse, parallel
3889 
3890 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
3891           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
3892 @*/
3893 PetscErrorCode  MatCreateMPIBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
3894 {
3895   PetscErrorCode ierr;
3896 
3897   PetscFunctionBegin;
3898   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3899   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
3900   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3901   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
3902   ierr = MatSetType(*mat,MATMPIBAIJ);CHKERRQ(ierr);
3903   ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
3904   ierr = MatMPIBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
3905   ierr = MatSetOption(*mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr);
3906   PetscFunctionReturn(0);
3907 }
3908