/* /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 4d5d7b3b3a01aa104ff5a4cb524f2e3f8dce98e2) */
#define PETSCMAT_DLL

#include "../src/mat/impls/baij/mpi/mpibaij.h"   /*I  "petscmat.h"  I*/
#include "petscblaslapack.h"

EXTERN PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
EXTERN PetscErrorCode DisAssemble_MPIBAIJ(Mat);
EXTERN PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat,PetscInt,IS[],PetscInt);
EXTERN PetscErrorCode MatGetSubMatrices_MPIBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat *[]);
EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []);
EXTERN PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
EXTERN PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar);
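
/*
   Computes, for each local row, the entry of largest absolute value (and
   optionally its global column index): the diagonal block a->A and the
   off-diagonal block a->B are searched separately, and column indices
   found in B are mapped back to global indices through a->garray.
*/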
#undef __FUNCT__
#define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ"
PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;}
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);}
  ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++){
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {va[i] = vb[i]; if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);}
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  if (idxb) {ierr = PetscFree(idxb);CHKERRQ(ierr);}
  ierr = VecDestroy(vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPIBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*
     Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix. This is done in a non-scalable way, since the
   length of colmap equals the number of block columns in the global matrix.
*/
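/*
   Illustration (values are hypothetical, not from the source): with bs=2
   and garray = {3,7}, the array version sets colmap[3] = 1 and
   colmap[7] = 3, i.e. colmap[g] = 1 + bs*(local block column of global
   block column g), with 0 marking block columns not present locally.
   The CTABLE version stores the same values with the keys shifted by one.
*/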
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIBAIJ_Private"
PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr);
  for (i=0; i<nbs; i++){
    ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}

#define CHUNKSIZE  10

#define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto a_noinsert; \
        } \
      } \
      if (a->nonew == 1) goto a_noinsert; \
      if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
      MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      a_noinsert:; \
    ailen[brow] = nrow; \
}

#define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto b_noinsert; \
        } \
      } \
      if (b->nonew == 1) goto b_noinsert; \
      if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
      MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
      CHKMEMQ;\
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      b_noinsert:; \
    bilen[brow] = nrow; \
}
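
/*
   Both MatSetValues_SeqBAIJ_{A,B}_Private macros follow the same scheme:
   a short binary search narrows the (sorted) block row to a window of at
   most three entries, a linear scan locates the block column, and the
   scalar is added or inserted at offset bs*cidx + ridx inside the bs x bs
   block (blocks are stored column-major).  If the block column is absent
   and new nonzeros are allowed, later entries are shifted up one slot
   (growing the storage via MatSeqXAIJReallocateAIJ if needed) and a
   zeroed block is created for the new entry.
*/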

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ"
PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  MatScalar      value;
  PetscTruth     roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig=mat->cmap->rend,bs=mat->rmap->bs;

  /* Some variables required in the macros */
  Mat            A = baij->A;
  Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)(A)->data;
  PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar      *aa=a->a;

  Mat            B = baij->B;
  Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(B)->data;
  PetscInt       *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar      *ba=b->a;

  PetscInt       *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt       low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar      *ap,*bap;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      row = im[i] - rstart_orig;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig){
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B = baij->B;
              b = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba=b->a;
            } else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  const PetscScalar *value;
  MatScalar         *barray=baij->barray;
  PetscTruth        roworiented = baij->roworiented;
  PetscErrorCode    ierr;
  PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
  PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If n == 1 (row-oriented) or m == 1 (column-oriented), v already holds the block contiguously, so no copy is required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++ = *value++;
            }
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend){
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
        else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined (PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
            }
          }
          else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#define HASH_KEY 0.6180339887
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
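
/*
   HASH() is a multiplicative hash: multiply the key by the fractional
   part of the golden ratio, keep the fractional part of the product, and
   scale by the table size.  Worked example (illustrative values): with
   size=100 and key=13, tmp = 13*0.6180339887 = 8.034..., so the slot is
   (PetscInt)(100*0.034...) = 3.
*/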
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscTruth     roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig=mat->rmap->rend,Nbs=baij->Nbs;
  PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
  PetscReal      tmp;
  MatScalar      **HD = baij->hd,value;
#if defined(PETSC_USE_DEBUG)
  PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j=0; j<n; j++) {
        col = in[j];
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        /* Look up into the Hash Table */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#endif
        /* A HASH table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscTruth        roworiented = baij->roworiented;
  PetscErrorCode    ierr;
  PetscInt          i,j,ii,jj,row,col;
  PetscInt          rstart=baij->rstartbs;
  PetscInt          rend=baij->rendbs,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
  PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  PetscReal         tmp;
  MatScalar         **HD = baij->hd,*baij_a;
  const PetscScalar *v_t,*value;
#if defined(PETSC_USE_DEBUG)
  PetscInt          total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
#endif
    row   = im[i];
    v_t   = v + i*nbs2;
    if (row >= rstart && row < rend) {
      for (j=0; j<n; j++) {
        col = in[j];

        /* Look up into the Hash Table */
        key = row*Nbs+col+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        total_ct++;
        insert_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#endif
        baij_a = HD[idx];
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t  += bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] = *value++;
              }
            }
          }
        } else {
          value = v + j*(stepval+bs)*bs + i*bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] = *value++;
              }
            }
          }
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIBAIJ"
PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
  PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend){
          col = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIBAIJ"
PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v = amat->a;
      nz = amat->nz*bs2;
      for (i=0; i<nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      nz = bmat->nz*bs2;
      for (i=0; i<nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      *nrm = sqrt(*nrm);
    } else if (type == NORM_1) { /* max column sum */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
      ierr = PetscMalloc2(mat->cmap->N,PetscReal,&tmp,mat->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      v = amat->a; jj = amat->j;
      for (i=0; i<amat->nz; i++) {
        for (j=0; j<bs; j++){
          col = bs*(cstart + *jj) + j; /* column index */
          for (row=0; row<bs; row++){
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      v = bmat->a; jj = bmat->j;
      for (i=0; i<bmat->nz; i++) {
        for (j=0; j<bs; j++){
          col = bs*garray[*jj] + j;
          for (row=0; row<bs; row++){
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      *nrm = 0.0;
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row sum */
      PetscReal *sums;
      ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr);
      sum = 0.0;
      for (j=0; j<amat->mbs; j++) {
        for (row=0; row<bs; row++) sums[row] = 0.0;
        v = amat->a + bs2*amat->i[j];
        nz = amat->i[j+1]-amat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++){
            for (row=0; row<bs; row++){
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        v = bmat->a + bs2*bmat->i[j];
        nz = bmat->i[j+1]-bmat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++){
            for (row=0; row<bs; row++){
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        for (row=0; row<bs; row++){
          if (sums[row] > sum) sum = sums[row];
        }
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
      ierr = PetscFree(sums);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
    }
  }
  PetscFunctionReturn(0);
}

/*
  Creates and fills the hash table.
  The table is created only once; if new entries need to be added to the
  matrix, the hash table must be destroyed and recreated.
*/
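
/*
   Collisions are resolved by linear probing: starting from the hashed
   slot h1, the first empty slot (h1+k) % ht_size receives the key, and
   HD[] at that slot is pointed at the corresponding bs2-sized block of
   values in a->a or b->a.  Lookups in MatSetValues_MPIBAIJ_HT() probe
   forward in the same way, wrapping around at the end of the table.
*/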
#undef __FUNCT__
#define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            A = baij->A,B=baij->B;
  Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
  PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
  PetscErrorCode ierr;
  PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
  PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
  PetscInt       *HT,key;
  MatScalar      **HD;
  PetscReal      tmp;
#if defined(PETSC_USE_INFO)
  PetscInt       ct=0,max=0;
#endif

  PetscFunctionBegin;
  if (baij->ht) PetscFunctionReturn(0);

  baij->ht_size = (PetscInt)(factor*nz);
  ht_size       = baij->ht_size;

  /* Allocate Memory for Hash Table */
  ierr = PetscMalloc2(ht_size,MatScalar*,&baij->hd,ht_size,PetscInt,&baij->ht);CHKERRQ(ierr);
  ierr = PetscMemzero(baij->hd,ht_size*sizeof(MatScalar*));CHKERRQ(ierr);
  ierr = PetscMemzero(baij->ht,ht_size*sizeof(PetscInt));CHKERRQ(ierr);
  HD   = baij->hd;
  HT   = baij->ht;

  /* Loop Over A */
  for (i=0; i<a->mbs; i++) {
    for (j=ai[i]; j<ai[i+1]; j++) {
      row = i+rstart;
      col = aj[j]+cstart;

      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++){
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = a->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }
  /* Loop Over B */
  for (i=0; i<b->mbs; i++) {
    for (j=bi[i]; j<bi[i+1]; j++) {
      row = i+rstart;
      col = garray[bj[j]];
      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++){
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = b->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }

  /* Print Summary */
#if defined(PETSC_USE_INFO)
  for (i=0,j=0; i<ht_size; i++) {
    if (HT[i]) {j++;}
  }
  ierr = PetscInfo2(mat,"Average search = %5.2f, max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (baij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscTruth     r1,r2,r3,other_disassembled;
  MatScalar      *val;
  InsertMode     addv = mat->insertmode;
  PetscMPIInt    n;

  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!baij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       temporarily clear the row-oriented flags, and restore the original flags
       after MatSetValues() */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;
    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* Determine whether any processor has disassembled; if so, we must
     also disassemble ourselves in order to reassemble. */
  /*
     If the nonzero structure of the submatrix B cannot change, then no
     processor disassembled, so this step can be skipped.
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_FALSE; /* b->compressedrow.use */
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  baij->rowvalues = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = baij->size,rank = baij->rank;
  PetscInt          bs = mat->rmap->bs;
  PetscTruth        iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
             rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar   *a;

    /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ*)baij->B->data;
    ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ"
PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  if (iascii || isdraw || issocket || isbinary) {
    ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIBAIJ"
PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = MatDestroy(baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(baij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (baij->colmap) {ierr = PetscTableDestroy(baij->colmap);CHKERRQ(ierr);}
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
  if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(baij);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIBAIJ"
PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  }
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->rmap->n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
  }
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIBAIJ"
PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIBAIJ"
PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscTruth     merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually */
    /* inserted in yy until the next line */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually
     inserted into zz until the VecScatterEnd() below, which holds for this
     implementation but may not always be true */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIBAIJ"
PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIBAIJ"
PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;

  PetscFunctionBegin;
  if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows");
  if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
    PetscInt     max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr);
  }
  lrow = row - brstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart)
              idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIBAIJ"
PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;

  PetscFunctionBegin;
  if (!baij->getrowactive) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
  }
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIBAIJ"
PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIBAIJ"
PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
  Mat            A = a->A,B = a->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = (PetscReal)matin->rmap->bs;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else {
    SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}
1404 
1405 #undef __FUNCT__
1406 #define __FUNCT__ "MatSetOption_MPIBAIJ"
1407 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscTruth flg)
1408 {
1409   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1410   PetscErrorCode ierr;
1411 
1412   PetscFunctionBegin;
1413   switch (op) {
1414   case MAT_NEW_NONZERO_LOCATIONS:
1415   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1416   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1417   case MAT_KEEP_NONZERO_PATTERN:
1418   case MAT_NEW_NONZERO_LOCATION_ERR:
1419     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1420     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1421     break;
1422   case MAT_ROW_ORIENTED:
1423     a->roworiented = flg;
1424     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1425     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1426     break;
1427   case MAT_NEW_DIAGONALS:
1428     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1429     break;
1430   case MAT_IGNORE_OFF_PROC_ENTRIES:
1431     a->donotstash = flg;
1432     break;
1433   case MAT_USE_HASH_TABLE:
1434     a->ht_flag = flg;
1435     break;
1436   case MAT_SYMMETRIC:
1437   case MAT_STRUCTURALLY_SYMMETRIC:
1438   case MAT_HERMITIAN:
1439   case MAT_SYMMETRY_ETERNAL:
1440     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1441     break;
1442   default:
1443     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1444   }
1445   PetscFunctionReturn(0);
1446 }
1447 
1448 #undef __FUNCT__
1449 #define __FUNCT__ "MatTranspose_MPIBAIJ("
1450 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
1451 {
1452   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
1453   Mat_SeqBAIJ    *Aloc;
1454   Mat            B;
1455   PetscErrorCode ierr;
1456   PetscInt       M=A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
1457   PetscInt       bs=A->rmap->bs,mbs=baij->mbs;
1458   MatScalar      *a;
1459 
1460   PetscFunctionBegin;
1461   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1462   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1463     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
1464     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1465     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1466     ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1467   } else {
1468     B = *matout;
1469   }
1470 
1471   /* copy over the A part */
1472   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1473   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1474   ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
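  /* rvals[] holds the bs consecutive global (point) row indices of the current
     block row; these become the column indices of the transpose below */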
1475 
1476   for (i=0; i<mbs; i++) {
1477     rvals[0] = bs*(baij->rstartbs + i);
1478     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1479     for (j=ai[i]; j<ai[i+1]; j++) {
1480       col = (baij->cstartbs+aj[j])*bs;
1481       for (k=0; k<bs; k++) {
1482         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1483         col++; a += bs;
1484       }
1485     }
1486   }
1487   /* copy over the B part */
1488   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1489   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1490   for (i=0; i<mbs; i++) {
1491     rvals[0] = bs*(baij->rstartbs + i);
1492     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1493     for (j=ai[i]; j<ai[i+1]; j++) {
1494       col = baij->garray[aj[j]]*bs;
1495       for (k=0; k<bs; k++) {
1496         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1497         col++; a += bs;
1498       }
1499     }
1500   }
1501   ierr = PetscFree(rvals);CHKERRQ(ierr);
1502   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1503   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1504 
1505   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1506     *matout = B;
1507   } else {
1508     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1509   }
1510   PetscFunctionReturn(0);
1511 }
1512 
1513 #undef __FUNCT__
1514 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
1515 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1516 {
1517   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1518   Mat            a = baij->A,b = baij->B;
1519   PetscErrorCode ierr;
1520   PetscInt       s1,s2,s3;
1521 
1522   PetscFunctionBegin;
1523   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1524   if (rr) {
1525     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1526     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1527     /* Overlap communication with computation. */
1528     ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1529   }
1530   if (ll) {
1531     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1532     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1533     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1534   }
1535   /* scale the diagonal block */
1536   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1537 
1538   if (rr) {
1539     /* Do a scatter end and then right scale the off-diagonal block */
1540     ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1541     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1542   }
1543 
1544   PetscFunctionReturn(0);
1545 }
1546 
1547 #undef __FUNCT__
1548 #define __FUNCT__ "MatZeroRows_MPIBAIJ"
1549 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
1550 {
1551   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1552   PetscErrorCode ierr;
1553   PetscMPIInt    imdex,size = l->size,n,rank = l->rank;
1554   PetscInt       i,*owners = A->rmap->range;
1555   PetscInt       *nprocs,j,idx,nsends,row;
1556   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
1557   PetscInt       *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source,lastidx = -1;
1558   PetscInt       *lens,*lrows,*values,rstart_bs=A->rmap->rstart;
1559   MPI_Comm       comm = ((PetscObject)A)->comm;
1560   MPI_Request    *send_waits,*recv_waits;
1561   MPI_Status     recv_status,*send_status;
1562 #if defined(PETSC_DEBUG)
1563   PetscTruth     found = PETSC_FALSE;
1564 #endif
1565 
1566   PetscFunctionBegin;
1567   /*  first count number of contributors to each processor */
1568   ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
1569   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
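  /* nprocs[2*j] counts the rows destined for process j; nprocs[2*j+1] is a 0/1
     flag recording whether any message goes to j at all (summed below for nsends) */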
1570   ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
1571   j = 0;
1572   for (i=0; i<N; i++) {
1573     if (lastidx > (idx = rows[i])) j = 0;
1574     lastidx = idx;
1575     for (; j<size; j++) {
1576       if (idx >= owners[j] && idx < owners[j+1]) {
1577         nprocs[2*j]++;
1578         nprocs[2*j+1] = 1;
1579         owner[i] = j;
1580 #if defined(PETSC_DEBUG)
1581         found = PETSC_TRUE;
1582 #endif
1583         break;
1584       }
1585     }
1586 #if defined(PETSC_DEBUG)
1587     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
1588     found = PETSC_FALSE;
1589 #endif
1590   }
1591   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
1592 
1593   /* inform other processors of number of messages and max length*/
1594   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
1595 
1596   /* post receives:   */
1597   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
1598   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
1599   for (i=0; i<nrecvs; i++) {
1600     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
1601   }
1602 
1603   /* do sends:
1604      1) starts[i] gives the starting index in svalues for stuff going to
1605      the ith processor
1606   */
1607   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
1608   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
1609   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
1610   starts[0]  = 0;
1611   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1612   for (i=0; i<N; i++) {
1613     svalues[starts[owner[i]]++] = rows[i];
1614   }
1615 
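  /* the increments above advanced starts[] past each process's block, so
     rebuild it (one entry longer) so that starts[i] again marks where
     process i's rows begin for the sends below */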
1616   starts[0] = 0;
1617   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1618   count = 0;
1619   for (i=0; i<size; i++) {
1620     if (nprocs[2*i+1]) {
1621       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
1622     }
1623   }
1624   ierr = PetscFree(starts);CHKERRQ(ierr);
1625 
1626   base = owners[rank];
1627 
1628   /*  wait on receives */
1629   ierr   = PetscMalloc2(nrecvs+1,PetscInt,&lens,nrecvs+1,PetscInt,&source);CHKERRQ(ierr);
1630   count  = nrecvs;
1631   slen = 0;
1632   while (count) {
1633     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
1634     /* unpack receives into our local space */
1635     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
1636     source[imdex]  = recv_status.MPI_SOURCE;
1637     lens[imdex]    = n;
1638     slen          += n;
1639     count--;
1640   }
1641   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1642 
1643   /* move the data into the send scatter */
1644   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
1645   count = 0;
1646   for (i=0; i<nrecvs; i++) {
1647     values = rvalues + i*nmax;
1648     for (j=0; j<lens[i]; j++) {
1649       lrows[count++] = values[j] - base;
1650     }
1651   }
1652   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1653   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
1654   ierr = PetscFree(owner);CHKERRQ(ierr);
1655   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1656 
1657   /* actually zap the local rows */
1658   /*
1659         Zero the required rows. If the "diagonal block" of the matrix
1660      is square and the user wishes to set the diagonal, we use a separate
1661      code path so that MatSetValues() is not called for each diagonal entry;
1662      doing so would allocate new memory each time, triggering many mallocs
1663      and slowing things down.
1664   */
1665   /* must zero l->B before l->A because the (diag) case below may put values into l->B */
1666   ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0);CHKERRQ(ierr);
1667   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
1668     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag);CHKERRQ(ierr);
1669   } else if (diag != 0.0) {
1670     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr);
1671     if (((Mat_SeqBAIJ*)l->A->data)->nonew) {
1672       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1673 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1674     }
1675     for (i=0; i<slen; i++) {
1676       row  = lrows[i] + rstart_bs;
1677       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
1678     }
1679     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1680     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1681   } else {
1682     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr);
1683   }
1684 
1685   ierr = PetscFree(lrows);CHKERRQ(ierr);
1686 
1687   /* wait on sends */
1688   if (nsends) {
1689     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1690     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1691     ierr = PetscFree(send_status);CHKERRQ(ierr);
1692   }
1693   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1694   ierr = PetscFree(svalues);CHKERRQ(ierr);
1695 
1696   PetscFunctionReturn(0);
1697 }
1698 
1699 #undef __FUNCT__
1700 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
1701 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1702 {
1703   Mat_MPIBAIJ    *a   = (Mat_MPIBAIJ*)A->data;
1704   PetscErrorCode ierr;
1705 
1706   PetscFunctionBegin;
1707   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1708   PetscFunctionReturn(0);
1709 }
1710 
1711 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
1712 
1713 #undef __FUNCT__
1714 #define __FUNCT__ "MatEqual_MPIBAIJ"
1715 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag)
1716 {
1717   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1718   Mat            a,b,c,d;
1719   PetscTruth     flg;
1720   PetscErrorCode ierr;
1721 
1722   PetscFunctionBegin;
1723   a = matA->A; b = matA->B;
1724   c = matB->A; d = matB->B;
1725 
1726   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1727   if (flg) {
1728     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1729   }
1730   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1731   PetscFunctionReturn(0);
1732 }
1733 
1734 #undef __FUNCT__
1735 #define __FUNCT__ "MatCopy_MPIBAIJ"
1736 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
1737 {
1738   PetscErrorCode ierr;
1739   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ *)A->data;
1740   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ *)B->data;
1741 
1742   PetscFunctionBegin;
1743   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1744   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1745     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1746   } else {
1747     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1748     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1749   }
1750   PetscFunctionReturn(0);
1751 }
1752 
1753 #undef __FUNCT__
1754 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ"
1755 PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A)
1756 {
1757   PetscErrorCode ierr;
1758 
1759   PetscFunctionBegin;
1760   ierr =  MatMPIBAIJSetPreallocation(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1761   PetscFunctionReturn(0);
1762 }
1763 
1764 #undef __FUNCT__
1765 #define __FUNCT__ "MatAXPY_MPIBAIJ"
1766 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1767 {
1768   PetscErrorCode ierr;
1769   Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data;
1770   PetscBLASInt   bnz,one=1;
1771   Mat_SeqBAIJ    *x,*y;
1772 
1773   PetscFunctionBegin;
1774   if (str == SAME_NONZERO_PATTERN) {
1775     PetscScalar alpha = a;
1776     x = (Mat_SeqBAIJ *)xx->A->data;
1777     y = (Mat_SeqBAIJ *)yy->A->data;
1778     bnz = PetscBLASIntCast(x->nz);
1779     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1780     x = (Mat_SeqBAIJ *)xx->B->data;
1781     y = (Mat_SeqBAIJ *)yy->B->data;
1782     bnz = PetscBLASIntCast(x->nz);
1783     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1784   } else {
1785     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1786   }
1787   PetscFunctionReturn(0);
1788 }
1789 
1790 #undef __FUNCT__
1791 #define __FUNCT__ "MatSetBlockSize_MPIBAIJ"
1792 PetscErrorCode MatSetBlockSize_MPIBAIJ(Mat A,PetscInt bs)
1793 {
1794   Mat_MPIBAIJ    *a   = (Mat_MPIBAIJ*)A->data;
1795   PetscInt rbs,cbs;
1796   PetscErrorCode ierr;
1797 
1798   PetscFunctionBegin;
1799   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1800   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1801   ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr);
1802   ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr);
1803   if (rbs != bs) SETERRQ2(PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,rbs);
1804   if (cbs != bs) SETERRQ2(PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,cbs);
1805   PetscFunctionReturn(0);
1806 }
1807 
1808 #undef __FUNCT__
1809 #define __FUNCT__ "MatRealPart_MPIBAIJ"
1810 PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
1811 {
1812   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ*)A->data;
1813   PetscErrorCode ierr;
1814 
1815   PetscFunctionBegin;
1816   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1817   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1818   PetscFunctionReturn(0);
1819 }
1820 
1821 #undef __FUNCT__
1822 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ"
1823 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
1824 {
1825   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ*)A->data;
1826   PetscErrorCode ierr;
1827 
1828   PetscFunctionBegin;
1829   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1830   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1831   PetscFunctionReturn(0);
1832 }
1833 
1834 #undef __FUNCT__
1835 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ"
1836 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
1837 {
1838   PetscErrorCode ierr;
1839   IS             iscol_local;
1840   PetscInt       csize;
1841 
1842   PetscFunctionBegin;
1843   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
1844   if (call == MAT_REUSE_MATRIX) {
1845     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
1846     if (!iscol_local) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
1847   } else {
1848     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
1849   }
1850   ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
1851   if (call == MAT_INITIAL_MATRIX) {
1852     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
1853     ierr = ISDestroy(iscol_local);CHKERRQ(ierr);
1854   }
1855   PetscFunctionReturn(0);
1856 }
1857 
1858 #undef __FUNCT__
1859 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ"
1860 /*
1861     Not great since it makes two copies of the submatrix: first a SeqBAIJ
1862   obtained locally, then the final result built by concatenating the local
1863   matrices. Writing it directly would be much like MatGetSubMatrices_MPIBAIJ().
1864 */
1865 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
1866 {
1867   PetscErrorCode ierr;
1868   PetscMPIInt    rank,size;
1869   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs;
1870   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
1871   Mat            *local,M,Mreuse;
1872   MatScalar      *vwork,*aa;
1873   MPI_Comm       comm = ((PetscObject)mat)->comm;
1874   Mat_SeqBAIJ    *aij;
1875 
1876 
1877   PetscFunctionBegin;
1878   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1879   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1880 
1881   if (call ==  MAT_REUSE_MATRIX) {
1882     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
1883     if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
1884     local = &Mreuse;
1885     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
1886   } else {
1887     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
1888     Mreuse = *local;
1889     ierr   = PetscFree(local);CHKERRQ(ierr);
1890   }
1891 
1892   /*
1893       m - number of local rows
1894       n - number of columns (same on all processors)
1895       rstart - first row in new global matrix generated
1896   */
1897   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
1898   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
1899   m    = m/bs;
1900   n    = n/bs;
1901 
1902   if (call == MAT_INITIAL_MATRIX) {
1903     aij = (Mat_SeqBAIJ*)(Mreuse)->data;
1904     ii  = aij->i;
1905     jj  = aij->j;
1906 
1907     /*
1908         Determine the number of non-zeros in the diagonal and off-diagonal
1909         portions of the matrix in order to do correct preallocation
1910     */
1911 
1912     /* first get start and end of "diagonal" columns */
1913     if (csize == PETSC_DECIDE) {
1914       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
1915       if (mglobal == n*bs) { /* square matrix */
1916         nlocal = m;
1917       } else {
1918         nlocal = n/size + ((n % size) > rank);
1919       }
1920     } else {
1921       nlocal = csize/bs;
1922     }
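    /* a prefix sum (MPI_Scan) of the local block-column counts gives this
       process's end (rend) of the "diagonal" column range; subtracting nlocal
       gives the start */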
1923     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
1924     rstart = rend - nlocal;
1925     if (rank == size - 1 && rend != n) {
1926       SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
1927     }
1928 
1929     /* next, compute all the lengths */
1930     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
1931     olens = dlens + m;
1932     for (i=0; i<m; i++) {
1933       jend = ii[i+1] - ii[i];
1934       olen = 0;
1935       dlen = 0;
1936       for (j=0; j<jend; j++) {
1937         if (*jj < rstart || *jj >= rend) olen++;
1938         else dlen++;
1939         jj++;
1940       }
1941       olens[i] = olen;
1942       dlens[i] = dlen;
1943     }
1944     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
1945     ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr);
1946     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
1947     ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
1948     ierr = PetscFree(dlens);CHKERRQ(ierr);
1949   } else {
1950     PetscInt ml,nl;
1951 
1952     M = *newmat;
1953     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
1954     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
1955     ierr = MatZeroEntries(M);CHKERRQ(ierr);
1956     /*
1957          The next two lines are needed so we may call MatSetValuesBlocked_MPIBAIJ() below directly,
1958        rather than the slower MatSetValues().
1959     */
1960     M->was_assembled = PETSC_TRUE;
1961     M->assembled     = PETSC_FALSE;
1962   }
1963   ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
1964   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
1965   aij = (Mat_SeqBAIJ*)(Mreuse)->data;
1966   ii  = aij->i;
1967   jj  = aij->j;
1968   aa  = aij->a;
1969   for (i=0; i<m; i++) {
1970     row   = rstart/bs + i;
1971     nz    = ii[i+1] - ii[i];
1972     cwork = jj;     jj += nz;
1973     vwork = aa;     aa += nz;
1974     ierr = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
1975   }
1976 
1977   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1978   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1979   *newmat = M;
1980 
1981   /* save submatrix used in processor for next request */
1982   if (call ==  MAT_INITIAL_MATRIX) {
1983     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
1984     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
1985   }
1986 
1987   PetscFunctionReturn(0);
1988 }
1989 
1990 #undef __FUNCT__
1991 #define __FUNCT__ "MatPermute_MPIBAIJ"
1992 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B)
1993 {
1994   MPI_Comm       comm,pcomm;
1995   PetscInt       first,local_size,nrows;
1996   const PetscInt *rows;
1997   PetscMPIInt    size;
1998   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1999   PetscErrorCode ierr;
2000 
2001   PetscFunctionBegin;
2002   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2003   /* make a collective version of 'rowp' */
2004   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
2005   if (pcomm==comm) {
2006     crowp = rowp;
2007   } else {
2008     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
2009     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
2010     ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr);
2011     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
2012   }
2013   /* collect the global row permutation and invert it */
2014   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
2015   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
2016   if (pcomm!=comm) {
2017     ierr = ISDestroy(crowp);CHKERRQ(ierr);
2018   }
2019   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
2020   /* get the local target indices */
2021   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
2022   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
2023   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
2024   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr);
2025   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
2026   ierr = ISDestroy(irowp);CHKERRQ(ierr);
2027   /* the column permutation is so much easier;
2028      make a local version of 'colp' and invert it */
2029   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
2030   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
2031   if (size==1) {
2032     lcolp = colp;
2033   } else {
2034     ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
2035     ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
2036     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr);
2037   }
2038   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
2039   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
2040   ierr = ISSetPermutation(icolp);CHKERRQ(ierr);
2041   if (size>1) {
2042     ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
2043     ierr = ISDestroy(lcolp);CHKERRQ(ierr);
2044   }
2045   /* now we just get the submatrix */
2046   ierr = MatGetSubMatrix_MPIBAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
2047   /* clean up */
2048   ierr = ISDestroy(lrowp);CHKERRQ(ierr);
2049   ierr = ISDestroy(icolp);CHKERRQ(ierr);
2050   PetscFunctionReturn(0);
2051 }
2052 
2053 #undef __FUNCT__
2054 #define __FUNCT__ "MatGetGhosts_MPIBAIJ"
2055 PetscErrorCode PETSCMAT_DLLEXPORT MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
2056 {
2057   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*) mat->data;
2058   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
2059 
2060   PetscFunctionBegin;
2061   if (nghosts) { *nghosts = B->nbs;}
2062   if (ghosts) {*ghosts = baij->garray;}
2063   PetscFunctionReturn(0);
2064 }
2065 
2066 EXTERN PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat);
2067 
2068 #undef __FUNCT__
2069 #define __FUNCT__ "MatFDColoringCreate_MPIBAIJ"
2070 /*
2071     This routine is almost identical to MatFDColoringCreate_MPIAIJ()!
2072 */
2073 PetscErrorCode MatFDColoringCreate_MPIBAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c)
2074 {
2075   Mat_MPIBAIJ            *baij = (Mat_MPIBAIJ*)mat->data;
2076   PetscErrorCode        ierr;
2077   PetscMPIInt           size,*ncolsonproc,*disp,nn;
2078   PetscInt              bs,i,n,nrows,j,k,m,*rows = 0,*A_ci,*A_cj,ncols,col;
2079   const PetscInt        *is;
2080   PetscInt              nis = iscoloring->n,nctot,*cols,*B_ci,*B_cj;
2081   PetscInt              *rowhit,M,cstart,cend,colb;
2082   PetscInt              *columnsforrow,l;
2083   IS                    *isa;
2084   PetscTruth             done,flg;
2085   ISLocalToGlobalMapping map = mat->bmapping;
2086   PetscInt               *ltog = (map ? map->indices : (PetscInt*)PETSC_NULL),ctype=c->ctype;
2087 
2088   PetscFunctionBegin;
2089   if (!mat->assembled) {
2090     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled first; MatAssemblyBegin/End();");
2091   }
2092   if (ctype == IS_COLORING_GHOSTED && !map) SETERRQ(PETSC_ERR_ARG_INCOMP,"When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMappingBlock");
2093 
2094   ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr);
2095 
2096   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2097   M                = mat->rmap->n/bs;
2098   cstart           = mat->cmap->rstart/bs;
2099   cend             = mat->cmap->rend/bs;
2100   c->M             = mat->rmap->N/bs;  /* set the global rows and columns and local rows */
2101   c->N             = mat->cmap->N/bs;
2102   c->m             = mat->rmap->n/bs;
2103   c->rstart        = mat->rmap->rstart/bs;
2104 
2105   c->ncolors       = nis;
2106   ierr             = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr);
2107   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr);
2108   ierr             = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr);
2109   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr);
2110   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr);
2111   ierr = PetscLogObjectMemory(c,5*nis*sizeof(PetscInt));CHKERRQ(ierr);
2112 
2113   /* Allow access to data structures of local part of matrix */
2114   if (!baij->colmap) {
2115     ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
2116   }
2117   ierr = MatGetColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr);
2118   ierr = MatGetColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr);
2119 
2120   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr);
2121   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr);
2122 
2123   for (i=0; i<nis; i++) {
2124     ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr);
2125     ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr);
2126     c->ncolumns[i] = n;
2127     if (n) {
2128       ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr);
2129       ierr = PetscLogObjectMemory(c,n*sizeof(PetscInt));CHKERRQ(ierr);
2130       ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr);
2131     } else {
2132       c->columns[i]  = 0;
2133     }
2134 
2135     if (ctype == IS_COLORING_GLOBAL){
2136       /* Determine the total (parallel) number of columns of this color */
2137       ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
2138       ierr = PetscMalloc2(size,PetscMPIInt,&ncolsonproc,size,PetscMPIInt,&disp);CHKERRQ(ierr);
2139 
2140       nn   = PetscMPIIntCast(n);
2141       ierr = MPI_Allgather(&nn,1,MPI_INT,ncolsonproc,1,MPI_INT,((PetscObject)mat)->comm);CHKERRQ(ierr);
2142       nctot = 0; for (j=0; j<size; j++) {nctot += ncolsonproc[j];}
2143       if (!nctot) {
2144         ierr = PetscInfo(mat,"Coloring of matrix has some unneeded colors with no corresponding rows\n");CHKERRQ(ierr);
2145       }
2146 
2147       disp[0] = 0;
2148       for (j=1; j<size; j++) {
2149         disp[j] = disp[j-1] + ncolsonproc[j-1];
2150       }
2151 
2152       /* Get complete list of columns for color on each processor */
2153       ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2154       ierr = MPI_Allgatherv((void*)is,n,MPIU_INT,cols,ncolsonproc,disp,MPIU_INT,((PetscObject)mat)->comm);CHKERRQ(ierr);
2155       ierr = PetscFree2(ncolsonproc,disp);CHKERRQ(ierr);
2156     } else if (ctype == IS_COLORING_GHOSTED){
2157       /* Determine local number of columns of this color on this process, including ghost points */
2158       nctot = n;
2159       ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2160       ierr = PetscMemcpy(cols,is,n*sizeof(PetscInt));CHKERRQ(ierr);
2161     } else {
2162       SETERRQ(PETSC_ERR_SUP,"Not provided for this MatFDColoring type");
2163     }
2164 
2165     /*
2166        Mark all rows affected by these columns
2167     */
2168     /* Temporary option to allow for debugging/testing */
2169     flg  = PETSC_FALSE;
2170     ierr = PetscOptionsGetTruth(PETSC_NULL,"-matfdcoloring_slow",&flg,PETSC_NULL);CHKERRQ(ierr);
2171     if (!flg) {/*-----------------------------------------------------------------------------*/
2172       /* crude, fast version */
2173       ierr = PetscMemzero(rowhit,M*sizeof(PetscInt));CHKERRQ(ierr);
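      /* rowhit[r] stores 1 + the global column of this color that touches
         block row r; 0 means the row is not touched by any column */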
2174       /* loop over columns */
2175       for (j=0; j<nctot; j++) {
2176         if (ctype == IS_COLORING_GHOSTED) {
2177           col = ltog[cols[j]];
2178         } else {
2179           col  = cols[j];
2180         }
2181         if (col >= cstart && col < cend) {
2182           /* column is in diagonal block of matrix */
2183           rows = A_cj + A_ci[col-cstart];
2184           m    = A_ci[col-cstart+1] - A_ci[col-cstart];
2185         } else {
2186 #if defined (PETSC_USE_CTABLE)
2187           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2188           colb --;
2189 #else
2190           colb = baij->colmap[col] - 1;
2191 #endif
2192           if (colb == -1) {
2193             m = 0;
2194           } else {
2195             colb = colb/bs;
2196             rows = B_cj + B_ci[colb];
2197             m    = B_ci[colb+1] - B_ci[colb];
2198           }
2199         }
2200         /* loop over columns marking them in rowhit */
2201         for (k=0; k<m; k++) {
2202           rowhit[*rows++] = col + 1;
2203         }
2204       }
2205 
2206       /* count the number of hits */
2207       nrows = 0;
2208       for (j=0; j<M; j++) {
2209         if (rowhit[j]) nrows++;
2210       }
2211       c->nrows[i]         = nrows;
2212       ierr                = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
2213       ierr                = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
2214       ierr = PetscLogObjectMemory(c,2*(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr);
2215       nrows = 0;
2216       for (j=0; j<M; j++) {
2217         if (rowhit[j]) {
2218           c->rows[i][nrows]           = j;
2219           c->columnsforrow[i][nrows] = rowhit[j] - 1;
2220           nrows++;
2221         }
2222       }
2223     } else {/*-------------------------------------------------------------------------------*/
2224       /* slow version, using rowhit as a linked list */
2225       PetscInt currentcol,fm,mfm;
2226       rowhit[M] = M;
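      /* here rowhit[] doubles as a sorted singly linked list of the rows hit
         so far: rowhit[M] is the list head and the value M is the end-of-list
         sentinel, so rowhit[M] == M means the list is empty */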
2227       nrows     = 0;
2228       /* loop over columns */
2229       for (j=0; j<nctot; j++) {
2230         if (ctype == IS_COLORING_GHOSTED) {
2231           col = ltog[cols[j]];
2232         } else {
2233           col  = cols[j];
2234         }
2235         if (col >= cstart && col < cend) {
2236           /* column is in diagonal block of matrix */
2237           rows = A_cj + A_ci[col-cstart];
2238           m    = A_ci[col-cstart+1] - A_ci[col-cstart];
2239         } else {
2240 #if defined (PETSC_USE_CTABLE)
2241           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2242           colb --;
2243 #else
2244           colb = baij->colmap[col] - 1;
2245 #endif
2246           if (colb == -1) {
2247             m = 0;
2248           } else {
2249             colb = colb/bs;
2250             rows = B_cj + B_ci[colb];
2251             m    = B_ci[colb+1] - B_ci[colb];
2252           }
2253         }
2254 
2255         /* loop over columns marking them in rowhit */
2256         fm    = M; /* fm points to first entry in linked list */
2257         for (k=0; k<m; k++) {
2258           currentcol = *rows++;
2259           /* is it already in the list? */
2260           do {
2261             mfm  = fm;
2262             fm   = rowhit[fm];
2263           } while (fm < currentcol);
2264           /* not in list so add it */
2265           if (fm != currentcol) {
2266             nrows++;
2267             columnsforrow[currentcol] = col;
2268             /* next three lines insert new entry into linked list */
2269             rowhit[mfm]               = currentcol;
2270             rowhit[currentcol]        = fm;
2271             fm                        = currentcol;
2272             /* fm points to present position in list since we know the columns are sorted */
2273           } else {
2274             SETERRQ(PETSC_ERR_PLIB,"Invalid coloring of matrix detected");
2275           }
2276         }
2277       }
2278       c->nrows[i]         = nrows;
2279       ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
2280       ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
2281       ierr = PetscLogObjectMemory(c,(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr);
2282       /* now store the linked list of rows into c->rows[i] */
2283       nrows = 0;
2284       fm    = rowhit[M];
2285       do {
2286         c->rows[i][nrows]            = fm;
2287         c->columnsforrow[i][nrows++] = columnsforrow[fm];
2288         fm                           = rowhit[fm];
2289       } while (fm < M);
2290     } /* ---------------------------------------------------------------------------------------*/
2291     ierr = PetscFree(cols);CHKERRQ(ierr);
2292   }
2293 
2294   /* Optimize by adding the vscale and vscaleforrow[][] fields */
2295   /*
2296        vscale will contain the "diagonal" on-processor scalings followed by the off-processor ones
2297   */
2298   if (ctype == IS_COLORING_GLOBAL) {
2299     PetscInt *garray;
2300     ierr = PetscMalloc(baij->B->cmap->n*sizeof(PetscInt),&garray);CHKERRQ(ierr);
2301     for (i=0; i<baij->B->cmap->n/bs; i++) {
2302       for (j=0; j<bs; j++) {
2303         garray[i*bs+j] = bs*baij->garray[i]+j;
2304       }
2305     }
2306     ierr = VecCreateGhost(((PetscObject)mat)->comm,baij->A->rmap->n,PETSC_DETERMINE,baij->B->cmap->n,garray,&c->vscale);CHKERRQ(ierr);
2307     ierr = PetscFree(garray);CHKERRQ(ierr);
2308     CHKMEMQ;
2309     ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
2310     for (k=0; k<c->ncolors; k++) {
2311       ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
2312       for (l=0; l<c->nrows[k]; l++) {
2313         col = c->columnsforrow[k][l];
2314         if (col >= cstart && col < cend) {
2315           /* column is in diagonal block of matrix */
2316           colb = col - cstart;
2317         } else {
2318           /* column  is in "off-processor" part */
2319 #if defined (PETSC_USE_CTABLE)
2320           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2321           colb --;
2322 #else
2323           colb = baij->colmap[col] - 1;
2324 #endif
2325           colb = colb/bs;
2326           colb += cend - cstart;
2327         }
2328         c->vscaleforrow[k][l] = colb;
2329       }
2330     }
2331   } else if (ctype == IS_COLORING_GHOSTED) {
2332     /* Get gtol mapping */
2333     PetscInt N = mat->cmap->N, *gtol;
2334     ierr = PetscMalloc((N+1)*sizeof(PetscInt),&gtol);CHKERRQ(ierr);
2335     for (i=0; i<N; i++) gtol[i] = -1;
2336     for (i=0; i<map->n; i++) gtol[ltog[i]] = i;
2337 
2338     c->vscale = 0; /* will be created in MatFDColoringApply() */
2339     ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
2340     for (k=0; k<c->ncolors; k++) {
2341       ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
2342       for (l=0; l<c->nrows[k]; l++) {
2343         col = c->columnsforrow[k][l];      /* global column index */
2344         c->vscaleforrow[k][l] = gtol[col]; /* local column index */
2345       }
2346     }
2347     ierr = PetscFree(gtol);CHKERRQ(ierr);
2348   }
2349   ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr);
2350 
2351   ierr = PetscFree(rowhit);CHKERRQ(ierr);
2352   ierr = PetscFree(columnsforrow);CHKERRQ(ierr);
2353   ierr = MatRestoreColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr);
2354   ierr = MatRestoreColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr);
2355   CHKMEMQ;
2356   PetscFunctionReturn(0);
2357 }
2358 
2359 #undef __FUNCT__
2360 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIBAIJ"
2361 PetscErrorCode MatGetSeqNonzerostructure_MPIBAIJ(Mat A,Mat *newmat)
2362 {
2363   Mat            B;
2364   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ *)A->data;
2365   Mat_SeqBAIJ    *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data;
2366   Mat_SeqAIJ     *b;
2367   PetscErrorCode ierr;
2368   PetscMPIInt    size,rank,*recvcounts = 0,*displs = 0;
2369   PetscInt       sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs;
2370   PetscInt       m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf;
2371 
2372   PetscFunctionBegin;
2373   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
2374   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
2375 
2376   /* ----------------------------------------------------------------
2377      Tell every processor the number of nonzeros per row
2378   */
2379   ierr = PetscMalloc((A->rmap->N/bs)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
2380   for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) {
2381     lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs];
2382   }
2383   sendcount = A->rmap->rend/bs - A->rmap->rstart/bs;
2384   ierr = PetscMalloc(2*size*sizeof(PetscMPIInt),&recvcounts);CHKERRQ(ierr);
2385   displs     = recvcounts + size;
2386   for (i=0; i<size; i++) {
2387     recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs;
2388     displs[i]     = A->rmap->range[i]/bs;
2389   }
2390 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2391   ierr  = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2392 #else
2393   ierr  = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2394 #endif
2395   /* ---------------------------------------------------------------
2396      Create the sequential matrix of the same type as the local block diagonal
2397   */
2398   ierr  = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
2399   ierr  = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2400   ierr  = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2401   ierr  = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr);
2402   b = (Mat_SeqAIJ *)B->data;
2403 
2404   /*--------------------------------------------------------------------
2405     Copy my part of matrix column indices over
2406   */
2407   sendcount  = ad->nz + bd->nz;
2408   jsendbuf   = b->j + b->i[rstarts[rank]/bs];
2409   a_jsendbuf = ad->j;
2410   b_jsendbuf = bd->j;
2411   n          = A->rmap->rend/bs - A->rmap->rstart/bs;
2412   cnt        = 0;
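  /* merge each block row in three pieces: B entries whose global block column
     lies below the diagonal block, then the A (diagonal-block) entries, then
     the remaining B entries; the result is globally sorted per row */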
2413   for (i=0; i<n; i++) {
2414 
2415     /* put in lower diagonal portion */
2416     m = bd->i[i+1] - bd->i[i];
2417     while (m > 0) {
2418       /* is it above diagonal (in bd (compressed) numbering) */
2419       if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break;
2420       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2421       m--;
2422     }
2423 
2424     /* put in diagonal portion */
2425     for (j=ad->i[i]; j<ad->i[i+1]; j++) {
2426       jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++;
2427     }
2428 
2429     /* put in upper diagonal portion */
2430     while (m-- > 0) {
2431       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2432     }
2433   }
2434   if (cnt != sendcount) SETERRQ2(PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt);
2435 
2436   /*--------------------------------------------------------------------
2437     Gather all column indices to all processors
2438   */
2439   for (i=0; i<size; i++) {
2440     recvcounts[i] = 0;
2441     for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) {
2442       recvcounts[i] += lens[j];
2443     }
2444   }
2445   displs[0]  = 0;
2446   for (i=1; i<size; i++) {
2447     displs[i] = displs[i-1] + recvcounts[i-1];
2448   }
2449 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2450   ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2451 #else
2452   ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2453 #endif
2454   /*--------------------------------------------------------------------
2455     Assemble the matrix into usable form (note: numerical values are not yet set)
2456   */
2457   /* set the b->ilen (length of each row) values */
2458   ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr);
2459   /* set the b->i indices */
2460   b->i[0] = 0;
2461   for (i=1; i<=A->rmap->N/bs; i++) {
2462     b->i[i] = b->i[i-1] + lens[i-1];
2463   }
2464   ierr = PetscFree(lens);CHKERRQ(ierr);
2465   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2466   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2467   ierr = PetscFree(recvcounts);CHKERRQ(ierr);
2468 
2469   if (A->symmetric){
2470     ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2471   } else if (A->hermitian) {
2472     ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr);
2473   } else if (A->structurally_symmetric) {
2474     ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2475   }
2476   *newmat = B;
2477   PetscFunctionReturn(0);
2478 }
2479 
2480 #undef __FUNCT__
2481 #define __FUNCT__ "MatSOR_MPIBAIJ"
2482 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2483 {
2484   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
2485   PetscErrorCode ierr;
2486   Vec            bb1 = 0;
2487 
2488   PetscFunctionBegin;
2489   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) {
2490     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2491   }
2492 
2493   if (flag == SOR_APPLY_UPPER) {
2494     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2495     PetscFunctionReturn(0);
2496   }
2497 
2498   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2499     if (flag & SOR_ZERO_INITIAL_GUESS) {
2500       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2501       its--;
2502     }
2503 
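    /* each sweep below refreshes the ghost values of xx, forms
       bb1 = bb - B*xx (the off-diagonal contribution moved to the right-hand
       side), and then runs lits sweeps on the local diagonal block */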
2504     while (its--) {
2505       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2506       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2507 
2508       /* update rhs: bb1 = bb - B*x */
2509       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2510       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2511 
2512       /* local sweep */
2513       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2514     }
2515   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
2516     if (flag & SOR_ZERO_INITIAL_GUESS) {
2517       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2518       its--;
2519     }
2520     while (its--) {
2521       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2522       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2523 
2524       /* update rhs: bb1 = bb - B*x */
2525       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2526       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2527 
2528       /* local sweep */
2529       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2530     }
2531   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
2532     if (flag & SOR_ZERO_INITIAL_GUESS) {
2533       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2534       its--;
2535     }
2536     while (its--) {
2537       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2538       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2539 
2540       /* update rhs: bb1 = bb - B*x */
2541       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2542       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2543 
2544       /* local sweep */
2545       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2546     }
2547   } else {
2548     SETERRQ(PETSC_ERR_SUP,"Parallel version of SOR requested not supported");
2549   }
2550 
2551   if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);}
2552   PetscFunctionReturn(0);
2553 }
2554 
2555 extern PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_BAIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2556 
2557 
2558 /* -------------------------------------------------------------------*/
2559 static struct _MatOps MatOps_Values = {
2560        MatSetValues_MPIBAIJ,
2561        MatGetRow_MPIBAIJ,
2562        MatRestoreRow_MPIBAIJ,
2563        MatMult_MPIBAIJ,
2564 /* 4*/ MatMultAdd_MPIBAIJ,
2565        MatMultTranspose_MPIBAIJ,
2566        MatMultTransposeAdd_MPIBAIJ,
2567        0,
2568        0,
2569        0,
2570 /*10*/ 0,
2571        0,
2572        0,
2573        MatSOR_MPIBAIJ,
2574        MatTranspose_MPIBAIJ,
2575 /*15*/ MatGetInfo_MPIBAIJ,
2576        MatEqual_MPIBAIJ,
2577        MatGetDiagonal_MPIBAIJ,
2578        MatDiagonalScale_MPIBAIJ,
2579        MatNorm_MPIBAIJ,
2580 /*20*/ MatAssemblyBegin_MPIBAIJ,
2581        MatAssemblyEnd_MPIBAIJ,
2582        MatSetOption_MPIBAIJ,
2583        MatZeroEntries_MPIBAIJ,
2584 /*24*/ MatZeroRows_MPIBAIJ,
2585        0,
2586        0,
2587        0,
2588        0,
2589 /*29*/ MatSetUpPreallocation_MPIBAIJ,
2590        0,
2591        0,
2592        0,
2593        0,
2594 /*34*/ MatDuplicate_MPIBAIJ,
2595        0,
2596        0,
2597        0,
2598        0,
2599 /*39*/ MatAXPY_MPIBAIJ,
2600        MatGetSubMatrices_MPIBAIJ,
2601        MatIncreaseOverlap_MPIBAIJ,
2602        MatGetValues_MPIBAIJ,
2603        MatCopy_MPIBAIJ,
2604 /*44*/ 0,
2605        MatScale_MPIBAIJ,
2606        0,
2607        0,
2608        0,
2609 /*49*/ MatSetBlockSize_MPIBAIJ,
2610        0,
2611        0,
2612        0,
2613        0,
2614 /*54*/ MatFDColoringCreate_MPIBAIJ,
2615        0,
2616        MatSetUnfactored_MPIBAIJ,
2617        MatPermute_MPIBAIJ,
2618        MatSetValuesBlocked_MPIBAIJ,
2619 /*59*/ MatGetSubMatrix_MPIBAIJ,
2620        MatDestroy_MPIBAIJ,
2621        MatView_MPIBAIJ,
2622        0,
2623        0,
2624 /*64*/ 0,
2625        0,
2626        0,
2627        0,
2628        0,
2629 /*69*/ MatGetRowMaxAbs_MPIBAIJ,
2630        0,
2631        0,
2632        0,
2633        0,
2634 /*74*/ 0,
2635        MatFDColoringApply_BAIJ,
2636        0,
2637        0,
2638        0,
2639 /*79*/ 0,
2640        0,
2641        0,
2642        0,
2643        MatLoad_MPIBAIJ,
2644 /*84*/ 0,
2645        0,
2646        0,
2647        0,
2648        0,
2649 /*89*/ 0,
2650        0,
2651        0,
2652        0,
2653        0,
2654 /*94*/ 0,
2655        0,
2656        0,
2657        0,
2658        0,
2659 /*99*/ 0,
2660        0,
2661        0,
2662        0,
2663        0,
2664 /*104*/0,
2665        MatRealPart_MPIBAIJ,
2666        MatImaginaryPart_MPIBAIJ,
2667        0,
2668        0,
2669 /*109*/0,
2670        0,
2671        0,
2672        0,
2673        0,
2674 /*114*/MatGetSeqNonzerostructure_MPIBAIJ,
2675        0,
2676        MatGetGhosts_MPIBAIJ
2677 };
2678 
2679 EXTERN_C_BEGIN
2680 #undef __FUNCT__
2681 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
2682 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
2683 {
2684   PetscFunctionBegin;
2685   *a      = ((Mat_MPIBAIJ *)A->data)->A;
2686   *iscopy = PETSC_FALSE;
2687   PetscFunctionReturn(0);
2688 }
2689 EXTERN_C_END
2690 
2691 EXTERN_C_BEGIN
2692 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*);
2693 EXTERN_C_END
2694 
2695 EXTERN_C_BEGIN
2696 #undef __FUNCT__
2697 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ"
2698 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2699 {
2700   PetscInt       m,rstart,cstart,cend;
2701   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
2702   const PetscInt *JJ=0;
2703   PetscScalar    *values=0;
2704   PetscErrorCode ierr;
2705 
2706   PetscFunctionBegin;
2707 
2708   if (bs < 1) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2709   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2710   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2711   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2712   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2713   m      = B->rmap->n/bs;
2714   rstart = B->rmap->rstart/bs;
2715   cstart = B->cmap->rstart/bs;
2716   cend   = B->cmap->rend/bs;
2717 
2718   if (ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
2719   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
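  /* for each block row, count the block columns inside [cstart,cend)
     (diagonal part, d_nnz) and outside it (off-diagonal part, o_nnz);
     the two scans below rely on jj being sorted within each row */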
2720   for (i=0; i<m; i++) {
2721     nz = ii[i+1] - ii[i];
2722     if (nz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
2723     nz_max = PetscMax(nz_max,nz);
2724     JJ  = jj + ii[i];
2725     for (j=0; j<nz; j++) {
2726       if (*JJ >= cstart) break;
2727       JJ++;
2728     }
2729     d = 0;
2730     for (; j<nz; j++) {
2731       if (*JJ++ >= cend) break;
2732       d++;
2733     }
2734     d_nnz[i] = d;
2735     o_nnz[i] = nz - d;
2736   }
2737   ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2738   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
2739 
2740   values = (PetscScalar*)V;
2741   if (!values) {
2742     ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2743     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2744   }
2745   for (i=0; i<m; i++) {
2746     PetscInt          row    = i + rstart;
2747     PetscInt          ncols  = ii[i+1] - ii[i];
2748     const PetscInt    *icols = jj + ii[i];
2749     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2750     ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2751   }
2752 
2753   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2754   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2755   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2756 
2757   PetscFunctionReturn(0);
2758 }
2759 EXTERN_C_END
2760 
2761 #undef __FUNCT__
2762 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR"
2763 /*@C
2764    MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format
2765    (block compressed row).
2766 
2767    Collective on MPI_Comm
2768 
2769    Input Parameters:
2770 +  B - the matrix
.  bs - the block size
2771 .  i - the indices into j for the start of each local block row (starts with zero)
2772 .  j - the block column indices for each local block row (starts with zero); these must be sorted for each block row
2773 -  v - optional values in the matrix
2774 
2775    Level: developer
2776 
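   Example:
   A minimal calling sketch (not taken from the PETSc examples; comm and the
   block-CSR arrays ii, jj, vv describing the local block rows are assumed to
   exist, and the sizes are illustrative only):
.vb
      Mat      B;
      PetscInt bs = 2, mlocal = 10;   /* 10 local block rows */
      MatCreate(comm,&B);
      MatSetSizes(B,bs*mlocal,bs*mlocal,PETSC_DETERMINE,PETSC_DETERMINE);
      MatSetType(B,MATMPIBAIJ);
      MatMPIBAIJSetPreallocationCSR(B,bs,ii,jj,vv);
.ve
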
2777 .keywords: matrix, baij, block, compressed row, sparse, parallel
2778 
2779 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIBAIJ(), MPIBAIJ
2780 @*/
2781 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2782 {
2783   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]);
2784 
2785   PetscFunctionBegin;
2786   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
2787   if (f) {
2788     ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr);
2789   }
2790   PetscFunctionReturn(0);
2791 }
2792 
2793 EXTERN_C_BEGIN
2794 #undef __FUNCT__
2795 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
2796 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
2797 {
2798   Mat_MPIBAIJ    *b;
2799   PetscErrorCode ierr;
2800   PetscInt       i, newbs = PetscAbs(bs);
2801 
2802   PetscFunctionBegin;
2803   if (bs < 0) {
2804     ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPIBAIJ matrix","Mat");CHKERRQ(ierr);
2805       ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr);
2806     ierr = PetscOptionsEnd();CHKERRQ(ierr);
2807     bs   = PetscAbs(bs);
2808   }
2809   if ((d_nnz || o_nnz) && newbs != bs) {
2810     SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting d_nnz or o_nnz");
2811   }
2812   bs = newbs;
2813 
2814 
2815   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
2816   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2817   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2818   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2819   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2820 
2821   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2822   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2823   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2824   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2825 
2826   if (d_nnz) {
2827     for (i=0; i<B->rmap->n/bs; i++) {
2828       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
2829     }
2830   }
2831   if (o_nnz) {
2832     for (i=0; i<B->rmap->n/bs; i++) {
2833       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
2834     }
2835   }
2836 
2837   b = (Mat_MPIBAIJ*)B->data;
2838   b->bs2 = bs*bs;
2839   b->mbs = B->rmap->n/bs;
2840   b->nbs = B->cmap->n/bs;
2841   b->Mbs = B->rmap->N/bs;
2842   b->Nbs = B->cmap->N/bs;
2843 
2844   for (i=0; i<=b->size; i++) {
2845     b->rangebs[i] = B->rmap->range[i]/bs;
2846   }
2847   b->rstartbs = B->rmap->rstart/bs;
2848   b->rendbs   = B->rmap->rend/bs;
2849   b->cstartbs = B->cmap->rstart/bs;
2850   b->cendbs   = B->cmap->rend/bs;
2851 
2852   if (!B->preallocated) {
2853     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2854     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2855     ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
2856     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
2857     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2858     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2859     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
2860     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
2861     ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr);
2862   }
2863 
2864   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2865   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
2866   B->preallocated = PETSC_TRUE;
2867   PetscFunctionReturn(0);
2868 }
2869 EXTERN_C_END
2870 
2871 EXTERN_C_BEGIN
2872 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
2873 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
2874 EXTERN_C_END
2875 
2876 
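/*
   MatConvert_MPIBAIJ_MPIAdj builds a block-level adjacency graph from the
   diagonal (A) and off-diagonal (B) SeqBAIJ parts: for each local block row it
   first copies the off-diagonal block columns below rstart (through garray),
   then the diagonal part's columns shifted by rstart (skipping the diagonal
   entry itself), then the remaining off-diagonal block columns, so every row
   of jj stays sorted by global block index.
*/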
2877 EXTERN_C_BEGIN
2878 #undef __FUNCT__
2879 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj"
2880 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPIAdj(Mat B, const MatType newtype,MatReuse reuse,Mat *adj)
2881 {
2882   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
2883   PetscErrorCode ierr;
2884   Mat_SeqBAIJ    *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data;
2885   PetscInt       M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs;
2886   const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;
2887 
2888   PetscFunctionBegin;
2889   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&ii);CHKERRQ(ierr);
2890   ii[0] = 0;
2891   CHKMEMQ;
2892   for (i=0; i<M; i++) {
2893     if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]);
2894     if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]);
2895     ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i];
2896     /* remove one from the count if the matrix row contains its own diagonal entry */
2897     for (j=id[i]; j<id[i+1]; j++) {
2898       if (jd[j] == i) {ii[i+1]--;break;}
2899     }
2900   CHKMEMQ;
2901   }
2902   ierr = PetscMalloc(ii[M]*sizeof(PetscInt),&jj);CHKERRQ(ierr);
2903   cnt = 0;
2904   for (i=0; i<M; i++) {
2905     for (j=io[i]; j<io[i+1]; j++) {
2906       if (garray[jo[j]] > rstart) break;
2907       jj[cnt++] = garray[jo[j]];
2908   CHKMEMQ;
2909     }
2910     for (k=id[i]; k<id[i+1]; k++) {
2911       if (jd[k] != i) {
2912         jj[cnt++] = rstart + jd[k];
2913   CHKMEMQ;
2914       }
2915     }
2916     for (;j<io[i+1]; j++) {
2917       jj[cnt++] = garray[jo[j]];
2918   CHKMEMQ;
2919     }
2920   }
2921   ierr = MatCreateMPIAdj(((PetscObject)B)->comm,M,B->cmap->N/B->rmap->bs,ii,jj,PETSC_NULL,adj);CHKERRQ(ierr);
2922   PetscFunctionReturn(0);
2923 }
2924 EXTERN_C_END
2925 
2926 /*MC
2927    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
2928 
2929    Options Database Keys:
2930 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
2931 . -mat_block_size <bs> - set the blocksize used to store the matrix
2932 - -mat_use_hash_table <fact> - use a hash table during matrix assembly; fact determines the hash table size
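
   A sketch of selecting this type at runtime (error checking omitted; run the
   program with -mat_type mpibaij -mat_block_size 2):

.vb
   Mat A;
   MatCreate(comm,&A);
   MatSetSizes(A,m,n,M,N);
   MatSetFromOptions(A);
.ve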
2933 
2934   Level: beginner
2935 
2936 .seealso: MatCreateMPIBAIJ()
2937 M*/
2938 
2939 EXTERN_C_BEGIN
2940 #undef __FUNCT__
2941 #define __FUNCT__ "MatCreate_MPIBAIJ"
2942 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIBAIJ(Mat B)
2943 {
2944   Mat_MPIBAIJ    *b;
2945   PetscErrorCode ierr;
2946   PetscTruth     flg;
2947 
2948   PetscFunctionBegin;
2949   ierr = PetscNewLog(B,Mat_MPIBAIJ,&b);CHKERRQ(ierr);
2950   B->data = (void*)b;
2951 
2952 
2953   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2954   B->mapping    = 0;
2955   B->assembled  = PETSC_FALSE;
2956 
2957   B->insertmode = NOT_SET_VALUES;
2958   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
2959   ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr);
2960 
2961   /* build local table of row and column ownerships */
2962   ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);
2963 
2964   /* build cache for off array entries formed */
2965   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
2966   b->donotstash  = PETSC_FALSE;
2967   b->colmap      = PETSC_NULL;
2968   b->garray      = PETSC_NULL;
2969   b->roworiented = PETSC_TRUE;
2970 
2971   /* stuff used in block assembly */
2972   b->barray       = 0;
2973 
2974   /* stuff used for matrix vector multiply */
2975   b->lvec         = 0;
2976   b->Mvctx        = 0;
2977 
2978   /* stuff for MatGetRow() */
2979   b->rowindices   = 0;
2980   b->rowvalues    = 0;
2981   b->getrowactive = PETSC_FALSE;
2982 
2983   /* hash table stuff */
2984   b->ht           = 0;
2985   b->hd           = 0;
2986   b->ht_size      = 0;
2987   b->ht_flag      = PETSC_FALSE;
2988   b->ht_fact      = 0;
2989   b->ht_total_ct  = 0;
2990   b->ht_insert_ct = 0;
2991 
2992   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for MPIBAIJ matrix","Mat");CHKERRQ(ierr);
2993     ierr = PetscOptionsTruth("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
2994     if (flg) {
2995       PetscReal fact = 1.39;
2996       ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
2997       ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr);
2998       if (fact <= 1.0) fact = 1.39;
2999       ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
3000       ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
3001     }
3002   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3003 
3004   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",
3005                                      "MatConvert_MPIBAIJ_MPIAdj",
3006                                       MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr);
3007   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3008                                      "MatStoreValues_MPIBAIJ",
3009                                      MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
3010   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3011                                      "MatRetrieveValues_MPIBAIJ",
3012                                      MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
3013   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
3014                                      "MatGetDiagonalBlock_MPIBAIJ",
3015                                      MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
3016   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C",
3017                                      "MatMPIBAIJSetPreallocation_MPIBAIJ",
3018                                      MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
3019   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",
3020                                      "MatMPIBAIJSetPreallocationCSR_MPIBAIJ",
3021                                      MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
3022   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
3023                                      "MatDiagonalScaleLocal_MPIBAIJ",
3024                                      MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
3025   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C",
3026                                      "MatSetHashTableFactor_MPIBAIJ",
3027                                      MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
3028   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);
3029   PetscFunctionReturn(0);
3030 }
3031 EXTERN_C_END
3032 
3033 /*MC
3034    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
3035 
3036    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
3037    and MATMPIBAIJ otherwise.
3038 
3039    Options Database Keys:
3040 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
3041 
3042   Level: beginner
3043 
3044 .seealso: MatCreateMPIBAIJ(), MATSEQBAIJ, MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3045 M*/
3046 
3047 EXTERN_C_BEGIN
3048 #undef __FUNCT__
3049 #define __FUNCT__ "MatCreate_BAIJ"
3050 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_BAIJ(Mat A)
3051 {
3052   PetscErrorCode ierr;
3053   PetscMPIInt    size;
3054 
3055   PetscFunctionBegin;
3056   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
3057   if (size == 1) {
3058     ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr);
3059   } else {
3060     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
3061   }
3062   PetscFunctionReturn(0);
3063 }
3064 EXTERN_C_END
3065 
3066 #undef __FUNCT__
3067 #define __FUNCT__ "MatMPIBAIJSetPreallocation"
3068 /*@C
3069    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
3070    (block compressed row).  For good matrix assembly performance
3071    the user should preallocate the matrix storage by setting the parameters
3072    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3073    performance can be increased by more than a factor of 50.
3074 
3075    Collective on Mat
3076 
3077    Input Parameters:
3078 +  B - the matrix
3079 .  bs   - size of block
3080 .  d_nz  - number of block nonzeros per block row in the diagonal portion of the local
3081            submatrix (same for all local block rows)
3082 .  d_nnz - array containing the number of block nonzeros in the various block rows
3083            in the diagonal portion of the local submatrix (possibly different for each block
3084            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
3085 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of the local
3086            submatrix (same for all local block rows).
3087 -  o_nnz - array containing the number of block nonzeros in the various block rows of the
3088            off-diagonal portion of the local submatrix (possibly different for
3089            each block row) or PETSC_NULL.
3090 
3091    If the *_nnz parameter is given then the *_nz parameter is ignored
3092 
3093    Options Database Keys:
3094 +   -mat_block_size - size of the blocks to use
3095 -   -mat_use_hash_table <fact> - use a hash table during matrix assembly; fact determines the hash table size
3096 
3097    Notes:
3098    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
3099    then it must be used on all processors that share the object for that argument.
3100 
3101    Storage Information:
3102    For a square global matrix we define each processor's diagonal portion
3103    to be its local rows and the corresponding columns (a square submatrix);
3104    each processor's off-diagonal portion encompasses the remainder of the
3105    local matrix (a rectangular submatrix).
3106 
3107    The user can specify preallocated storage for the diagonal part of
3108    the local submatrix with either d_nz or d_nnz (not both).  Set
3109    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
3110    memory allocation.  Likewise, specify preallocated storage for the
3111    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3112 
3113    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3114    the figure below we depict these three local rows and all columns (0-11).
3115 
3116 .vb
3117            0 1 2 3 4 5 6 7 8 9 10 11
3118           -------------------
3119    row 3  |  o o o d d d o o o o o o
3120    row 4  |  o o o d d d o o o o o o
3121    row 5  |  o o o d d d o o o o o o
3122           -------------------
3123 .ve
3124 
3125    Thus, any entries in the d locations are stored in the d (diagonal)
3126    submatrix, and any entries in the o locations are stored in the
3127    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3128    stored simply in the MATSEQBAIJ format for compressed row storage.
3129 
3130    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3131    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3132    In general, for PDE problems in which most nonzeros are near the diagonal,
3133    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3134    or you will get TERRIBLE performance; see the users' manual chapter on
3135    matrices.
3136 
3137    You can call MatGetInfo() to get information on how effective the preallocation was;
3138    for example, the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
3139    You can also run with the option -info and look for messages with the string
3140    malloc in them to see if additional memory allocation was needed.
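
   For the three depicted rows with bs=3, the rows form a single block row with
   block columns 0..3, of which block column 1 is the diagonal block; assuming
   every depicted block is nonzero, a matching preallocation sketch is

.vb
   MatMPIBAIJSetPreallocation(A,3,1,PETSC_NULL,3,PETSC_NULL);
.ve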
3141 
3142    Level: intermediate
3143 
3144 .keywords: matrix, block, aij, compressed row, sparse, parallel
3145 
3146 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR()
3147 @*/
3148 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3149 {
3150   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
3151 
3152   PetscFunctionBegin;
3153   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3154   if (f) {
3155     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3156   }
3157   PetscFunctionReturn(0);
3158 }
3159 
3160 #undef __FUNCT__
3161 #define __FUNCT__ "MatCreateMPIBAIJ"
3162 /*@C
3163    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
3164    (block compressed row).  For good matrix assembly performance
3165    the user should preallocate the matrix storage by setting the parameters
3166    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3167    performance can be increased by more than a factor of 50.
3168 
3169    Collective on MPI_Comm
3170 
3171    Input Parameters:
3172 +  comm - MPI communicator
3173 .  bs   - size of block
3174 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3175            This value should be the same as the local size used in creating the
3176            y vector for the matrix-vector product y = Ax.
3177 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
3178            This value should be the same as the local size used in creating the
3179            x vector for the matrix-vector product y = Ax.
3180 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3181 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3182 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
3183            submatrix  (same for all local rows)
3184 .  d_nnz - array containing the number of nonzero blocks in the various block rows
3185            in the diagonal portion of the local submatrix (possibly different for each block
3186            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
3187 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3188            submatrix (same for all local rows).
3189 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
3190            off-diagonal portion of the local submatrix (possibly different for
3191            each block row) or PETSC_NULL.
3192 
3193    Output Parameter:
3194 .  A - the matrix
3195 
3196    Options Database Keys:
3197 +   -mat_block_size - size of the blocks to use
3198 -   -mat_use_hash_table <fact> - use a hash table during matrix assembly; fact determines the hash table size
3199 
3200    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3201    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3202    [MatXXXXSetPreallocation() is, for example, MatMPIBAIJSetPreallocation()]
3203 
3204    Notes:
3205    If the *_nnz parameter is given then the *_nz parameter is ignored
3206 
3207    A nonzero block is any block that has 1 or more nonzeros in it
3208 
3209    The user MUST specify either the local or global matrix dimensions
3210    (possibly both).
3211 
3212    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
3213    then it must be used on all processors that share the object for that argument.
3214 
3215    Storage Information:
3216    For a square global matrix we define each processor's diagonal portion
3217    to be its local rows and the corresponding columns (a square submatrix);
3218    each processor's off-diagonal portion encompasses the remainder of the
3219    local matrix (a rectangular submatrix).
3220 
3221    The user can specify preallocated storage for the diagonal part of
3222    the local submatrix with either d_nz or d_nnz (not both).  Set
3223    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
3224    memory allocation.  Likewise, specify preallocated storage for the
3225    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3226 
3227    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3228    the figure below we depict these three local rows and all columns (0-11).
3229 
3230 .vb
3231            0 1 2 3 4 5 6 7 8 9 10 11
3232           -------------------
3233    row 3  |  o o o d d d o o o o o o
3234    row 4  |  o o o d d d o o o o o o
3235    row 5  |  o o o d d d o o o o o o
3236           -------------------
3237 .ve
3238 
3239    Thus, any entries in the d locations are stored in the d (diagonal)
3240    submatrix, and any entries in the o locations are stored in the
3241    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3242    stored simply in the MATSEQBAIJ format for compressed row storage.
3243 
3244    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3245    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3246    In general, for PDE problems in which most nonzeros are near the diagonal,
3247    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3248    or you will get TERRIBLE performance; see the users' manual chapter on
3249    matrices.
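
   For the layout depicted above with bs=3 (one diagonal block and three
   off-diagonal blocks per block row), a creation sketch is

.vb
   Mat A;
   MatCreateMPIBAIJ(comm,3,PETSC_DECIDE,PETSC_DECIDE,M,N,1,PETSC_NULL,3,PETSC_NULL,&A);
.ve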
3250 
3251    Level: intermediate
3252 
3253 .keywords: matrix, block, aij, compressed row, sparse, parallel
3254 
3255 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3256 @*/
3257 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3258 {
3259   PetscErrorCode ierr;
3260   PetscMPIInt    size;
3261 
3262   PetscFunctionBegin;
3263   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3264   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3265   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3266   if (size > 1) {
3267     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
3268     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3269   } else {
3270     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3271     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3272   }
3273   PetscFunctionReturn(0);
3274 }
3275 
3276 #undef __FUNCT__
3277 #define __FUNCT__ "MatDuplicate_MPIBAIJ"
3278 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3279 {
3280   Mat            mat;
3281   Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
3282   PetscErrorCode ierr;
3283   PetscInt       len=0;
3284 
3285   PetscFunctionBegin;
3286   *newmat       = 0;
3287   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
3288   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3289   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3290   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3291 
3292   mat->factor       = matin->factor;
3293   mat->preallocated = PETSC_TRUE;
3294   mat->assembled    = PETSC_TRUE;
3295   mat->insertmode   = NOT_SET_VALUES;
3296 
3297   a      = (Mat_MPIBAIJ*)mat->data;
3298   mat->rmap->bs  = matin->rmap->bs;
3299   a->bs2   = oldmat->bs2;
3300   a->mbs   = oldmat->mbs;
3301   a->nbs   = oldmat->nbs;
3302   a->Mbs   = oldmat->Mbs;
3303   a->Nbs   = oldmat->Nbs;
3304 
3305   ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3306   ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3307 
3308   a->size         = oldmat->size;
3309   a->rank         = oldmat->rank;
3310   a->donotstash   = oldmat->donotstash;
3311   a->roworiented  = oldmat->roworiented;
3312   a->rowindices   = 0;
3313   a->rowvalues    = 0;
3314   a->getrowactive = PETSC_FALSE;
3315   a->barray       = 0;
3316   a->rstartbs     = oldmat->rstartbs;
3317   a->rendbs       = oldmat->rendbs;
3318   a->cstartbs     = oldmat->cstartbs;
3319   a->cendbs       = oldmat->cendbs;
3320 
3321   /* hash table stuff */
3322   a->ht           = 0;
3323   a->hd           = 0;
3324   a->ht_size      = 0;
3325   a->ht_flag      = oldmat->ht_flag;
3326   a->ht_fact      = oldmat->ht_fact;
3327   a->ht_total_ct  = 0;
3328   a->ht_insert_ct = 0;
3329 
3330   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
3331   if (oldmat->colmap) {
3332 #if defined (PETSC_USE_CTABLE)
3333   ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3334 #else
3335   ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3336   ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3337   ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3338 #endif
3339   } else a->colmap = 0;
3340 
3341   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
3342     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3343     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3344     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
3345   } else a->garray = 0;
3346 
3347   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
3348   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3349   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3350   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3351   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3352 
3353   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3354   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3355   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3356   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3357   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3358   *newmat = mat;
3359 
3360   PetscFunctionReturn(0);
3361 }
3362 
3363 #undef __FUNCT__
3364 #define __FUNCT__ "MatLoad_MPIBAIJ"
3365 PetscErrorCode MatLoad_MPIBAIJ(PetscViewer viewer, const MatType type,Mat *newmat)
3366 {
3367   Mat            A;
3368   PetscErrorCode ierr;
3369   int            fd;
3370   PetscInt       i,nz,j,rstart,rend;
3371   PetscScalar    *vals,*buf;
3372   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3373   MPI_Status     status;
3374   PetscMPIInt    rank,size,maxnz;
3375   PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
3376   PetscInt       *locrowlens = PETSC_NULL,*procsnz = PETSC_NULL,*browners = PETSC_NULL;
3377   PetscInt       jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax;
3378   PetscMPIInt    tag = ((PetscObject)viewer)->tag;
3379   PetscInt       *dlens = PETSC_NULL,*odlens = PETSC_NULL,*mask = PETSC_NULL,*masked1 = PETSC_NULL,*masked2 = PETSC_NULL,rowcount,odcount;
3380   PetscInt       dcount,kmax,k,nzcount,tmp,mend;
3381 
3382   PetscFunctionBegin;
3383   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIBAIJ matrix","Mat");CHKERRQ(ierr);
3384     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
3385   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3386 
3387   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3388   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3389   if (!rank) {
3390     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3391     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
3392     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
3393   }
3394 
3395   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3396   M = header[1]; N = header[2];
3397 
3398   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
3399 
3400   /*
3401      This code adds extra rows to make sure the number of rows is
3402      divisible by the blocksize
3403   */
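  /* e.g. M = 10 with bs = 4: Mbs = 10/4 = 2 and extra_rows = 4 - 10 + 8 = 2,
     so Mbs becomes 3 and the two padded rows each receive a single 1.0 on the
     diagonal further below */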
3404   Mbs        = M/bs;
3405   extra_rows = bs - M + bs*Mbs;
3406   if (extra_rows == bs) extra_rows = 0;
3407   else                  Mbs++;
3408   if (extra_rows && !rank) {
3409     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3410   }
3411 
3412   /* determine ownership of all rows */
3413   mbs        = Mbs/size + ((Mbs % size) > rank);
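  /* e.g. Mbs = 10 on 4 processes gives mbs = 3,3,2,2 across ranks 0..3 */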
3414   m          = mbs*bs;
3415   ierr       = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr);
3416   ierr       = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3417 
3418   /* process 0 needs enough room for process with most rows */
3419   if (!rank) {
3420     mmax = rowners[1];
3421     for (i=2; i<=size; i++) {
3422       mmax = PetscMax(mmax,rowners[i]);
3423     }
3424     mmax*=bs;
3425   } else mmax = m;
3426 
3427   rowners[0] = 0;
3428   for (i=2; i<=size; i++)  rowners[i] += rowners[i-1];
3429   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
3430   rstart = rowners[rank];
3431   rend   = rowners[rank+1];
3432 
3433   /* distribute row lengths to all processors */
3434   ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr);
3435   if (!rank) {
3436     mend = m;
3437     if (size == 1) mend = mend - extra_rows;
3438     ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr);
3439     for (j=mend; j<m; j++) locrowlens[j] = 1;
3440     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3441     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3442     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3443     for (j=0; j<m; j++) {
3444       procsnz[0] += locrowlens[j];
3445     }
3446     for (i=1; i<size; i++) {
3447       mend = browners[i+1] - browners[i];
3448       if (i == size-1) mend = mend - extra_rows;
3449       ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr);
3450       for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
3451       /* calculate the number of nonzeros on each processor */
3452       for (j=0; j<browners[i+1]-browners[i]; j++) {
3453         procsnz[i] += rowlengths[j];
3454       }
3455       ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3456     }
3457     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3458   } else {
3459     ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3460   }
3461 
3462   if (!rank) {
3463     /* determine max buffer needed and allocate it */
3464     maxnz = procsnz[0];
3465     for (i=1; i<size; i++) {
3466       maxnz = PetscMax(maxnz,procsnz[i]);
3467     }
3468     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3469 
3470     /* read in my part of the matrix column indices  */
3471     nz     = procsnz[0];
3472     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
3473     mycols = ibuf;
3474     if (size == 1)  nz -= extra_rows;
3475     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3476     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
3477 
3478     /* read in everyone else's column indices (except the last process's) and ship them off */
3479     for (i=1; i<size-1; i++) {
3480       nz   = procsnz[i];
3481       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3482       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3483     }
3484     /* read in the stuff for the last proc */
3485     if (size != 1) {
3486       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
3487       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3488       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
3489       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
3490     }
3491     ierr = PetscFree(cols);CHKERRQ(ierr);
3492   } else {
3493     /* determine buffer space needed for message */
3494     nz = 0;
3495     for (i=0; i<m; i++) {
3496       nz += locrowlens[i];
3497     }
3498     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
3499     mycols = ibuf;
3500     /* receive message of column indices */
3501     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3502     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
3503     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
3504   }
3505 
3506   /* loop over local rows, determining the number of off-diagonal entries */
3507   ierr     = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
3508   ierr     = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
3509   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3510   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3511   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3512   rowcount = 0; nzcount = 0;
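  /* mask[] flags the block columns already seen in the current block row;
     masked1[] collects the diagonal block columns and masked2[] the
     off-diagonal ones, so dlens/odlens count distinct blocks per block row */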
3513   for (i=0; i<mbs; i++) {
3514     dcount  = 0;
3515     odcount = 0;
3516     for (j=0; j<bs; j++) {
3517       kmax = locrowlens[rowcount];
3518       for (k=0; k<kmax; k++) {
3519         tmp = mycols[nzcount++]/bs;
3520         if (!mask[tmp]) {
3521           mask[tmp] = 1;
3522           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
3523           else masked1[dcount++] = tmp;
3524         }
3525       }
3526       rowcount++;
3527     }
3528 
3529     dlens[i]  = dcount;
3530     odlens[i] = odcount;
3531 
3532     /* zero out the mask elements we set */
3533     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
3534     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
3535   }
3536 
3537   /* create our matrix */
3538   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
3539   ierr = MatSetSizes(A,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3540   ierr = MatSetType(A,type);CHKERRQ(ierr);
3541   ierr = MatMPIBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
3542 
3543   if (!rank) {
3544     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
3545     /* read in my part of the matrix numerical values  */
3546     nz = procsnz[0];
3547     vals = buf;
3548     mycols = ibuf;
3549     if (size == 1)  nz -= extra_rows;
3550     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3551     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
3552 
3553     /* insert into matrix */
3554     jj      = rstart*bs;
3555     for (i=0; i<m; i++) {
3556       ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3557       mycols += locrowlens[i];
3558       vals   += locrowlens[i];
3559       jj++;
3560     }
3561     /* read in other processors (except the last one) and ship out */
3562     for (i=1; i<size-1; i++) {
3563       nz   = procsnz[i];
3564       vals = buf;
3565       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3566       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
3567     }
3568     /* the last proc */
3569     if (size != 1) {
3570       nz   = procsnz[size-1] - extra_rows;   /* i == size-1 here; the extra rows are not on disk */
3571       vals = buf;
3572       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3573       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
3574       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
3575     }
3576     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3577   } else {
3578     /* receive numeric values */
3579     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
3580 
3581     /* receive message of values */
3582     vals   = buf;
3583     mycols = ibuf;
3584     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
3585     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
3586     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
3587 
3588     /* insert into matrix */
3589     jj      = rstart*bs;
3590     for (i=0; i<m; i++) {
3591       ierr    = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3592       mycols += locrowlens[i];
3593       vals   += locrowlens[i];
3594       jj++;
3595     }
3596   }
3597   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
3598   ierr = PetscFree(buf);CHKERRQ(ierr);
3599   ierr = PetscFree(ibuf);CHKERRQ(ierr);
3600   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
3601   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
3602   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
3603   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3604   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3605 
3606   *newmat = A;
3607   PetscFunctionReturn(0);
3608 }
3609 
3610 #undef __FUNCT__
3611 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
3612 /*@
3613    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
3614 
3615    Input Parameters:
3616 +  mat  - the matrix
3617 -  fact - factor
3618 
3619    Collective on Mat
3620 
3621    Level: advanced
3622 
3623   Notes:
3624    This can also be set by the command line option: -mat_use_hash_table <fact>
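
   For example, a sketch of setting the factor to 1.6 on an MPIBAIJ matrix A:

.vb
   MatMPIBAIJSetHashTableFactor(A,1.6);
.ve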
3625 
3626 .keywords: matrix, hashtable, factor, HT
3627 
3628 .seealso: MatSetOption()
3629 @*/
3630 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
3631 {
3632   PetscErrorCode ierr,(*f)(Mat,PetscReal);
3633 
3634   PetscFunctionBegin;
3635   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);CHKERRQ(ierr);
3636   if (f) {
3637     ierr = (*f)(mat,fact);CHKERRQ(ierr);
3638   }
3639   PetscFunctionReturn(0);
3640 }
3641 
3642 EXTERN_C_BEGIN
3643 #undef __FUNCT__
3644 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
3645 PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
3646 {
3647   Mat_MPIBAIJ *baij;
3648 
3649   PetscFunctionBegin;
3650   baij = (Mat_MPIBAIJ*)mat->data;
3651   baij->ht_fact = fact;
3652   PetscFunctionReturn(0);
3653 }
3654 EXTERN_C_END
3655 
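/*
   MatMPIBAIJGetSeqBAIJ gives access to the two SeqBAIJ pieces of an MPIBAIJ
   matrix: Ad is the local diagonal block, Ao the local off-diagonal block, and
   colmap maps Ao's local block columns to global block column indices (garray).
*/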
3656 #undef __FUNCT__
3657 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
3658 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3659 {
3660   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
3661   PetscFunctionBegin;
3662   *Ad     = a->A;
3663   *Ao     = a->B;
3664   *colmap = a->garray;
3665   PetscFunctionReturn(0);
3666 }
3667 
3668 /*
3669     Special version for direct calls from Fortran (to eliminate two function call overheads)
3670 */
3671 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3672 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3673 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3674 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3675 #endif
3676 
3677 #undef __FUNCT__
3678 #define __FUNCT__ "matmpibaijsetvaluesblocked"
3679 /*@C
3680   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked()
3681 
3682   Collective on Mat
3683 
3684   Input Parameters:
3685 + mat - the matrix
3686 . min - number of input rows
3687 . im - input rows
3688 . nin - number of input columns
3689 . in - input columns
3690 . v - numerical values input
3691 - addvin - INSERT_VALUES or ADD_VALUES
3692 
3693   Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse.
3694 
3695   Level: advanced
3696 
3697 .seealso:   MatSetValuesBlocked()
3698 @*/
3699 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
3700 {
3701   /* convert input arguments to C version */
3702   Mat             mat = *matin;
3703   PetscInt        m = *min, n = *nin;
3704   InsertMode      addv = *addvin;
3705 
3706   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
3707   const MatScalar *value;
3708   MatScalar       *barray=baij->barray;
3709   PetscTruth      roworiented = baij->roworiented;
3710   PetscErrorCode  ierr;
3711   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
3712   PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
3713   PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
3714 
3715   PetscFunctionBegin;
3716   /* tasks normally handled by MatSetValuesBlocked() */
3717   if (mat->insertmode == NOT_SET_VALUES) {
3718     mat->insertmode = addv;
3719   }
3720 #if defined(PETSC_USE_DEBUG)
3721   else if (mat->insertmode != addv) {
3722     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
3723   }
3724   if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
3725 #endif
3726   if (mat->assembled) {
3727     mat->was_assembled = PETSC_TRUE;
3728     mat->assembled     = PETSC_FALSE;
3729   }
3730   ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
3731 
3732 
3733   if (!barray) {
3734     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
3735     baij->barray = barray;
3736   }
3737 
3738   if (roworiented) {
3739     stepval = (n-1)*bs;
3740   } else {
3741     stepval = (m-1)*bs;
3742   }
3743   for (i=0; i<m; i++) {
3744     if (im[i] < 0) continue;
3745 #if defined(PETSC_USE_DEBUG)
3746     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
3747 #endif
3748     if (im[i] >= rstart && im[i] < rend) {
3749       row = im[i] - rstart;
3750       for (j=0; j<n; j++) {
3751         /* If NumCol = 1 then a copy is not required */
3752         if ((roworiented) && (n == 1)) {
3753           barray = (MatScalar*)v + i*bs2;
3754         } else if((!roworiented) && (m == 1)) {
3755           barray = (MatScalar*)v + j*bs2;
3756         } else { /* Here a copy is required */
3757           if (roworiented) {
3758             value = v + i*(stepval+bs)*bs + j*bs;
3759           } else {
3760             value = v + j*(stepval+bs)*bs + i*bs;
3761           }
3762           for (ii=0; ii<bs; ii++,value+=stepval) {
3763             for (jj=0; jj<bs; jj++) {
3764               *barray++  = *value++;
3765             }
3766           }
3767           barray -=bs2;
3768         }
3769 
3770         if (in[j] >= cstart && in[j] < cend){
3771           col  = in[j] - cstart;
3772           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
3773         }
3774         else if (in[j] < 0) continue;
3775 #if defined(PETSC_USE_DEBUG)
3776         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
3777 #endif
3778         else {
3779           if (mat->was_assembled) {
3780             if (!baij->colmap) {
3781               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
3782             }
3783 
3784 #if defined(PETSC_USE_DEBUG)
3785 #if defined (PETSC_USE_CTABLE)
3786             { PetscInt data;
3787               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
3788               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
3789             }
3790 #else
3791             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
3792 #endif
3793 #endif
3794 #if defined (PETSC_USE_CTABLE)
3795             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
3796             col  = (col - 1)/bs;
3797 #else
3798             col = (baij->colmap[in[j]] - 1)/bs;
3799 #endif
3800             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
3801               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
3802               col =  in[j];
3803             }
3804           }
3805           else col = in[j];
3806           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
3807         }
3808       }
3809     } else {
3810       if (!baij->donotstash) {
3811         if (roworiented) {
3812           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
3813         } else {
3814           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
3815         }
3816       }
3817     }
3818   }
3819 
3820   /* task normally handled by MatSetValuesBlocked() */
3821   ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
3822   PetscFunctionReturn(0);
3823 }
3824