xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision e7e72b3d0edcd0d15e7f68c03be08666507fc872)
1 #define PETSCMAT_DLL
2 
3 #include "../src/mat/impls/baij/mpi/mpibaij.h"   /*I  "petscmat.h"  I*/
4 #include "petscblaslapack.h"
5 
6 EXTERN PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
7 EXTERN PetscErrorCode DisAssemble_MPIBAIJ(Mat);
8 EXTERN PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat,PetscInt,IS[],PetscInt);
9 EXTERN PetscErrorCode MatGetSubMatrices_MPIBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat *[]);
10 EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []);
11 EXTERN PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
12 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
13 EXTERN PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
14 EXTERN PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
15 EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar);
16 
17 #undef __FUNCT__
18 #define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ"
19 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
20 {
21   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
22   PetscErrorCode ierr;
23   PetscInt       i,*idxb = 0;
24   PetscScalar    *va,*vb;
25   Vec            vtmp;
26 
27   PetscFunctionBegin;
28   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
29   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
30   if (idx) {
31     for (i=0; i<A->rmap->n; i++) {if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;}
32   }
33 
34   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
35   if (idx) {ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);}
36   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
37   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
38 
39   for (i=0; i<A->rmap->n; i++){
40     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {va[i] = vb[i]; if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);}
41   }
42 
43   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
44   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
45   if (idxb) {ierr = PetscFree(idxb);CHKERRQ(ierr);}
46   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
47   PetscFunctionReturn(0);
48 }
49 
50 EXTERN_C_BEGIN
51 #undef __FUNCT__
52 #define __FUNCT__ "MatStoreValues_MPIBAIJ"
53 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIBAIJ(Mat mat)
54 {
55   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
56   PetscErrorCode ierr;
57 
58   PetscFunctionBegin;
59   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
60   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
61   PetscFunctionReturn(0);
62 }
63 EXTERN_C_END
64 
65 EXTERN_C_BEGIN
66 #undef __FUNCT__
67 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
68 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIBAIJ(Mat mat)
69 {
70   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
71   PetscErrorCode ierr;
72 
73   PetscFunctionBegin;
74   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
75   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
76   PetscFunctionReturn(0);
77 }
78 EXTERN_C_END
79 
80 /*
81      Local utility routine that creates a mapping from the global column
82    number to the local number in the off-diagonal part of the local
83    storage of the matrix.  This is done in a non scalable way since the
84    length of colmap equals the global matrix length.
85 */
86 #undef __FUNCT__
87 #define __FUNCT__ "CreateColmap_MPIBAIJ_Private"
88 PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat)
89 {
90   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
91   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
92   PetscErrorCode ierr;
93   PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;
94 
95   PetscFunctionBegin;
96 #if defined (PETSC_USE_CTABLE)
97   ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr);
98   for (i=0; i<nbs; i++){
99     ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
100   }
101 #else
102   ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr);
103   ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
104   ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
105   for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
106 #endif
107   PetscFunctionReturn(0);
108 }
109 
110 #define CHUNKSIZE  10
111 
112 #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
113 { \
114  \
115     brow = row/bs;  \
116     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
117     rmax = aimax[brow]; nrow = ailen[brow]; \
118       bcol = col/bs; \
119       ridx = row % bs; cidx = col % bs; \
120       low = 0; high = nrow; \
121       while (high-low > 3) { \
122         t = (low+high)/2; \
123         if (rp[t] > bcol) high = t; \
124         else              low  = t; \
125       } \
126       for (_i=low; _i<high; _i++) { \
127         if (rp[_i] > bcol) break; \
128         if (rp[_i] == bcol) { \
129           bap  = ap +  bs2*_i + bs*cidx + ridx; \
130           if (addv == ADD_VALUES) *bap += value;  \
131           else                    *bap  = value;  \
132           goto a_noinsert; \
133         } \
134       } \
135       if (a->nonew == 1) goto a_noinsert; \
136       if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
137       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
138       N = nrow++ - 1;  \
139       /* shift up all the later entries in this row */ \
140       for (ii=N; ii>=_i; ii--) { \
141         rp[ii+1] = rp[ii]; \
142         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
143       } \
144       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
145       rp[_i]                      = bcol;  \
146       ap[bs2*_i + bs*cidx + ridx] = value;  \
147       a_noinsert:; \
148     ailen[brow] = nrow; \
149 }
150 
151 #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
152 { \
153     brow = row/bs;  \
154     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
155     rmax = bimax[brow]; nrow = bilen[brow]; \
156       bcol = col/bs; \
157       ridx = row % bs; cidx = col % bs; \
158       low = 0; high = nrow; \
159       while (high-low > 3) { \
160         t = (low+high)/2; \
161         if (rp[t] > bcol) high = t; \
162         else              low  = t; \
163       } \
164       for (_i=low; _i<high; _i++) { \
165         if (rp[_i] > bcol) break; \
166         if (rp[_i] == bcol) { \
167           bap  = ap +  bs2*_i + bs*cidx + ridx; \
168           if (addv == ADD_VALUES) *bap += value;  \
169           else                    *bap  = value;  \
170           goto b_noinsert; \
171         } \
172       } \
173       if (b->nonew == 1) goto b_noinsert; \
174       if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
175       MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
176       CHKMEMQ;\
177       N = nrow++ - 1;  \
178       /* shift up all the later entries in this row */ \
179       for (ii=N; ii>=_i; ii--) { \
180         rp[ii+1] = rp[ii]; \
181         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
182       } \
183       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
184       rp[_i]                      = bcol;  \
185       ap[bs2*_i + bs*cidx + ridx] = value;  \
186       b_noinsert:; \
187     bilen[brow] = nrow; \
188 }
189 
190 #undef __FUNCT__
191 #define __FUNCT__ "MatSetValues_MPIBAIJ"
192 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
193 {
194   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
195   MatScalar      value;
196   PetscTruth     roworiented = baij->roworiented;
197   PetscErrorCode ierr;
198   PetscInt       i,j,row,col;
199   PetscInt       rstart_orig=mat->rmap->rstart;
200   PetscInt       rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart;
201   PetscInt       cend_orig=mat->cmap->rend,bs=mat->rmap->bs;
202 
203   /* Some Variables required in the macro */
204   Mat            A = baij->A;
205   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)(A)->data;
206   PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
207   MatScalar      *aa=a->a;
208 
209   Mat            B = baij->B;
210   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(B)->data;
211   PetscInt       *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
212   MatScalar      *ba=b->a;
213 
214   PetscInt       *rp,ii,nrow,_i,rmax,N,brow,bcol;
215   PetscInt       low,high,t,ridx,cidx,bs2=a->bs2;
216   MatScalar      *ap,*bap;
217 
218   PetscFunctionBegin;
219   if (v) PetscValidScalarPointer(v,6);
220   for (i=0; i<m; i++) {
221     if (im[i] < 0) continue;
222 #if defined(PETSC_USE_DEBUG)
223     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
224 #endif
225     if (im[i] >= rstart_orig && im[i] < rend_orig) {
226       row = im[i] - rstart_orig;
227       for (j=0; j<n; j++) {
228         if (in[j] >= cstart_orig && in[j] < cend_orig){
229           col = in[j] - cstart_orig;
230           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
231           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
232           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
233         } else if (in[j] < 0) continue;
234 #if defined(PETSC_USE_DEBUG)
235         else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[i],mat->cmap->N-1);}
236 #endif
237         else {
238           if (mat->was_assembled) {
239             if (!baij->colmap) {
240               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
241             }
242 #if defined (PETSC_USE_CTABLE)
243             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
244             col  = col - 1;
245 #else
246             col = baij->colmap[in[j]/bs] - 1;
247 #endif
248             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
249               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
250               col =  in[j];
251               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
252               B = baij->B;
253               b = (Mat_SeqBAIJ*)(B)->data;
254               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
255               ba=b->a;
256             } else col += in[j]%bs;
257           } else col = in[j];
258           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
259           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
260           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
261         }
262       }
263     } else {
264       if (!baij->donotstash) {
265         if (roworiented) {
266           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
267         } else {
268           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
269         }
270       }
271     }
272   }
273   PetscFunctionReturn(0);
274 }
275 
276 #undef __FUNCT__
277 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
278 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
279 {
280   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
281   const PetscScalar *value;
282   MatScalar         *barray=baij->barray;
283   PetscTruth        roworiented = baij->roworiented;
284   PetscErrorCode    ierr;
285   PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
286   PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
287   PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
288 
289   PetscFunctionBegin;
290   if(!barray) {
291     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
292     baij->barray = barray;
293   }
294 
295   if (roworiented) {
296     stepval = (n-1)*bs;
297   } else {
298     stepval = (m-1)*bs;
299   }
300   for (i=0; i<m; i++) {
301     if (im[i] < 0) continue;
302 #if defined(PETSC_USE_DEBUG)
303     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
304 #endif
305     if (im[i] >= rstart && im[i] < rend) {
306       row = im[i] - rstart;
307       for (j=0; j<n; j++) {
308         /* If NumCol = 1 then a copy is not required */
309         if ((roworiented) && (n == 1)) {
310           barray = (MatScalar*)v + i*bs2;
311         } else if((!roworiented) && (m == 1)) {
312           barray = (MatScalar*)v + j*bs2;
313         } else { /* Here a copy is required */
314           if (roworiented) {
315             value = v + i*(stepval+bs)*bs + j*bs;
316           } else {
317             value = v + j*(stepval+bs)*bs + i*bs;
318           }
319           for (ii=0; ii<bs; ii++,value+=stepval) {
320             for (jj=0; jj<bs; jj++) {
321               *barray++  = *value++;
322             }
323           }
324           barray -=bs2;
325         }
326 
327         if (in[j] >= cstart && in[j] < cend){
328           col  = in[j] - cstart;
329           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
330         }
331         else if (in[j] < 0) continue;
332 #if defined(PETSC_USE_DEBUG)
333         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
334 #endif
335         else {
336           if (mat->was_assembled) {
337             if (!baij->colmap) {
338               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
339             }
340 
341 #if defined(PETSC_USE_DEBUG)
342 #if defined (PETSC_USE_CTABLE)
343             { PetscInt data;
344               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
345               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
346             }
347 #else
348             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
349 #endif
350 #endif
351 #if defined (PETSC_USE_CTABLE)
352 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
353             col  = (col - 1)/bs;
354 #else
355             col = (baij->colmap[in[j]] - 1)/bs;
356 #endif
357             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
358               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
359               col =  in[j];
360             }
361           }
362           else col = in[j];
363           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
364         }
365       }
366     } else {
367       if (!baij->donotstash) {
368         if (roworiented) {
369           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
370         } else {
371           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
372         }
373       }
374     }
375   }
376   PetscFunctionReturn(0);
377 }
378 
379 #define HASH_KEY 0.6180339887
380 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
381 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
382 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
383 #undef __FUNCT__
384 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
385 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
386 {
387   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
388   PetscTruth     roworiented = baij->roworiented;
389   PetscErrorCode ierr;
390   PetscInt       i,j,row,col;
391   PetscInt       rstart_orig=mat->rmap->rstart;
392   PetscInt       rend_orig=mat->rmap->rend,Nbs=baij->Nbs;
393   PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
394   PetscReal      tmp;
395   MatScalar      **HD = baij->hd,value;
396 #if defined(PETSC_USE_DEBUG)
397   PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
398 #endif
399 
400   PetscFunctionBegin;
401   if (v) PetscValidScalarPointer(v,6);
402   for (i=0; i<m; i++) {
403 #if defined(PETSC_USE_DEBUG)
404     if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
405     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
406 #endif
407       row = im[i];
408     if (row >= rstart_orig && row < rend_orig) {
409       for (j=0; j<n; j++) {
410         col = in[j];
411         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
412         /* Look up PetscInto the Hash Table */
413         key = (row/bs)*Nbs+(col/bs)+1;
414         h1  = HASH(size,key,tmp);
415 
416 
417         idx = h1;
418 #if defined(PETSC_USE_DEBUG)
419         insert_ct++;
420         total_ct++;
421         if (HT[idx] != key) {
422           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
423           if (idx == size) {
424             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
425             if (idx == h1) {
426               SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
427             }
428           }
429         }
430 #else
431         if (HT[idx] != key) {
432           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
433           if (idx == size) {
434             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
435             if (idx == h1) {
436               SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
437             }
438           }
439         }
440 #endif
441         /* A HASH table entry is found, so insert the values at the correct address */
442         if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
443         else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
444       }
445     } else {
446       if (!baij->donotstash) {
447         if (roworiented) {
448           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
449         } else {
450           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
451         }
452       }
453     }
454   }
455 #if defined(PETSC_USE_DEBUG)
456   baij->ht_total_ct = total_ct;
457   baij->ht_insert_ct = insert_ct;
458 #endif
459   PetscFunctionReturn(0);
460 }
461 
462 #undef __FUNCT__
463 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
464 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
465 {
466   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
467   PetscTruth        roworiented = baij->roworiented;
468   PetscErrorCode    ierr;
469   PetscInt          i,j,ii,jj,row,col;
470   PetscInt          rstart=baij->rstartbs;
471   PetscInt          rend=mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
472   PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
473   PetscReal         tmp;
474   MatScalar         **HD = baij->hd,*baij_a;
475   const PetscScalar *v_t,*value;
476 #if defined(PETSC_USE_DEBUG)
477   PetscInt          total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
478 #endif
479 
480   PetscFunctionBegin;
481 
482   if (roworiented) {
483     stepval = (n-1)*bs;
484   } else {
485     stepval = (m-1)*bs;
486   }
487   for (i=0; i<m; i++) {
488 #if defined(PETSC_USE_DEBUG)
489     if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
490     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
491 #endif
492     row   = im[i];
493     v_t   = v + i*nbs2;
494     if (row >= rstart && row < rend) {
495       for (j=0; j<n; j++) {
496         col = in[j];
497 
498         /* Look up into the Hash Table */
499         key = row*Nbs+col+1;
500         h1  = HASH(size,key,tmp);
501 
502         idx = h1;
503 #if defined(PETSC_USE_DEBUG)
504         total_ct++;
505         insert_ct++;
506        if (HT[idx] != key) {
507           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
508           if (idx == size) {
509             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
510             if (idx == h1) {
511               SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
512             }
513           }
514         }
515 #else
516         if (HT[idx] != key) {
517           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
518           if (idx == size) {
519             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
520             if (idx == h1) {
521               SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
522             }
523           }
524         }
525 #endif
526         baij_a = HD[idx];
527         if (roworiented) {
528           /*value = v + i*(stepval+bs)*bs + j*bs;*/
529           /* value = v + (i*(stepval+bs)+j)*bs; */
530           value = v_t;
531           v_t  += bs;
532           if (addv == ADD_VALUES) {
533             for (ii=0; ii<bs; ii++,value+=stepval) {
534               for (jj=ii; jj<bs2; jj+=bs) {
535                 baij_a[jj]  += *value++;
536               }
537             }
538           } else {
539             for (ii=0; ii<bs; ii++,value+=stepval) {
540               for (jj=ii; jj<bs2; jj+=bs) {
541                 baij_a[jj]  = *value++;
542               }
543             }
544           }
545         } else {
546           value = v + j*(stepval+bs)*bs + i*bs;
547           if (addv == ADD_VALUES) {
548             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
549               for (jj=0; jj<bs; jj++) {
550                 baij_a[jj]  += *value++;
551               }
552             }
553           } else {
554             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
555               for (jj=0; jj<bs; jj++) {
556                 baij_a[jj]  = *value++;
557               }
558             }
559           }
560         }
561       }
562     } else {
563       if (!baij->donotstash) {
564         if (roworiented) {
565           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
566         } else {
567           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
568         }
569       }
570     }
571   }
572 #if defined(PETSC_USE_DEBUG)
573   baij->ht_total_ct = total_ct;
574   baij->ht_insert_ct = insert_ct;
575 #endif
576   PetscFunctionReturn(0);
577 }
578 
579 #undef __FUNCT__
580 #define __FUNCT__ "MatGetValues_MPIBAIJ"
581 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
582 {
583   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
584   PetscErrorCode ierr;
585   PetscInt       bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
586   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
587 
588   PetscFunctionBegin;
589   for (i=0; i<m; i++) {
590     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
591     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
592     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
593       row = idxm[i] - bsrstart;
594       for (j=0; j<n; j++) {
595         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
596         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
597         if (idxn[j] >= bscstart && idxn[j] < bscend){
598           col = idxn[j] - bscstart;
599           ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
600         } else {
601           if (!baij->colmap) {
602             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
603           }
604 #if defined (PETSC_USE_CTABLE)
605           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
606           data --;
607 #else
608           data = baij->colmap[idxn[j]/bs]-1;
609 #endif
610           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
611           else {
612             col  = data + idxn[j]%bs;
613             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
614           }
615         }
616       }
617     } else {
618       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
619     }
620   }
621  PetscFunctionReturn(0);
622 }
623 
624 #undef __FUNCT__
625 #define __FUNCT__ "MatNorm_MPIBAIJ"
626 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
627 {
628   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
629   Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
630   PetscErrorCode ierr;
631   PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
632   PetscReal      sum = 0.0;
633   MatScalar      *v;
634 
635   PetscFunctionBegin;
636   if (baij->size == 1) {
637     ierr =  MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
638   } else {
639     if (type == NORM_FROBENIUS) {
640       v = amat->a;
641       nz = amat->nz*bs2;
642       for (i=0; i<nz; i++) {
643 #if defined(PETSC_USE_COMPLEX)
644         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
645 #else
646         sum += (*v)*(*v); v++;
647 #endif
648       }
649       v = bmat->a;
650       nz = bmat->nz*bs2;
651       for (i=0; i<nz; i++) {
652 #if defined(PETSC_USE_COMPLEX)
653         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
654 #else
655         sum += (*v)*(*v); v++;
656 #endif
657       }
658       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
659       *nrm = sqrt(*nrm);
660     } else if (type == NORM_1) { /* max column sum */
661       PetscReal *tmp,*tmp2;
662       PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
663       ierr = PetscMalloc2(mat->cmap->N,PetscReal,&tmp,mat->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr);
664       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
665       v = amat->a; jj = amat->j;
666       for (i=0; i<amat->nz; i++) {
667         for (j=0; j<bs; j++){
668           col = bs*(cstart + *jj) + j; /* column index */
669           for (row=0; row<bs; row++){
670             tmp[col] += PetscAbsScalar(*v);  v++;
671           }
672         }
673         jj++;
674       }
675       v = bmat->a; jj = bmat->j;
676       for (i=0; i<bmat->nz; i++) {
677         for (j=0; j<bs; j++){
678           col = bs*garray[*jj] + j;
679           for (row=0; row<bs; row++){
680             tmp[col] += PetscAbsScalar(*v); v++;
681           }
682         }
683         jj++;
684       }
685       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
686       *nrm = 0.0;
687       for (j=0; j<mat->cmap->N; j++) {
688         if (tmp2[j] > *nrm) *nrm = tmp2[j];
689       }
690       ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
691     } else if (type == NORM_INFINITY) { /* max row sum */
692       PetscReal *sums;
693       ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr)
694       sum = 0.0;
695       for (j=0; j<amat->mbs; j++) {
696         for (row=0; row<bs; row++) sums[row] = 0.0;
697         v = amat->a + bs2*amat->i[j];
698         nz = amat->i[j+1]-amat->i[j];
699         for (i=0; i<nz; i++) {
700           for (col=0; col<bs; col++){
701             for (row=0; row<bs; row++){
702               sums[row] += PetscAbsScalar(*v); v++;
703             }
704           }
705         }
706         v = bmat->a + bs2*bmat->i[j];
707         nz = bmat->i[j+1]-bmat->i[j];
708         for (i=0; i<nz; i++) {
709           for (col=0; col<bs; col++){
710             for (row=0; row<bs; row++){
711               sums[row] += PetscAbsScalar(*v); v++;
712             }
713           }
714         }
715         for (row=0; row<bs; row++){
716           if (sums[row] > sum) sum = sums[row];
717         }
718       }
719       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
720       ierr = PetscFree(sums);CHKERRQ(ierr);
721     } else SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for this norm yet");
722   }
723   PetscFunctionReturn(0);
724 }
725 
726 /*
727   Creates the hash table, and sets the table
728   This table is created only once.
729   If new entried need to be added to the matrix
730   then the hash table has to be destroyed and
731   recreated.
732 */
733 #undef __FUNCT__
734 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
735 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
736 {
737   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
738   Mat            A = baij->A,B=baij->B;
739   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
740   PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
741   PetscErrorCode ierr;
742   PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
743   PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
744   PetscInt       *HT,key;
745   MatScalar      **HD;
746   PetscReal      tmp;
747 #if defined(PETSC_USE_INFO)
748   PetscInt       ct=0,max=0;
749 #endif
750 
751   PetscFunctionBegin;
752   if (baij->ht) PetscFunctionReturn(0);
753 
754   baij->ht_size = (PetscInt)(factor*nz);
755   ht_size       = baij->ht_size;
756 
757   /* Allocate Memory for Hash Table */
758   ierr = PetscMalloc2(ht_size,MatScalar*,&baij->hd,ht_size,PetscInt,&baij->ht);CHKERRQ(ierr);
759   ierr = PetscMemzero(baij->hd,ht_size*sizeof(MatScalar*));CHKERRQ(ierr);
760   ierr = PetscMemzero(baij->ht,ht_size*sizeof(PetscInt));CHKERRQ(ierr);
761   HD   = baij->hd;
762   HT   = baij->ht;
763 
764   /* Loop Over A */
765   for (i=0; i<a->mbs; i++) {
766     for (j=ai[i]; j<ai[i+1]; j++) {
767       row = i+rstart;
768       col = aj[j]+cstart;
769 
770       key = row*Nbs + col + 1;
771       h1  = HASH(ht_size,key,tmp);
772       for (k=0; k<ht_size; k++){
773         if (!HT[(h1+k)%ht_size]) {
774           HT[(h1+k)%ht_size] = key;
775           HD[(h1+k)%ht_size] = a->a + j*bs2;
776           break;
777 #if defined(PETSC_USE_INFO)
778         } else {
779           ct++;
780 #endif
781         }
782       }
783 #if defined(PETSC_USE_INFO)
784       if (k> max) max = k;
785 #endif
786     }
787   }
788   /* Loop Over B */
789   for (i=0; i<b->mbs; i++) {
790     for (j=bi[i]; j<bi[i+1]; j++) {
791       row = i+rstart;
792       col = garray[bj[j]];
793       key = row*Nbs + col + 1;
794       h1  = HASH(ht_size,key,tmp);
795       for (k=0; k<ht_size; k++){
796         if (!HT[(h1+k)%ht_size]) {
797           HT[(h1+k)%ht_size] = key;
798           HD[(h1+k)%ht_size] = b->a + j*bs2;
799           break;
800 #if defined(PETSC_USE_INFO)
801         } else {
802           ct++;
803 #endif
804         }
805       }
806 #if defined(PETSC_USE_INFO)
807       if (k> max) max = k;
808 #endif
809     }
810   }
811 
812   /* Print Summary */
813 #if defined(PETSC_USE_INFO)
814   for (i=0,j=0; i<ht_size; i++) {
815     if (HT[i]) {j++;}
816   }
817   ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
818 #endif
819   PetscFunctionReturn(0);
820 }
821 
822 #undef __FUNCT__
823 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
824 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
825 {
826   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
827   PetscErrorCode ierr;
828   PetscInt       nstash,reallocs;
829   InsertMode     addv;
830 
831   PetscFunctionBegin;
832   if (baij->donotstash) {
833     PetscFunctionReturn(0);
834   }
835 
836   /* make sure all processors are either in INSERTMODE or ADDMODE */
837   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
838   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
839   mat->insertmode = addv; /* in case this processor had no cache */
840 
841   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
842   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
843   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
844   ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
845   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
846   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
847   PetscFunctionReturn(0);
848 }
849 
850 #undef __FUNCT__
851 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
852 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
853 {
854   Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
855   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)baij->A->data;
856   PetscErrorCode ierr;
857   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
858   PetscInt       *row,*col;
859   PetscTruth     r1,r2,r3,other_disassembled;
860   MatScalar      *val;
861   InsertMode     addv = mat->insertmode;
862   PetscMPIInt    n;
863 
864   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
865   PetscFunctionBegin;
866   if (!baij->donotstash) {
867     while (1) {
868       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
869       if (!flg) break;
870 
871       for (i=0; i<n;) {
872         /* Now identify the consecutive vals belonging to the same row */
873         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
874         if (j < n) ncols = j-i;
875         else       ncols = n-i;
876         /* Now assemble all these values with a single function call */
877         ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
878         i = j;
879       }
880     }
881     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
882     /* Now process the block-stash. Since the values are stashed column-oriented,
883        set the roworiented flag to column oriented, and after MatSetValues()
884        restore the original flags */
885     r1 = baij->roworiented;
886     r2 = a->roworiented;
887     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
888     baij->roworiented = PETSC_FALSE;
889     a->roworiented    = PETSC_FALSE;
890     (((Mat_SeqBAIJ*)baij->B->data))->roworiented    = PETSC_FALSE; /* b->roworiented */
891     while (1) {
892       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
893       if (!flg) break;
894 
895       for (i=0; i<n;) {
896         /* Now identify the consecutive vals belonging to the same row */
897         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
898         if (j < n) ncols = j-i;
899         else       ncols = n-i;
900         ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
901         i = j;
902       }
903     }
904     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
905     baij->roworiented = r1;
906     a->roworiented    = r2;
907     ((Mat_SeqBAIJ*)baij->B->data)->roworiented    = r3; /* b->roworiented */
908   }
909 
910   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
911   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
912 
913   /* determine if any processor has disassembled, if so we must
914      also disassemble ourselfs, in order that we may reassemble. */
915   /*
916      if nonzero structure of submatrix B cannot change then we know that
917      no processor disassembled thus we can skip this stuff
918   */
919   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
920     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
921     if (mat->was_assembled && !other_disassembled) {
922       ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
923     }
924   }
925 
926   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
927     ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
928   }
929   ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
930   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
931   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
932 
933 #if defined(PETSC_USE_INFO)
934   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
935     ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);
936     baij->ht_total_ct  = 0;
937     baij->ht_insert_ct = 0;
938   }
939 #endif
940   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
941     ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
942     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
943     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
944   }
945 
946   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
947   baij->rowvalues = 0;
948   PetscFunctionReturn(0);
949 }
950 
951 #undef __FUNCT__
952 #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
953 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
954 {
955   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
956   PetscErrorCode    ierr;
957   PetscMPIInt       size = baij->size,rank = baij->rank;
958   PetscInt          bs = mat->rmap->bs;
959   PetscTruth        iascii,isdraw;
960   PetscViewer       sviewer;
961   PetscViewerFormat format;
962 
963   PetscFunctionBegin;
964   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
965   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
966   if (iascii) {
967     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
968     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
969       MatInfo info;
970       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
971       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
972       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
973               rank,mat->rmap->N,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs,
974               mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
975       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
976       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
977       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
978       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
979       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
980       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
981       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
982       PetscFunctionReturn(0);
983     } else if (format == PETSC_VIEWER_ASCII_INFO) {
984       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
985       PetscFunctionReturn(0);
986     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
987       PetscFunctionReturn(0);
988     }
989   }
990 
991   if (isdraw) {
992     PetscDraw       draw;
993     PetscTruth isnull;
994     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
995     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
996   }
997 
998   if (size == 1) {
999     ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1000     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
1001   } else {
1002     /* assemble the entire matrix onto first processor. */
1003     Mat         A;
1004     Mat_SeqBAIJ *Aloc;
1005     PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
1006     MatScalar   *a;
1007 
1008     /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
1009     /* Perhaps this should be the type of mat? */
1010     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1011     if (!rank) {
1012       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1013     } else {
1014       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1015     }
1016     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
1017     ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1018     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1019 
1020     /* copy over the A part */
1021     Aloc = (Mat_SeqBAIJ*)baij->A->data;
1022     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1023     ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
1024 
1025     for (i=0; i<mbs; i++) {
1026       rvals[0] = bs*(baij->rstartbs + i);
1027       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1028       for (j=ai[i]; j<ai[i+1]; j++) {
1029         col = (baij->cstartbs+aj[j])*bs;
1030         for (k=0; k<bs; k++) {
1031           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1032           col++; a += bs;
1033         }
1034       }
1035     }
1036     /* copy over the B part */
1037     Aloc = (Mat_SeqBAIJ*)baij->B->data;
1038     ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1039     for (i=0; i<mbs; i++) {
1040       rvals[0] = bs*(baij->rstartbs + i);
1041       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1042       for (j=ai[i]; j<ai[i+1]; j++) {
1043         col = baij->garray[aj[j]]*bs;
1044         for (k=0; k<bs; k++) {
1045           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1046           col++; a += bs;
1047         }
1048       }
1049     }
1050     ierr = PetscFree(rvals);CHKERRQ(ierr);
1051     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1052     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1053     /*
1054        Everyone has to call to draw the matrix since the graphics waits are
1055        synchronized across all processors that share the PetscDraw object
1056     */
1057     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1058     if (!rank) {
1059       ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1060       ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1061     }
1062     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1063     ierr = MatDestroy(A);CHKERRQ(ierr);
1064   }
1065   PetscFunctionReturn(0);
1066 }
1067 
1068 #undef __FUNCT__
1069 #define __FUNCT__ "MatView_MPIBAIJ"
1070 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
1071 {
1072   PetscErrorCode ierr;
1073   PetscTruth     iascii,isdraw,issocket,isbinary;
1074 
1075   PetscFunctionBegin;
1076   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1077   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1078   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1079   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1080   if (iascii || isdraw || issocket || isbinary) {
1081     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1082   } else {
1083     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
1084   }
1085   PetscFunctionReturn(0);
1086 }
1087 
1088 #undef __FUNCT__
1089 #define __FUNCT__ "MatDestroy_MPIBAIJ"
1090 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
1091 {
1092   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1093   PetscErrorCode ierr;
1094 
1095   PetscFunctionBegin;
1096 #if defined(PETSC_USE_LOG)
1097   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
1098 #endif
1099   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1100   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
1101   ierr = MatDestroy(baij->A);CHKERRQ(ierr);
1102   ierr = MatDestroy(baij->B);CHKERRQ(ierr);
1103 #if defined (PETSC_USE_CTABLE)
1104   if (baij->colmap) {ierr = PetscTableDestroy(baij->colmap);CHKERRQ(ierr);}
1105 #else
1106   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
1107 #endif
1108   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
1109   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
1110   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
1111   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
1112   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
1113   ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);
1114   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
1115   ierr = PetscFree(baij);CHKERRQ(ierr);
1116 
1117   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1118   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1119   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1120   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
1121   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1122   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1123   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
1124   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr);
1125   PetscFunctionReturn(0);
1126 }
1127 
1128 #undef __FUNCT__
1129 #define __FUNCT__ "MatMult_MPIBAIJ"
1130 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1131 {
1132   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1133   PetscErrorCode ierr;
1134   PetscInt       nt;
1135 
1136   PetscFunctionBegin;
1137   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1138   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1139   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1140   if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
1141   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1142   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1143   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1144   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1145   PetscFunctionReturn(0);
1146 }
1147 
1148 #undef __FUNCT__
1149 #define __FUNCT__ "MatMultAdd_MPIBAIJ"
1150 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1151 {
1152   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1153   PetscErrorCode ierr;
1154 
1155   PetscFunctionBegin;
1156   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1157   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1158   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1159   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1160   PetscFunctionReturn(0);
1161 }
1162 
1163 #undef __FUNCT__
1164 #define __FUNCT__ "MatMultTranspose_MPIBAIJ"
1165 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1166 {
1167   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1168   PetscErrorCode ierr;
1169   PetscTruth     merged;
1170 
1171   PetscFunctionBegin;
1172   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1173   /* do nondiagonal part */
1174   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1175   if (!merged) {
1176     /* send it on its way */
1177     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1178     /* do local part */
1179     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1180     /* receive remote parts: note this assumes the values are not actually */
1181     /* inserted in yy until the next line */
1182     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1183   } else {
1184     /* do local part */
1185     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1186     /* send it on its way */
1187     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1188     /* values actually were received in the Begin() but we need to call this nop */
1189     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1190   }
1191   PetscFunctionReturn(0);
1192 }
1193 
1194 #undef __FUNCT__
1195 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
1196 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1197 {
1198   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1199   PetscErrorCode ierr;
1200 
1201   PetscFunctionBegin;
1202   /* do nondiagonal part */
1203   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1204   /* send it on its way */
1205   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1206   /* do local part */
1207   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1208   /* receive remote parts: note this assumes the values are not actually */
1209   /* inserted in yy until the next line, which is true for my implementation*/
1210   /* but is not perhaps always true. */
1211   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1212   PetscFunctionReturn(0);
1213 }
1214 
1215 /*
1216   This only works correctly for square matrices where the subblock A->A is the
1217    diagonal block
1218 */
1219 #undef __FUNCT__
1220 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
1221 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1222 {
1223   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1224   PetscErrorCode ierr;
1225 
1226   PetscFunctionBegin;
1227   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1228   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1229   PetscFunctionReturn(0);
1230 }
1231 
1232 #undef __FUNCT__
1233 #define __FUNCT__ "MatScale_MPIBAIJ"
1234 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
1235 {
1236   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1237   PetscErrorCode ierr;
1238 
1239   PetscFunctionBegin;
1240   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1241   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1242   PetscFunctionReturn(0);
1243 }
1244 
1245 #undef __FUNCT__
1246 #define __FUNCT__ "MatGetRow_MPIBAIJ"
1247 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1248 {
1249   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
1250   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1251   PetscErrorCode ierr;
1252   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1253   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
1254   PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;
1255 
1256   PetscFunctionBegin;
1257   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1258   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1259   mat->getrowactive = PETSC_TRUE;
1260 
1261   if (!mat->rowvalues && (idx || v)) {
1262     /*
1263         allocate enough space to hold information from the longest row.
1264     */
1265     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1266     PetscInt     max = 1,mbs = mat->mbs,tmp;
1267     for (i=0; i<mbs; i++) {
1268       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1269       if (max < tmp) { max = tmp; }
1270     }
1271     ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1272   }
1273   lrow = row - brstart;
1274 
1275   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1276   if (!v)   {pvA = 0; pvB = 0;}
1277   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1278   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1279   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1280   nztot = nzA + nzB;
1281 
1282   cmap  = mat->garray;
1283   if (v  || idx) {
1284     if (nztot) {
1285       /* Sort by increasing column numbers, assuming A and B already sorted */
1286       PetscInt imark = -1;
1287       if (v) {
1288         *v = v_p = mat->rowvalues;
1289         for (i=0; i<nzB; i++) {
1290           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1291           else break;
1292         }
1293         imark = i;
1294         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1295         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1296       }
1297       if (idx) {
1298         *idx = idx_p = mat->rowindices;
1299         if (imark > -1) {
1300           for (i=0; i<imark; i++) {
1301             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1302           }
1303         } else {
1304           for (i=0; i<nzB; i++) {
1305             if (cmap[cworkB[i]/bs] < cstart)
1306               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1307             else break;
1308           }
1309           imark = i;
1310         }
1311         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1312         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1313       }
1314     } else {
1315       if (idx) *idx = 0;
1316       if (v)   *v   = 0;
1317     }
1318   }
1319   *nz = nztot;
1320   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1321   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1322   PetscFunctionReturn(0);
1323 }
1324 
1325 #undef __FUNCT__
1326 #define __FUNCT__ "MatRestoreRow_MPIBAIJ"
1327 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1328 {
1329   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1330 
1331   PetscFunctionBegin;
1332   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1333   baij->getrowactive = PETSC_FALSE;
1334   PetscFunctionReturn(0);
1335 }
1336 
1337 #undef __FUNCT__
1338 #define __FUNCT__ "MatZeroEntries_MPIBAIJ"
1339 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1340 {
1341   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1342   PetscErrorCode ierr;
1343 
1344   PetscFunctionBegin;
1345   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1346   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1347   PetscFunctionReturn(0);
1348 }
1349 
1350 #undef __FUNCT__
1351 #define __FUNCT__ "MatGetInfo_MPIBAIJ"
1352 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1353 {
1354   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
1355   Mat            A = a->A,B = a->B;
1356   PetscErrorCode ierr;
1357   PetscReal      isend[5],irecv[5];
1358 
1359   PetscFunctionBegin;
1360   info->block_size     = (PetscReal)matin->rmap->bs;
1361   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1362   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1363   isend[3] = info->memory;  isend[4] = info->mallocs;
1364   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1365   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1366   isend[3] += info->memory;  isend[4] += info->mallocs;
1367   if (flag == MAT_LOCAL) {
1368     info->nz_used      = isend[0];
1369     info->nz_allocated = isend[1];
1370     info->nz_unneeded  = isend[2];
1371     info->memory       = isend[3];
1372     info->mallocs      = isend[4];
1373   } else if (flag == MAT_GLOBAL_MAX) {
1374     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1375     info->nz_used      = irecv[0];
1376     info->nz_allocated = irecv[1];
1377     info->nz_unneeded  = irecv[2];
1378     info->memory       = irecv[3];
1379     info->mallocs      = irecv[4];
1380   } else if (flag == MAT_GLOBAL_SUM) {
1381     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1382     info->nz_used      = irecv[0];
1383     info->nz_allocated = irecv[1];
1384     info->nz_unneeded  = irecv[2];
1385     info->memory       = irecv[3];
1386     info->mallocs      = irecv[4];
1387   } else {
1388     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1389   }
1390   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1391   info->fill_ratio_needed = 0;
1392   info->factor_mallocs    = 0;
1393   PetscFunctionReturn(0);
1394 }
1395 
1396 #undef __FUNCT__
1397 #define __FUNCT__ "MatSetOption_MPIBAIJ"
1398 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscTruth flg)
1399 {
1400   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1401   PetscErrorCode ierr;
1402 
1403   PetscFunctionBegin;
1404   switch (op) {
1405   case MAT_NEW_NONZERO_LOCATIONS:
1406   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1407   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1408   case MAT_KEEP_NONZERO_PATTERN:
1409   case MAT_NEW_NONZERO_LOCATION_ERR:
1410     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1411     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1412     break;
1413   case MAT_ROW_ORIENTED:
1414     a->roworiented = flg;
1415     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1416     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1417     break;
1418   case MAT_NEW_DIAGONALS:
1419     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1420     break;
1421   case MAT_IGNORE_OFF_PROC_ENTRIES:
1422     a->donotstash = flg;
1423     break;
1424   case MAT_USE_HASH_TABLE:
1425     a->ht_flag = flg;
1426     break;
1427   case MAT_SYMMETRIC:
1428   case MAT_STRUCTURALLY_SYMMETRIC:
1429   case MAT_HERMITIAN:
1430   case MAT_SYMMETRY_ETERNAL:
1431     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1432     break;
1433   default:
1434     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1435   }
1436   PetscFunctionReturn(0);
1437 }
1438 
1439 #undef __FUNCT__
1440 #define __FUNCT__ "MatTranspose_MPIBAIJ("
1441 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
1442 {
1443   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
1444   Mat_SeqBAIJ    *Aloc;
1445   Mat            B;
1446   PetscErrorCode ierr;
1447   PetscInt       M=A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
1448   PetscInt       bs=A->rmap->bs,mbs=baij->mbs;
1449   MatScalar      *a;
1450 
1451   PetscFunctionBegin;
1452   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1453   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1454     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
1455     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1456     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1457     ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1458   } else {
1459     B = *matout;
1460   }
1461 
1462   /* copy over the A part */
1463   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1464   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1465   ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
1466 
1467   for (i=0; i<mbs; i++) {
1468     rvals[0] = bs*(baij->rstartbs + i);
1469     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1470     for (j=ai[i]; j<ai[i+1]; j++) {
1471       col = (baij->cstartbs+aj[j])*bs;
1472       for (k=0; k<bs; k++) {
1473         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1474         col++; a += bs;
1475       }
1476     }
1477   }
1478   /* copy over the B part */
1479   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1480   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1481   for (i=0; i<mbs; i++) {
1482     rvals[0] = bs*(baij->rstartbs + i);
1483     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1484     for (j=ai[i]; j<ai[i+1]; j++) {
1485       col = baij->garray[aj[j]]*bs;
1486       for (k=0; k<bs; k++) {
1487         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1488         col++; a += bs;
1489       }
1490     }
1491   }
1492   ierr = PetscFree(rvals);CHKERRQ(ierr);
1493   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1494   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1495 
1496   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1497     *matout = B;
1498   } else {
1499     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1500   }
1501   PetscFunctionReturn(0);
1502 }
1503 
1504 #undef __FUNCT__
1505 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
1506 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1507 {
1508   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1509   Mat            a = baij->A,b = baij->B;
1510   PetscErrorCode ierr;
1511   PetscInt       s1,s2,s3;
1512 
1513   PetscFunctionBegin;
1514   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1515   if (rr) {
1516     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1517     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1518     /* Overlap communication with computation. */
1519     ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1520   }
1521   if (ll) {
1522     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1523     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1524     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1525   }
1526   /* scale  the diagonal block */
1527   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1528 
1529   if (rr) {
1530     /* Do a scatter end and then right scale the off-diagonal block */
1531     ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1532     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1533   }
1534 
1535   PetscFunctionReturn(0);
1536 }
1537 
1538 #undef __FUNCT__
1539 #define __FUNCT__ "MatZeroRows_MPIBAIJ"
1540 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
1541 {
1542   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1543   PetscErrorCode ierr;
1544   PetscMPIInt    imdex,size = l->size,n,rank = l->rank;
1545   PetscInt       i,*owners = A->rmap->range;
1546   PetscInt       *nprocs,j,idx,nsends,row;
1547   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
1548   PetscInt       *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source,lastidx = -1;
1549   PetscInt       *lens,*lrows,*values,rstart_bs=A->rmap->rstart;
1550   MPI_Comm       comm = ((PetscObject)A)->comm;
1551   MPI_Request    *send_waits,*recv_waits;
1552   MPI_Status     recv_status,*send_status;
1553 #if defined(PETSC_DEBUG)
1554   PetscTruth     found = PETSC_FALSE;
1555 #endif
1556 
1557   PetscFunctionBegin;
1558   /*  first count number of contributors to each processor */
1559   ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
1560   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
1561   ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
1562   j = 0;
1563   for (i=0; i<N; i++) {
1564     if (lastidx > (idx = rows[i])) j = 0;
1565     lastidx = idx;
1566     for (; j<size; j++) {
1567       if (idx >= owners[j] && idx < owners[j+1]) {
1568         nprocs[2*j]++;
1569         nprocs[2*j+1] = 1;
1570         owner[i] = j;
1571 #if defined(PETSC_DEBUG)
1572         found = PETSC_TRUE;
1573 #endif
1574         break;
1575       }
1576     }
1577 #if defined(PETSC_DEBUG)
1578     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
1579     found = PETSC_FALSE;
1580 #endif
1581   }
1582   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
1583 
1584   /* inform other processors of number of messages and max length*/
1585   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
1586 
1587   /* post receives:   */
1588   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
1589   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
1590   for (i=0; i<nrecvs; i++) {
1591     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
1592   }
1593 
1594   /* do sends:
1595      1) starts[i] gives the starting index in svalues for stuff going to
1596      the ith processor
1597   */
1598   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
1599   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
1600   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
1601   starts[0]  = 0;
1602   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1603   for (i=0; i<N; i++) {
1604     svalues[starts[owner[i]]++] = rows[i];
1605   }
1606 
1607   starts[0] = 0;
1608   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1609   count = 0;
1610   for (i=0; i<size; i++) {
1611     if (nprocs[2*i+1]) {
1612       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
1613     }
1614   }
1615   ierr = PetscFree(starts);CHKERRQ(ierr);
1616 
1617   base = owners[rank];
1618 
1619   /*  wait on receives */
1620   ierr   = PetscMalloc2(nrecvs+1,PetscInt,&lens,nrecvs+1,PetscInt,&source);CHKERRQ(ierr);
1621   count  = nrecvs;
1622   slen = 0;
1623   while (count) {
1624     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
1625     /* unpack receives into our local space */
1626     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
1627     source[imdex]  = recv_status.MPI_SOURCE;
1628     lens[imdex]    = n;
1629     slen          += n;
1630     count--;
1631   }
1632   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1633 
1634   /* move the data into the send scatter */
1635   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
1636   count = 0;
1637   for (i=0; i<nrecvs; i++) {
1638     values = rvalues + i*nmax;
1639     for (j=0; j<lens[i]; j++) {
1640       lrows[count++] = values[j] - base;
1641     }
1642   }
1643   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1644   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
1645   ierr = PetscFree(owner);CHKERRQ(ierr);
1646   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1647 
1648   /* actually zap the local rows */
1649   /*
1650         Zero the required rows. If the "diagonal block" of the matrix
1651      is square and the user wishes to set the diagonal we use separate
1652      code so that MatSetValues() is not called for each diagonal allocating
1653      new memory, thus calling lots of mallocs and slowing things down.
1654 
1655   */
1656   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1657   ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0);CHKERRQ(ierr);
1658   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
1659     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag);CHKERRQ(ierr);
1660   } else if (diag != 0.0) {
1661     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr);
1662     if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1663        MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1664     for (i=0; i<slen; i++) {
1665       row  = lrows[i] + rstart_bs;
1666       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
1667     }
1668     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1669     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1670   } else {
1671     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr);
1672   }
1673 
1674   ierr = PetscFree(lrows);CHKERRQ(ierr);
1675 
1676   /* wait on sends */
1677   if (nsends) {
1678     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1679     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1680     ierr = PetscFree(send_status);CHKERRQ(ierr);
1681   }
1682   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1683   ierr = PetscFree(svalues);CHKERRQ(ierr);
1684 
1685   PetscFunctionReturn(0);
1686 }
1687 
1688 #undef __FUNCT__
1689 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
1690 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1691 {
1692   Mat_MPIBAIJ    *a   = (Mat_MPIBAIJ*)A->data;
1693   PetscErrorCode ierr;
1694 
1695   PetscFunctionBegin;
1696   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1697   PetscFunctionReturn(0);
1698 }
1699 
1700 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
1701 
1702 #undef __FUNCT__
1703 #define __FUNCT__ "MatEqual_MPIBAIJ"
1704 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag)
1705 {
1706   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1707   Mat            a,b,c,d;
1708   PetscTruth     flg;
1709   PetscErrorCode ierr;
1710 
1711   PetscFunctionBegin;
1712   a = matA->A; b = matA->B;
1713   c = matB->A; d = matB->B;
1714 
1715   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1716   if (flg) {
1717     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1718   }
1719   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1720   PetscFunctionReturn(0);
1721 }
1722 
1723 #undef __FUNCT__
1724 #define __FUNCT__ "MatCopy_MPIBAIJ"
1725 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
1726 {
1727   PetscErrorCode ierr;
1728   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ *)A->data;
1729   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ *)B->data;
1730 
1731   PetscFunctionBegin;
1732   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1733   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1734     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1735   } else {
1736     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1737     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1738   }
1739   PetscFunctionReturn(0);
1740 }
1741 
1742 #undef __FUNCT__
1743 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ"
1744 PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A)
1745 {
1746   PetscErrorCode ierr;
1747 
1748   PetscFunctionBegin;
1749   ierr =  MatMPIBAIJSetPreallocation(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1750   PetscFunctionReturn(0);
1751 }
1752 
1753 #undef __FUNCT__
1754 #define __FUNCT__ "MatAXPY_MPIBAIJ"
1755 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1756 {
1757   PetscErrorCode ierr;
1758   Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data;
1759   PetscBLASInt   bnz,one=1;
1760   Mat_SeqBAIJ    *x,*y;
1761 
1762   PetscFunctionBegin;
1763   if (str == SAME_NONZERO_PATTERN) {
1764     PetscScalar alpha = a;
1765     x = (Mat_SeqBAIJ *)xx->A->data;
1766     y = (Mat_SeqBAIJ *)yy->A->data;
1767     bnz = PetscBLASIntCast(x->nz);
1768     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1769     x = (Mat_SeqBAIJ *)xx->B->data;
1770     y = (Mat_SeqBAIJ *)yy->B->data;
1771     bnz = PetscBLASIntCast(x->nz);
1772     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1773   } else {
1774     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1775   }
1776   PetscFunctionReturn(0);
1777 }
1778 
1779 #undef __FUNCT__
1780 #define __FUNCT__ "MatSetBlockSize_MPIBAIJ"
1781 PetscErrorCode MatSetBlockSize_MPIBAIJ(Mat A,PetscInt bs)
1782 {
1783   Mat_MPIBAIJ    *a   = (Mat_MPIBAIJ*)A->data;
1784   PetscInt rbs,cbs;
1785   PetscErrorCode ierr;
1786 
1787   PetscFunctionBegin;
1788   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1789   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1790   ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr);
1791   ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr);
1792   if (rbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,rbs);
1793   if (cbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,cbs);
1794   PetscFunctionReturn(0);
1795 }
1796 
1797 #undef __FUNCT__
1798 #define __FUNCT__ "MatRealPart_MPIBAIJ"
1799 PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
1800 {
1801   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ*)A->data;
1802   PetscErrorCode ierr;
1803 
1804   PetscFunctionBegin;
1805   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1806   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1807   PetscFunctionReturn(0);
1808 }
1809 
1810 #undef __FUNCT__
1811 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ"
1812 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
1813 {
1814   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ*)A->data;
1815   PetscErrorCode ierr;
1816 
1817   PetscFunctionBegin;
1818   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1819   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1820   PetscFunctionReturn(0);
1821 }
1822 
1823 #undef __FUNCT__
1824 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ"
1825 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
1826 {
1827   PetscErrorCode ierr;
1828   IS             iscol_local;
1829   PetscInt       csize;
1830 
1831   PetscFunctionBegin;
1832   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
1833   if (call == MAT_REUSE_MATRIX) {
1834     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
1835     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
1836   } else {
1837     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
1838   }
1839   ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
1840   if (call == MAT_INITIAL_MATRIX) {
1841     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
1842     ierr = ISDestroy(iscol_local);CHKERRQ(ierr);
1843   }
1844   PetscFunctionReturn(0);
1845 }
1846 
1847 #undef __FUNCT__
1848 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ"
1849 /*
1850     Not great since it makes two copies of the submatrix, first an SeqBAIJ
1851   in local and then by concatenating the local matrices the end result.
1852   Writing it directly would be much like MatGetSubMatrices_MPIBAIJ()
1853 */
1854 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
1855 {
1856   PetscErrorCode ierr;
1857   PetscMPIInt    rank,size;
1858   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs;
1859   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
1860   Mat            *local,M,Mreuse;
1861   MatScalar      *vwork,*aa;
1862   MPI_Comm       comm = ((PetscObject)mat)->comm;
1863   Mat_SeqBAIJ    *aij;
1864 
1865 
1866   PetscFunctionBegin;
1867   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1868   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1869 
1870   if (call ==  MAT_REUSE_MATRIX) {
1871     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
1872     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
1873     local = &Mreuse;
1874     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
1875   } else {
1876     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
1877     Mreuse = *local;
1878     ierr   = PetscFree(local);CHKERRQ(ierr);
1879   }
1880 
1881   /*
1882       m - number of local rows
1883       n - number of columns (same on all processors)
1884       rstart - first row in new global matrix generated
1885   */
1886   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
1887   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
1888   m    = m/bs;
1889   n    = n/bs;
1890 
1891   if (call == MAT_INITIAL_MATRIX) {
1892     aij = (Mat_SeqBAIJ*)(Mreuse)->data;
1893     ii  = aij->i;
1894     jj  = aij->j;
1895 
1896     /*
1897         Determine the number of non-zeros in the diagonal and off-diagonal
1898         portions of the matrix in order to do correct preallocation
1899     */
1900 
1901     /* first get start and end of "diagonal" columns */
1902     if (csize == PETSC_DECIDE) {
1903       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
1904       if (mglobal == n*bs) { /* square matrix */
1905 	nlocal = m;
1906       } else {
1907         nlocal = n/size + ((n % size) > rank);
1908       }
1909     } else {
1910       nlocal = csize/bs;
1911     }
1912     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
1913     rstart = rend - nlocal;
1914     if (rank == size - 1 && rend != n) {
1915       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
1916     }
1917 
1918     /* next, compute all the lengths */
1919     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
1920     olens = dlens + m;
1921     for (i=0; i<m; i++) {
1922       jend = ii[i+1] - ii[i];
1923       olen = 0;
1924       dlen = 0;
1925       for (j=0; j<jend; j++) {
1926         if (*jj < rstart || *jj >= rend) olen++;
1927         else dlen++;
1928         jj++;
1929       }
1930       olens[i] = olen;
1931       dlens[i] = dlen;
1932     }
1933     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
1934     ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr);
1935     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
1936     ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
1937     ierr = PetscFree(dlens);CHKERRQ(ierr);
1938   } else {
1939     PetscInt ml,nl;
1940 
1941     M = *newmat;
1942     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
1943     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
1944     ierr = MatZeroEntries(M);CHKERRQ(ierr);
1945     /*
1946          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
1947        rather than the slower MatSetValues().
1948     */
1949     M->was_assembled = PETSC_TRUE;
1950     M->assembled     = PETSC_FALSE;
1951   }
1952   ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
1953   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
1954   aij = (Mat_SeqBAIJ*)(Mreuse)->data;
1955   ii  = aij->i;
1956   jj  = aij->j;
1957   aa  = aij->a;
1958   for (i=0; i<m; i++) {
1959     row   = rstart/bs + i;
1960     nz    = ii[i+1] - ii[i];
1961     cwork = jj;     jj += nz;
1962     vwork = aa;     aa += nz;
1963     ierr = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
1964   }
1965 
1966   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1967   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1968   *newmat = M;
1969 
1970   /* save submatrix used in processor for next request */
1971   if (call ==  MAT_INITIAL_MATRIX) {
1972     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
1973     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
1974   }
1975 
1976   PetscFunctionReturn(0);
1977 }
1978 
1979 #undef __FUNCT__
1980 #define __FUNCT__ "MatPermute_MPIBAIJ"
1981 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B)
1982 {
1983   MPI_Comm       comm,pcomm;
1984   PetscInt       first,local_size,nrows;
1985   const PetscInt *rows;
1986   PetscMPIInt    size;
1987   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1988   PetscErrorCode ierr;
1989 
1990   PetscFunctionBegin;
1991   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1992   /* make a collective version of 'rowp' */
1993   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
1994   if (pcomm==comm) {
1995     crowp = rowp;
1996   } else {
1997     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
1998     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
1999     ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr);
2000     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
2001   }
2002   /* collect the global row permutation and invert it */
2003   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
2004   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
2005   if (pcomm!=comm) {
2006     ierr = ISDestroy(crowp);CHKERRQ(ierr);
2007   }
2008   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
2009   /* get the local target indices */
2010   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
2011   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
2012   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
2013   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr);
2014   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
2015   ierr = ISDestroy(irowp);CHKERRQ(ierr);
2016   /* the column permutation is so much easier;
2017      make a local version of 'colp' and invert it */
2018   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
2019   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
2020   if (size==1) {
2021     lcolp = colp;
2022   } else {
2023     ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
2024     ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
2025     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr);
2026   }
2027   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
2028   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
2029   ierr = ISSetPermutation(icolp);CHKERRQ(ierr);
2030   if (size>1) {
2031     ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
2032     ierr = ISDestroy(lcolp);CHKERRQ(ierr);
2033   }
2034   /* now we just get the submatrix */
2035   ierr = MatGetSubMatrix_MPIBAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
2036   /* clean up */
2037   ierr = ISDestroy(lrowp);CHKERRQ(ierr);
2038   ierr = ISDestroy(icolp);CHKERRQ(ierr);
2039   PetscFunctionReturn(0);
2040 }
2041 
2042 #undef __FUNCT__
2043 #define __FUNCT__ "MatGetGhosts_MPIBAIJ"
2044 PetscErrorCode PETSCMAT_DLLEXPORT MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
2045 {
2046   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*) mat->data;
2047   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
2048 
2049   PetscFunctionBegin;
2050   if (nghosts) { *nghosts = B->nbs;}
2051   if (ghosts) {*ghosts = baij->garray;}
2052   PetscFunctionReturn(0);
2053 }
2054 
2055 EXTERN PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat);
2056 
2057 #undef __FUNCT__
2058 #define __FUNCT__ "MatFDColoringCreate_MPIBAIJ"
2059 /*
2060     This routine is almost identical to MatFDColoringCreate_MPIBAIJ()!
2061 */
2062 PetscErrorCode MatFDColoringCreate_MPIBAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c)
2063 {
2064   Mat_MPIBAIJ            *baij = (Mat_MPIBAIJ*)mat->data;
2065   PetscErrorCode        ierr;
2066   PetscMPIInt           size,*ncolsonproc,*disp,nn;
2067   PetscInt              bs,i,n,nrows,j,k,m,*rows = 0,*A_ci,*A_cj,ncols,col;
2068   const PetscInt        *is;
2069   PetscInt              nis = iscoloring->n,nctot,*cols,*B_ci,*B_cj;
2070   PetscInt              *rowhit,M,cstart,cend,colb;
2071   PetscInt              *columnsforrow,l;
2072   IS                    *isa;
2073   PetscTruth             done,flg;
2074   ISLocalToGlobalMapping map = mat->bmapping;
2075   PetscInt               *ltog = (map ? map->indices : (PetscInt*) PETSC_NULL) ,ctype=c->ctype;
2076 
2077   PetscFunctionBegin;
2078   if (!mat->assembled) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled first; MatAssemblyBegin/End();");
2079   if (ctype == IS_COLORING_GHOSTED && !map) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_INCOMP,"When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMappingBlock");
2080 
2081   ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr);
2082   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2083   M                = mat->rmap->n/bs;
2084   cstart           = mat->cmap->rstart/bs;
2085   cend             = mat->cmap->rend/bs;
2086   c->M             = mat->rmap->N/bs;  /* set the global rows and columns and local rows */
2087   c->N             = mat->cmap->N/bs;
2088   c->m             = mat->rmap->n/bs;
2089   c->rstart        = mat->rmap->rstart/bs;
2090 
2091   c->ncolors       = nis;
2092   ierr             = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr);
2093   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr);
2094   ierr             = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr);
2095   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr);
2096   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr);
2097   ierr = PetscLogObjectMemory(c,5*nis*sizeof(PetscInt));CHKERRQ(ierr);
2098 
2099   /* Allow access to data structures of local part of matrix */
2100   if (!baij->colmap) {
2101     ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
2102   }
2103   ierr = MatGetColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr);
2104   ierr = MatGetColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr);
2105 
2106   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr);
2107   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr);
2108 
2109   for (i=0; i<nis; i++) {
2110     ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr);
2111     ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr);
2112     c->ncolumns[i] = n;
2113     if (n) {
2114       ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr);
2115       ierr = PetscLogObjectMemory(c,n*sizeof(PetscInt));CHKERRQ(ierr);
2116       ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr);
2117     } else {
2118       c->columns[i]  = 0;
2119     }
2120 
2121     if (ctype == IS_COLORING_GLOBAL){
2122       /* Determine the total (parallel) number of columns of this color */
2123       ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
2124       ierr = PetscMalloc2(size,PetscMPIInt,&ncolsonproc,size,PetscMPIInt,&disp);CHKERRQ(ierr);
2125 
2126       nn   = PetscMPIIntCast(n);
2127       ierr = MPI_Allgather(&nn,1,MPI_INT,ncolsonproc,1,MPI_INT,((PetscObject)mat)->comm);CHKERRQ(ierr);
2128       nctot = 0; for (j=0; j<size; j++) {nctot += ncolsonproc[j];}
2129       if (!nctot) {
2130         ierr = PetscInfo(mat,"Coloring of matrix has some unneeded colors with no corresponding rows\n");CHKERRQ(ierr);
2131       }
2132 
2133       disp[0] = 0;
2134       for (j=1; j<size; j++) {
2135         disp[j] = disp[j-1] + ncolsonproc[j-1];
2136       }
2137 
2138       /* Get complete list of columns for color on each processor */
2139       ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2140       ierr = MPI_Allgatherv((void*)is,n,MPIU_INT,cols,ncolsonproc,disp,MPIU_INT,((PetscObject)mat)->comm);CHKERRQ(ierr);
2141       ierr = PetscFree2(ncolsonproc,disp);CHKERRQ(ierr);
2142     } else if (ctype == IS_COLORING_GHOSTED){
2143       /* Determine local number of columns of this color on this process, including ghost points */
2144       nctot = n;
2145       ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2146       ierr = PetscMemcpy(cols,is,n*sizeof(PetscInt));CHKERRQ(ierr);
2147     } else {
2148       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not provided for this MatFDColoring type");
2149     }
2150 
2151     /*
2152        Mark all rows affect by these columns
2153     */
2154     /* Temporary option to allow for debugging/testing */
2155     flg  = PETSC_FALSE;
2156     ierr = PetscOptionsGetTruth(PETSC_NULL,"-matfdcoloring_slow",&flg,PETSC_NULL);CHKERRQ(ierr);
2157     if (!flg) {/*-----------------------------------------------------------------------------*/
2158       /* crude, fast version */
2159       ierr = PetscMemzero(rowhit,M*sizeof(PetscInt));CHKERRQ(ierr);
2160       /* loop over columns*/
2161       for (j=0; j<nctot; j++) {
2162         if (ctype == IS_COLORING_GHOSTED) {
2163           col = ltog[cols[j]];
2164         } else {
2165           col  = cols[j];
2166         }
2167         if (col >= cstart && col < cend) {
2168           /* column is in diagonal block of matrix */
2169           rows = A_cj + A_ci[col-cstart];
2170           m    = A_ci[col-cstart+1] - A_ci[col-cstart];
2171         } else {
2172 #if defined (PETSC_USE_CTABLE)
2173           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr)
2174 	  colb --;
2175 #else
2176           colb = baij->colmap[col] - 1;
2177 #endif
2178           if (colb == -1) {
2179             m = 0;
2180           } else {
2181             colb = colb/bs;
2182             rows = B_cj + B_ci[colb];
2183             m    = B_ci[colb+1] - B_ci[colb];
2184           }
2185         }
2186         /* loop over columns marking them in rowhit */
2187         for (k=0; k<m; k++) {
2188           rowhit[*rows++] = col + 1;
2189         }
2190       }
2191 
2192       /* count the number of hits */
2193       nrows = 0;
2194       for (j=0; j<M; j++) {
2195         if (rowhit[j]) nrows++;
2196       }
2197       c->nrows[i]         = nrows;
2198       ierr                = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
2199       ierr                = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
2200       ierr = PetscLogObjectMemory(c,2*(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr);
2201       nrows = 0;
2202       for (j=0; j<M; j++) {
2203         if (rowhit[j]) {
2204           c->rows[i][nrows]           = j;
2205           c->columnsforrow[i][nrows] = rowhit[j] - 1;
2206           nrows++;
2207         }
2208       }
2209     } else {/*-------------------------------------------------------------------------------*/
2210       /* slow version, using rowhit as a linked list */
2211       PetscInt currentcol,fm,mfm;
2212       rowhit[M] = M;
2213       nrows     = 0;
2214       /* loop over columns*/
2215       for (j=0; j<nctot; j++) {
2216         if (ctype == IS_COLORING_GHOSTED) {
2217           col = ltog[cols[j]];
2218         } else {
2219           col  = cols[j];
2220         }
2221         if (col >= cstart && col < cend) {
2222           /* column is in diagonal block of matrix */
2223           rows = A_cj + A_ci[col-cstart];
2224           m    = A_ci[col-cstart+1] - A_ci[col-cstart];
2225         } else {
2226 #if defined (PETSC_USE_CTABLE)
2227           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2228           colb --;
2229 #else
2230           colb = baij->colmap[col] - 1;
2231 #endif
2232           if (colb == -1) {
2233             m = 0;
2234           } else {
2235             colb = colb/bs;
2236             rows = B_cj + B_ci[colb];
2237             m    = B_ci[colb+1] - B_ci[colb];
2238           }
2239         }
2240 
2241         /* loop over columns marking them in rowhit */
2242         fm    = M; /* fm points to first entry in linked list */
2243         for (k=0; k<m; k++) {
2244           currentcol = *rows++;
2245 	  /* is it already in the list? */
2246           do {
2247             mfm  = fm;
2248             fm   = rowhit[fm];
2249           } while (fm < currentcol);
2250           /* not in list so add it */
2251           if (fm != currentcol) {
2252             nrows++;
2253             columnsforrow[currentcol] = col;
2254             /* next three lines insert new entry into linked list */
2255             rowhit[mfm]               = currentcol;
2256             rowhit[currentcol]        = fm;
2257             fm                        = currentcol;
2258             /* fm points to present position in list since we know the columns are sorted */
2259           } else {
2260             SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid coloring of matrix detected");
2261           }
2262         }
2263       }
2264       c->nrows[i]         = nrows;
2265       ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
2266       ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
2267       ierr = PetscLogObjectMemory(c,(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr);
2268       /* now store the linked list of rows into c->rows[i] */
2269       nrows = 0;
2270       fm    = rowhit[M];
2271       do {
2272         c->rows[i][nrows]            = fm;
2273         c->columnsforrow[i][nrows++] = columnsforrow[fm];
2274         fm                           = rowhit[fm];
2275       } while (fm < M);
2276     } /* ---------------------------------------------------------------------------------------*/
2277     ierr = PetscFree(cols);CHKERRQ(ierr);
2278   }
2279 
2280   /* Optimize by adding the vscale, and scaleforrow[][] fields */
2281   /*
2282        vscale will contain the "diagonal" on processor scalings followed by the off processor
2283   */
2284   if (ctype == IS_COLORING_GLOBAL) {
2285     PetscInt *garray;
2286     ierr = PetscMalloc(baij->B->cmap->n*sizeof(PetscInt),&garray);CHKERRQ(ierr);
2287     for (i=0; i<baij->B->cmap->n/bs; i++) {
2288       for (j=0; j<bs; j++) {
2289         garray[i*bs+j] = bs*baij->garray[i]+j;
2290       }
2291     }
2292     ierr = VecCreateGhost(((PetscObject)mat)->comm,baij->A->rmap->n,PETSC_DETERMINE,baij->B->cmap->n,garray,&c->vscale);CHKERRQ(ierr);
2293     ierr = PetscFree(garray);CHKERRQ(ierr);
2294     CHKMEMQ;
2295     ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
2296     for (k=0; k<c->ncolors; k++) {
2297       ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
2298       for (l=0; l<c->nrows[k]; l++) {
2299         col = c->columnsforrow[k][l];
2300         if (col >= cstart && col < cend) {
2301           /* column is in diagonal block of matrix */
2302           colb = col - cstart;
2303         } else {
2304           /* column  is in "off-processor" part */
2305 #if defined (PETSC_USE_CTABLE)
2306           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2307           colb --;
2308 #else
2309           colb = baij->colmap[col] - 1;
2310 #endif
2311           colb = colb/bs;
2312           colb += cend - cstart;
2313         }
2314         c->vscaleforrow[k][l] = colb;
2315       }
2316     }
2317   } else if (ctype == IS_COLORING_GHOSTED) {
2318     /* Get gtol mapping */
2319     PetscInt N = mat->cmap->N, *gtol;
2320     ierr = PetscMalloc((N+1)*sizeof(PetscInt),&gtol);CHKERRQ(ierr);
2321     for (i=0; i<N; i++) gtol[i] = -1;
2322     for (i=0; i<map->n; i++) gtol[ltog[i]] = i;
2323 
2324     c->vscale = 0; /* will be created in MatFDColoringApply() */
2325     ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
2326     for (k=0; k<c->ncolors; k++) {
2327       ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
2328       for (l=0; l<c->nrows[k]; l++) {
2329         col = c->columnsforrow[k][l];      /* global column index */
2330         c->vscaleforrow[k][l] = gtol[col]; /* local column index */
2331       }
2332     }
2333     ierr = PetscFree(gtol);CHKERRQ(ierr);
2334   }
2335   ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr);
2336 
2337   ierr = PetscFree(rowhit);CHKERRQ(ierr);
2338   ierr = PetscFree(columnsforrow);CHKERRQ(ierr);
2339   ierr = MatRestoreColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr);
2340   ierr = MatRestoreColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr);
2341     CHKMEMQ;
2342   PetscFunctionReturn(0);
2343 }
2344 
2345 #undef __FUNCT__
2346 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIBAIJ"
2347 PetscErrorCode MatGetSeqNonzerostructure_MPIBAIJ(Mat A,Mat *newmat)
2348 {
2349   Mat            B;
2350   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ *)A->data;
2351   Mat_SeqBAIJ    *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data;
2352   Mat_SeqAIJ     *b;
2353   PetscErrorCode ierr;
2354   PetscMPIInt    size,rank,*recvcounts = 0,*displs = 0;
2355   PetscInt       sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs;
2356   PetscInt       m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf;
2357 
2358   PetscFunctionBegin;
2359   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
2360   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
2361 
2362   /* ----------------------------------------------------------------
2363      Tell every processor the number of nonzeros per row
2364   */
2365   ierr = PetscMalloc((A->rmap->N/bs)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
2366   for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) {
2367     lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs];
2368   }
2369   sendcount = A->rmap->rend/bs - A->rmap->rstart/bs;
2370   ierr = PetscMalloc(2*size*sizeof(PetscMPIInt),&recvcounts);CHKERRQ(ierr);
2371   displs     = recvcounts + size;
2372   for (i=0; i<size; i++) {
2373     recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs;
2374     displs[i]     = A->rmap->range[i]/bs;
2375   }
2376 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2377   ierr  = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2378 #else
2379   ierr  = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2380 #endif
2381   /* ---------------------------------------------------------------
2382      Create the sequential matrix of the same type as the local block diagonal
2383   */
2384   ierr  = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
2385   ierr  = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2386   ierr  = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2387   ierr  = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr);
2388   b = (Mat_SeqAIJ *)B->data;
2389 
2390   /*--------------------------------------------------------------------
2391     Copy my part of matrix column indices over
2392   */
2393   sendcount  = ad->nz + bd->nz;
2394   jsendbuf   = b->j + b->i[rstarts[rank]/bs];
2395   a_jsendbuf = ad->j;
2396   b_jsendbuf = bd->j;
2397   n          = A->rmap->rend/bs - A->rmap->rstart/bs;
2398   cnt        = 0;
2399   for (i=0; i<n; i++) {
2400 
2401     /* put in lower diagonal portion */
2402     m = bd->i[i+1] - bd->i[i];
2403     while (m > 0) {
2404       /* is it above diagonal (in bd (compressed) numbering) */
2405       if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break;
2406       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2407       m--;
2408     }
2409 
2410     /* put in diagonal portion */
2411     for (j=ad->i[i]; j<ad->i[i+1]; j++) {
2412       jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++;
2413     }
2414 
2415     /* put in upper diagonal portion */
2416     while (m-- > 0) {
2417       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2418     }
2419   }
2420   if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt);
2421 
2422   /*--------------------------------------------------------------------
2423     Gather all column indices to all processors
2424   */
2425   for (i=0; i<size; i++) {
2426     recvcounts[i] = 0;
2427     for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) {
2428       recvcounts[i] += lens[j];
2429     }
2430   }
2431   displs[0]  = 0;
2432   for (i=1; i<size; i++) {
2433     displs[i] = displs[i-1] + recvcounts[i-1];
2434   }
2435 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2436   ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2437 #else
2438   ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2439 #endif
2440   /*--------------------------------------------------------------------
2441     Assemble the matrix into useable form (note numerical values not yet set)
2442   */
2443   /* set the b->ilen (length of each row) values */
2444   ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr);
2445   /* set the b->i indices */
2446   b->i[0] = 0;
2447   for (i=1; i<=A->rmap->N/bs; i++) {
2448     b->i[i] = b->i[i-1] + lens[i-1];
2449   }
2450   ierr = PetscFree(lens);CHKERRQ(ierr);
2451   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2452   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2453   ierr = PetscFree(recvcounts);CHKERRQ(ierr);
2454 
2455   if (A->symmetric){
2456     ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2457   } else if (A->hermitian) {
2458     ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr);
2459   } else if (A->structurally_symmetric) {
2460     ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2461   }
2462   *newmat = B;
2463   PetscFunctionReturn(0);
2464 }
2465 
2466 #undef __FUNCT__
2467 #define __FUNCT__ "MatSOR_MPIBAIJ"
2468 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2469 {
2470   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
2471   PetscErrorCode ierr;
2472   Vec            bb1 = 0;
2473 
2474   PetscFunctionBegin;
2475   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) {
2476     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2477   }
2478 
2479   if (flag == SOR_APPLY_UPPER) {
2480     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2481     PetscFunctionReturn(0);
2482   }
2483 
2484   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2485     if (flag & SOR_ZERO_INITIAL_GUESS) {
2486       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2487       its--;
2488     }
2489 
2490     while (its--) {
2491       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2492       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2493 
2494       /* update rhs: bb1 = bb - B*x */
2495       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2496       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2497 
2498       /* local sweep */
2499       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2500     }
2501   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
2502     if (flag & SOR_ZERO_INITIAL_GUESS) {
2503       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2504       its--;
2505     }
2506     while (its--) {
2507       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2508       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2509 
2510       /* update rhs: bb1 = bb - B*x */
2511       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2512       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2513 
2514       /* local sweep */
2515       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2516     }
2517   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
2518     if (flag & SOR_ZERO_INITIAL_GUESS) {
2519       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2520       its--;
2521     }
2522     while (its--) {
2523       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2524       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2525 
2526       /* update rhs: bb1 = bb - B*x */
2527       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2528       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2529 
2530       /* local sweep */
2531       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2532     }
2533   } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Parallel version of SOR requested not supported");
2534 
2535   if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);}
2536   PetscFunctionReturn(0);
2537 }
2538 
2539 extern PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_BAIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2540 
2541 
2542 /* -------------------------------------------------------------------*/
2543 static struct _MatOps MatOps_Values = {
2544        MatSetValues_MPIBAIJ,
2545        MatGetRow_MPIBAIJ,
2546        MatRestoreRow_MPIBAIJ,
2547        MatMult_MPIBAIJ,
2548 /* 4*/ MatMultAdd_MPIBAIJ,
2549        MatMultTranspose_MPIBAIJ,
2550        MatMultTransposeAdd_MPIBAIJ,
2551        0,
2552        0,
2553        0,
2554 /*10*/ 0,
2555        0,
2556        0,
2557        MatSOR_MPIBAIJ,
2558        MatTranspose_MPIBAIJ,
2559 /*15*/ MatGetInfo_MPIBAIJ,
2560        MatEqual_MPIBAIJ,
2561        MatGetDiagonal_MPIBAIJ,
2562        MatDiagonalScale_MPIBAIJ,
2563        MatNorm_MPIBAIJ,
2564 /*20*/ MatAssemblyBegin_MPIBAIJ,
2565        MatAssemblyEnd_MPIBAIJ,
2566        MatSetOption_MPIBAIJ,
2567        MatZeroEntries_MPIBAIJ,
2568 /*24*/ MatZeroRows_MPIBAIJ,
2569        0,
2570        0,
2571        0,
2572        0,
2573 /*29*/ MatSetUpPreallocation_MPIBAIJ,
2574        0,
2575        0,
2576        0,
2577        0,
2578 /*34*/ MatDuplicate_MPIBAIJ,
2579        0,
2580        0,
2581        0,
2582        0,
2583 /*39*/ MatAXPY_MPIBAIJ,
2584        MatGetSubMatrices_MPIBAIJ,
2585        MatIncreaseOverlap_MPIBAIJ,
2586        MatGetValues_MPIBAIJ,
2587        MatCopy_MPIBAIJ,
2588 /*44*/ 0,
2589        MatScale_MPIBAIJ,
2590        0,
2591        0,
2592        0,
2593 /*49*/ MatSetBlockSize_MPIBAIJ,
2594        0,
2595        0,
2596        0,
2597        0,
2598 /*54*/ MatFDColoringCreate_MPIBAIJ,
2599        0,
2600        MatSetUnfactored_MPIBAIJ,
2601        MatPermute_MPIBAIJ,
2602        MatSetValuesBlocked_MPIBAIJ,
2603 /*59*/ MatGetSubMatrix_MPIBAIJ,
2604        MatDestroy_MPIBAIJ,
2605        MatView_MPIBAIJ,
2606        0,
2607        0,
2608 /*64*/ 0,
2609        0,
2610        0,
2611        0,
2612        0,
2613 /*69*/ MatGetRowMaxAbs_MPIBAIJ,
2614        0,
2615        0,
2616        0,
2617        0,
2618 /*74*/ 0,
2619        MatFDColoringApply_BAIJ,
2620        0,
2621        0,
2622        0,
2623 /*79*/ 0,
2624        0,
2625        0,
2626        0,
2627        MatLoad_MPIBAIJ,
2628 /*84*/ 0,
2629        0,
2630        0,
2631        0,
2632        0,
2633 /*89*/ 0,
2634        0,
2635        0,
2636        0,
2637        0,
2638 /*94*/ 0,
2639        0,
2640        0,
2641        0,
2642        0,
2643 /*99*/ 0,
2644        0,
2645        0,
2646        0,
2647        0,
2648 /*104*/0,
2649        MatRealPart_MPIBAIJ,
2650        MatImaginaryPart_MPIBAIJ,
2651        0,
2652        0,
2653 /*109*/0,
2654        0,
2655        0,
2656        0,
2657        0,
2658 /*114*/MatGetSeqNonzerostructure_MPIBAIJ,
2659        0,
2660        MatGetGhosts_MPIBAIJ
2661 };
2662 
2663 EXTERN_C_BEGIN
2664 #undef __FUNCT__
2665 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
2666 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
2667 {
2668   PetscFunctionBegin;
2669   *a      = ((Mat_MPIBAIJ *)A->data)->A;
2670   *iscopy = PETSC_FALSE;
2671   PetscFunctionReturn(0);
2672 }
2673 EXTERN_C_END
2674 
2675 EXTERN_C_BEGIN
2676 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*);
2677 EXTERN_C_END
2678 
2679 EXTERN_C_BEGIN
2680 #undef __FUNCT__
2681 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ"
2682 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2683 {
2684   PetscInt       m,rstart,cstart,cend;
2685   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
2686   const PetscInt *JJ=0;
2687   PetscScalar    *values=0;
2688   PetscErrorCode ierr;
2689 
2690   PetscFunctionBegin;
2691 
2692   if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2693   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2694   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2695   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2696   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2697   m      = B->rmap->n/bs;
2698   rstart = B->rmap->rstart/bs;
2699   cstart = B->cmap->rstart/bs;
2700   cend   = B->cmap->rend/bs;
2701 
2702   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
2703   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
2704   for (i=0; i<m; i++) {
2705     nz = ii[i+1] - ii[i];
2706     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
2707     nz_max = PetscMax(nz_max,nz);
2708     JJ  = jj + ii[i];
2709     for (j=0; j<nz; j++) {
2710       if (*JJ >= cstart) break;
2711       JJ++;
2712     }
2713     d = 0;
2714     for (; j<nz; j++) {
2715       if (*JJ++ >= cend) break;
2716       d++;
2717     }
2718     d_nnz[i] = d;
2719     o_nnz[i] = nz - d;
2720   }
2721   ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2722   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
2723 
2724   values = (PetscScalar*)V;
2725   if (!values) {
2726     ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2727     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2728   }
2729   for (i=0; i<m; i++) {
2730     PetscInt          row    = i + rstart;
2731     PetscInt          ncols  = ii[i+1] - ii[i];
2732     const PetscInt    *icols = jj + ii[i];
2733     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2734     ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2735   }
2736 
2737   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2738   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2739   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2740 
2741   PetscFunctionReturn(0);
2742 }
2743 EXTERN_C_END
2744 
2745 #undef __FUNCT__
2746 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR"
2747 /*@C
2748    MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
2749    (the default parallel PETSc format).
2750 
2751    Collective on MPI_Comm
2752 
2753    Input Parameters:
2754 +  A - the matrix
2755 .  i - the indices into j for the start of each local row (starts with zero)
2756 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2757 -  v - optional values in the matrix
2758 
2759    Level: developer
2760 
2761 .keywords: matrix, aij, compressed row, sparse, parallel
2762 
2763 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ
2764 @*/
2765 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2766 {
2767   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]);
2768 
2769   PetscFunctionBegin;
2770   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
2771   if (f) {
2772     ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr);
2773   }
2774   PetscFunctionReturn(0);
2775 }
2776 
2777 EXTERN_C_BEGIN
2778 #undef __FUNCT__
2779 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
2780 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
2781 {
2782   Mat_MPIBAIJ    *b;
2783   PetscErrorCode ierr;
2784   PetscInt       i, newbs = PetscAbs(bs);
2785 
2786   PetscFunctionBegin;
2787   if (bs < 0) {
2788     ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPIBAIJ matrix","Mat");CHKERRQ(ierr);
2789       ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr);
2790     ierr = PetscOptionsEnd();CHKERRQ(ierr);
2791     bs   = PetscAbs(bs);
2792   }
2793   if ((d_nnz || o_nnz) && newbs != bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting d_nnz or o_nnz");
2794   bs = newbs;
2795 
2796 
2797   if (bs < 1) SETERRQ(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
2798   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2799   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2800   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2801   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2802 
2803   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2804   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2805   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2806   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2807 
2808   if (d_nnz) {
2809     for (i=0; i<B->rmap->n/bs; i++) {
2810       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]);
2811     }
2812   }
2813   if (o_nnz) {
2814     for (i=0; i<B->rmap->n/bs; i++) {
2815       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]);
2816     }
2817   }
2818 
2819   b = (Mat_MPIBAIJ*)B->data;
2820   b->bs2 = bs*bs;
2821   b->mbs = B->rmap->n/bs;
2822   b->nbs = B->cmap->n/bs;
2823   b->Mbs = B->rmap->N/bs;
2824   b->Nbs = B->cmap->N/bs;
2825 
2826   for (i=0; i<=b->size; i++) {
2827     b->rangebs[i] = B->rmap->range[i]/bs;
2828   }
2829   b->rstartbs = B->rmap->rstart/bs;
2830   b->rendbs   = B->rmap->rend/bs;
2831   b->cstartbs = B->cmap->rstart/bs;
2832   b->cendbs   = B->cmap->rend/bs;
2833 
2834   if (!B->preallocated) {
2835     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2836     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2837     ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
2838     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
2839     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2840     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2841     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
2842     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
2843     ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr);
2844   }
2845 
2846   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2847   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
2848   B->preallocated = PETSC_TRUE;
2849   PetscFunctionReturn(0);
2850 }
2851 EXTERN_C_END
2852 
2853 EXTERN_C_BEGIN
2854 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
2855 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
2856 EXTERN_C_END
2857 
2858 
2859 EXTERN_C_BEGIN
2860 #undef __FUNCT__
2861 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj"
2862 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPIAdj(Mat B, const MatType newtype,MatReuse reuse,Mat *adj)
2863 {
2864   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
2865   PetscErrorCode ierr;
2866   Mat_SeqBAIJ    *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data;
2867   PetscInt       M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs;
2868   const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;
2869 
2870   PetscFunctionBegin;
2871   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&ii);CHKERRQ(ierr);
2872   ii[0] = 0;
2873   CHKMEMQ;
2874   for (i=0; i<M; i++) {
2875     if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]);
2876     if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]);
2877     ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i];
2878     /* remove one from count of matrix has diagonal */
2879     for (j=id[i]; j<id[i+1]; j++) {
2880       if (jd[j] == i) {ii[i+1]--;break;}
2881     }
2882   CHKMEMQ;
2883   }
2884   ierr = PetscMalloc(ii[M]*sizeof(PetscInt),&jj);CHKERRQ(ierr);
2885   cnt = 0;
2886   for (i=0; i<M; i++) {
2887     for (j=io[i]; j<io[i+1]; j++) {
2888       if (garray[jo[j]] > rstart) break;
2889       jj[cnt++] = garray[jo[j]];
2890   CHKMEMQ;
2891     }
2892     for (k=id[i]; k<id[i+1]; k++) {
2893       if (jd[k] != i) {
2894         jj[cnt++] = rstart + jd[k];
2895   CHKMEMQ;
2896       }
2897     }
2898     for (;j<io[i+1]; j++) {
2899       jj[cnt++] = garray[jo[j]];
2900   CHKMEMQ;
2901     }
2902   }
2903   ierr = MatCreateMPIAdj(((PetscObject)B)->comm,M,B->cmap->N/B->rmap->bs,ii,jj,PETSC_NULL,adj);CHKERRQ(ierr);
2904   PetscFunctionReturn(0);
2905 }
2906 EXTERN_C_END
2907 
2908 EXTERN_C_BEGIN
2909 #if defined(PETSC_HAVE_MUMPS)
2910 extern PetscErrorCode MatGetFactor_mpibaij_mumps(Mat,MatFactorType,Mat*);
2911 #endif
2912 EXTERN_C_END
2913 
2914 /*MC
2915    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
2916 
2917    Options Database Keys:
2918 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
2919 . -mat_block_size <bs> - set the blocksize used to store the matrix
2920 - -mat_use_hash_table <fact>
2921 
2922   Level: beginner
2923 
2924 .seealso: MatCreateMPIBAIJ
2925 M*/
2926 
2927 EXTERN_C_BEGIN
2928 #undef __FUNCT__
2929 #define __FUNCT__ "MatCreate_MPIBAIJ"
2930 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIBAIJ(Mat B)
2931 {
2932   Mat_MPIBAIJ    *b;
2933   PetscErrorCode ierr;
2934   PetscTruth     flg;
2935 
2936   PetscFunctionBegin;
2937   ierr = PetscNewLog(B,Mat_MPIBAIJ,&b);CHKERRQ(ierr);
2938   B->data = (void*)b;
2939 
2940 
2941   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2942   B->mapping    = 0;
2943   B->assembled  = PETSC_FALSE;
2944 
2945   B->insertmode = NOT_SET_VALUES;
2946   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
2947   ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr);
2948 
2949   /* build local table of row and column ownerships */
2950   ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);
2951 
2952   /* build cache for off array entries formed */
2953   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
2954   b->donotstash  = PETSC_FALSE;
2955   b->colmap      = PETSC_NULL;
2956   b->garray      = PETSC_NULL;
2957   b->roworiented = PETSC_TRUE;
2958 
2959   /* stuff used in block assembly */
2960   b->barray       = 0;
2961 
2962   /* stuff used for matrix vector multiply */
2963   b->lvec         = 0;
2964   b->Mvctx        = 0;
2965 
2966   /* stuff for MatGetRow() */
2967   b->rowindices   = 0;
2968   b->rowvalues    = 0;
2969   b->getrowactive = PETSC_FALSE;
2970 
2971   /* hash table stuff */
2972   b->ht           = 0;
2973   b->hd           = 0;
2974   b->ht_size      = 0;
2975   b->ht_flag      = PETSC_FALSE;
2976   b->ht_fact      = 0;
2977   b->ht_total_ct  = 0;
2978   b->ht_insert_ct = 0;
2979 
2980   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr);
2981     ierr = PetscOptionsTruth("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
2982     if (flg) {
2983       PetscReal fact = 1.39;
2984       ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
2985       ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr);
2986       if (fact <= 1.0) fact = 1.39;
2987       ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
2988       ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
2989     }
2990   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2991 
2992 #if defined(PETSC_HAVE_MUMPS)
2993   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_mpibaij_mumps",MatGetFactor_mpibaij_mumps);CHKERRQ(ierr);
2994 #endif
2995   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",
2996                                      "MatConvert_MPIBAIJ_MPIAdj",
2997                                       MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr);
2998   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2999                                      "MatStoreValues_MPIBAIJ",
3000                                      MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
3001   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3002                                      "MatRetrieveValues_MPIBAIJ",
3003                                      MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
3004   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
3005                                      "MatGetDiagonalBlock_MPIBAIJ",
3006                                      MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
3007   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C",
3008                                      "MatMPIBAIJSetPreallocation_MPIBAIJ",
3009                                      MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
3010   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",
3011 				     "MatMPIBAIJSetPreallocationCSR_MPIBAIJ",
3012 				     MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
3013   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
3014                                      "MatDiagonalScaleLocal_MPIBAIJ",
3015                                      MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
3016   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C",
3017                                      "MatSetHashTableFactor_MPIBAIJ",
3018                                      MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
3019   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);
3020   PetscFunctionReturn(0);
3021 }
3022 EXTERN_C_END
3023 
3024 /*MC
3025    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
3026 
3027    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
3028    and MATMPIBAIJ otherwise.
3029 
3030    Options Database Keys:
3031 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
3032 
3033   Level: beginner
3034 
3035 .seealso: MatCreateMPIBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3036 M*/
3037 
3038 EXTERN_C_BEGIN
3039 #undef __FUNCT__
3040 #define __FUNCT__ "MatCreate_BAIJ"
3041 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_BAIJ(Mat A)
3042 {
3043   PetscErrorCode ierr;
3044   PetscMPIInt    size;
3045 
3046   PetscFunctionBegin;
3047   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
3048   if (size == 1) {
3049     ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr);
3050   } else {
3051     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
3052   }
3053   PetscFunctionReturn(0);
3054 }
3055 EXTERN_C_END
3056 
3057 #undef __FUNCT__
3058 #define __FUNCT__ "MatMPIBAIJSetPreallocation"
3059 /*@C
3060    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
3061    (block compressed row).  For good matrix assembly performance
3062    the user should preallocate the matrix storage by setting the parameters
3063    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3064    performance can be increased by more than a factor of 50.
3065 
3066    Collective on Mat
3067 
3068    Input Parameters:
3069 +  A - the matrix
3070 .  bs   - size of blockk
3071 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
3072            submatrix  (same for all local rows)
3073 .  d_nnz - array containing the number of block nonzeros in the various block rows
3074            of the in diagonal portion of the local (possibly different for each block
3075            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
3076 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
3077            submatrix (same for all local rows).
3078 -  o_nnz - array containing the number of nonzeros in the various block rows of the
3079            off-diagonal portion of the local submatrix (possibly different for
3080            each block row) or PETSC_NULL.
3081 
3082    If the *_nnz parameter is given then the *_nz parameter is ignored
3083 
3084    Options Database Keys:
3085 +   -mat_block_size - size of the blocks to use
3086 -   -mat_use_hash_table <fact>
3087 
3088    Notes:
3089    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
3090    than it must be used on all processors that share the object for that argument.
3091 
3092    Storage Information:
3093    For a square global matrix we define each processor's diagonal portion
3094    to be its local rows and the corresponding columns (a square submatrix);
3095    each processor's off-diagonal portion encompasses the remainder of the
3096    local matrix (a rectangular submatrix).
3097 
3098    The user can specify preallocated storage for the diagonal part of
3099    the local submatrix with either d_nz or d_nnz (not both).  Set
3100    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
3101    memory allocation.  Likewise, specify preallocated storage for the
3102    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3103 
3104    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3105    the figure below we depict these three local rows and all columns (0-11).
3106 
3107 .vb
3108            0 1 2 3 4 5 6 7 8 9 10 11
3109           -------------------
3110    row 3  |  o o o d d d o o o o o o
3111    row 4  |  o o o d d d o o o o o o
3112    row 5  |  o o o d d d o o o o o o
3113           -------------------
3114 .ve
3115 
3116    Thus, any entries in the d locations are stored in the d (diagonal)
3117    submatrix, and any entries in the o locations are stored in the
3118    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3119    stored simply in the MATSEQBAIJ format for compressed row storage.
3120 
3121    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3122    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3123    In general, for PDE problems in which most nonzeros are near the diagonal,
3124    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3125    or you will get TERRIBLE performance; see the users' manual chapter on
3126    matrices.
3127 
3128    You can call MatGetInfo() to get information on how effective the preallocation was;
3129    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3130    You can also run with the option -info and look for messages with the string
3131    malloc in them to see if additional memory allocation was needed.
3132 
3133    Level: intermediate
3134 
3135 .keywords: matrix, block, aij, compressed row, sparse, parallel
3136 
3137 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR()
3138 @*/
3139 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3140 {
3141   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
3142 
3143   PetscFunctionBegin;
3144   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3145   if (f) {
3146     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3147   }
3148   PetscFunctionReturn(0);
3149 }
3150 
3151 #undef __FUNCT__
3152 #define __FUNCT__ "MatCreateMPIBAIJ"
3153 /*@C
3154    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
3155    (block compressed row).  For good matrix assembly performance
3156    the user should preallocate the matrix storage by setting the parameters
3157    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3158    performance can be increased by more than a factor of 50.
3159 
3160    Collective on MPI_Comm
3161 
3162    Input Parameters:
3163 +  comm - MPI communicator
3164 .  bs   - size of blockk
3165 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3166            This value should be the same as the local size used in creating the
3167            y vector for the matrix-vector product y = Ax.
3168 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
3169            This value should be the same as the local size used in creating the
3170            x vector for the matrix-vector product y = Ax.
3171 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3172 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3173 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
3174            submatrix  (same for all local rows)
3175 .  d_nnz - array containing the number of nonzero blocks in the various block rows
3176            of the in diagonal portion of the local (possibly different for each block
3177            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
3178 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3179            submatrix (same for all local rows).
3180 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
3181            off-diagonal portion of the local submatrix (possibly different for
3182            each block row) or PETSC_NULL.
3183 
3184    Output Parameter:
3185 .  A - the matrix
3186 
3187    Options Database Keys:
3188 +   -mat_block_size - size of the blocks to use
3189 -   -mat_use_hash_table <fact>
3190 
3191    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3192    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3193    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3194 
3195    Notes:
3196    If the *_nnz parameter is given then the *_nz parameter is ignored
3197 
3198    A nonzero block is any block that as 1 or more nonzeros in it
3199 
3200    The user MUST specify either the local or global matrix dimensions
3201    (possibly both).
3202 
3203    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
3204    than it must be used on all processors that share the object for that argument.
3205 
3206    Storage Information:
3207    For a square global matrix we define each processor's diagonal portion
3208    to be its local rows and the corresponding columns (a square submatrix);
3209    each processor's off-diagonal portion encompasses the remainder of the
3210    local matrix (a rectangular submatrix).
3211 
3212    The user can specify preallocated storage for the diagonal part of
3213    the local submatrix with either d_nz or d_nnz (not both).  Set
3214    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
3215    memory allocation.  Likewise, specify preallocated storage for the
3216    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3217 
3218    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3219    the figure below we depict these three local rows and all columns (0-11).
3220 
3221 .vb
3222            0 1 2 3 4 5 6 7 8 9 10 11
3223           -------------------
3224    row 3  |  o o o d d d o o o o o o
3225    row 4  |  o o o d d d o o o o o o
3226    row 5  |  o o o d d d o o o o o o
3227           -------------------
3228 .ve
3229 
3230    Thus, any entries in the d locations are stored in the d (diagonal)
3231    submatrix, and any entries in the o locations are stored in the
3232    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3233    stored simply in the MATSEQBAIJ format for compressed row storage.
3234 
3235    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3236    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3237    In general, for PDE problems in which most nonzeros are near the diagonal,
3238    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3239    or you will get TERRIBLE performance; see the users' manual chapter on
3240    matrices.
3241 
3242    Level: intermediate
3243 
3244 .keywords: matrix, block, aij, compressed row, sparse, parallel
3245 
3246 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3247 @*/
3248 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3249 {
3250   PetscErrorCode ierr;
3251   PetscMPIInt    size;
3252 
3253   PetscFunctionBegin;
3254   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3255   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3256   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3257   if (size > 1) {
3258     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
3259     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3260   } else {
3261     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3262     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3263   }
3264   PetscFunctionReturn(0);
3265 }
3266 
3267 #undef __FUNCT__
3268 #define __FUNCT__ "MatDuplicate_MPIBAIJ"
3269 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3270 {
3271   Mat            mat;
3272   Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
3273   PetscErrorCode ierr;
3274   PetscInt       len=0;
3275 
3276   PetscFunctionBegin;
3277   *newmat       = 0;
3278   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
3279   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3280   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3281   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3282 
3283   mat->factortype   = matin->factortype;
3284   mat->preallocated = PETSC_TRUE;
3285   mat->assembled    = PETSC_TRUE;
3286   mat->insertmode   = NOT_SET_VALUES;
3287 
3288   a      = (Mat_MPIBAIJ*)mat->data;
3289   mat->rmap->bs  = matin->rmap->bs;
3290   a->bs2   = oldmat->bs2;
3291   a->mbs   = oldmat->mbs;
3292   a->nbs   = oldmat->nbs;
3293   a->Mbs   = oldmat->Mbs;
3294   a->Nbs   = oldmat->Nbs;
3295 
3296   ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3297   ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3298 
3299   a->size         = oldmat->size;
3300   a->rank         = oldmat->rank;
3301   a->donotstash   = oldmat->donotstash;
3302   a->roworiented  = oldmat->roworiented;
3303   a->rowindices   = 0;
3304   a->rowvalues    = 0;
3305   a->getrowactive = PETSC_FALSE;
3306   a->barray       = 0;
3307   a->rstartbs     = oldmat->rstartbs;
3308   a->rendbs       = oldmat->rendbs;
3309   a->cstartbs     = oldmat->cstartbs;
3310   a->cendbs       = oldmat->cendbs;
3311 
3312   /* hash table stuff */
3313   a->ht           = 0;
3314   a->hd           = 0;
3315   a->ht_size      = 0;
3316   a->ht_flag      = oldmat->ht_flag;
3317   a->ht_fact      = oldmat->ht_fact;
3318   a->ht_total_ct  = 0;
3319   a->ht_insert_ct = 0;
3320 
3321   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
3322   if (oldmat->colmap) {
3323 #if defined (PETSC_USE_CTABLE)
3324   ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3325 #else
3326   ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3327   ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3328   ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3329 #endif
3330   } else a->colmap = 0;
3331 
3332   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
3333     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3334     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3335     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
3336   } else a->garray = 0;
3337 
3338   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
3339   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3340   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3341   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3342   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3343 
3344   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3345   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3346   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3347   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3348   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3349   *newmat = mat;
3350 
3351   PetscFunctionReturn(0);
3352 }
3353 
3354 #undef __FUNCT__
3355 #define __FUNCT__ "MatLoad_MPIBAIJ"
3356 PetscErrorCode MatLoad_MPIBAIJ(PetscViewer viewer, const MatType type,Mat *newmat)
3357 {
3358   Mat            A;
3359   PetscErrorCode ierr;
3360   int            fd;
3361   PetscInt       i,nz,j,rstart,rend;
3362   PetscScalar    *vals,*buf;
3363   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3364   MPI_Status     status;
3365   PetscMPIInt    rank,size,maxnz;
3366   PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
3367   PetscInt       *locrowlens = PETSC_NULL,*procsnz = PETSC_NULL,*browners = PETSC_NULL;
3368   PetscInt       jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax;
3369   PetscMPIInt    tag = ((PetscObject)viewer)->tag;
3370   PetscInt       *dlens = PETSC_NULL,*odlens = PETSC_NULL,*mask = PETSC_NULL,*masked1 = PETSC_NULL,*masked2 = PETSC_NULL,rowcount,odcount;
3371   PetscInt       dcount,kmax,k,nzcount,tmp,mend;
3372 
3373   PetscFunctionBegin;
3374   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr);
3375     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
3376   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3377 
3378   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3379   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3380   if (!rank) {
3381     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3382     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
3383     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3384   }
3385 
3386   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3387   M = header[1]; N = header[2];
3388 
3389   if (M != N) SETERRQ(((PetscObject)viewer)->comm,PETSC_ERR_SUP,"Can only do square matrices");
3390 
3391   /*
3392      This code adds extra rows to make sure the number of rows is
3393      divisible by the blocksize
3394   */
3395   Mbs        = M/bs;
3396   extra_rows = bs - M + bs*Mbs;
3397   if (extra_rows == bs) extra_rows = 0;
3398   else                  Mbs++;
3399   if (extra_rows && !rank) {
3400     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3401   }
3402 
3403   /* determine ownership of all rows */
3404   mbs        = Mbs/size + ((Mbs % size) > rank);
3405   m          = mbs*bs;
3406   ierr       = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr);
3407   ierr       = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3408 
3409   /* process 0 needs enough room for process with most rows */
3410   if (!rank) {
3411     mmax = rowners[1];
3412     for (i=2; i<size; i++) {
3413       mmax = PetscMax(mmax,rowners[i]);
3414     }
3415     mmax*=bs;
3416   } else mmax = m;
3417 
3418   rowners[0] = 0;
3419   for (i=2; i<=size; i++)  rowners[i] += rowners[i-1];
3420   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
3421   rstart = rowners[rank];
3422   rend   = rowners[rank+1];
3423 
3424   /* distribute row lengths to all processors */
3425   ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr);
3426   if (!rank) {
3427     mend = m;
3428     if (size == 1) mend = mend - extra_rows;
3429     ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr);
3430     for (j=mend; j<m; j++) locrowlens[j] = 1;
3431     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3432     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3433     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3434     for (j=0; j<m; j++) {
3435       procsnz[0] += locrowlens[j];
3436     }
3437     for (i=1; i<size; i++) {
3438       mend = browners[i+1] - browners[i];
3439       if (i == size-1) mend = mend - extra_rows;
3440       ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr);
3441       for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
3442       /* calculate the number of nonzeros on each processor */
3443       for (j=0; j<browners[i+1]-browners[i]; j++) {
3444         procsnz[i] += rowlengths[j];
3445       }
3446       ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3447     }
3448     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3449   } else {
3450     ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3451   }
3452 
3453   if (!rank) {
3454     /* determine max buffer needed and allocate it */
3455     maxnz = procsnz[0];
3456     for (i=1; i<size; i++) {
3457       maxnz = PetscMax(maxnz,procsnz[i]);
3458     }
3459     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3460 
3461     /* read in my part of the matrix column indices  */
3462     nz     = procsnz[0];
3463     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
3464     mycols = ibuf;
3465     if (size == 1)  nz -= extra_rows;
3466     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3467     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
3468 
3469     /* read in every ones (except the last) and ship off */
3470     for (i=1; i<size-1; i++) {
3471       nz   = procsnz[i];
3472       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3473       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3474     }
3475     /* read in the stuff for the last proc */
3476     if (size != 1) {
3477       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
3478       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3479       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
3480       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
3481     }
3482     ierr = PetscFree(cols);CHKERRQ(ierr);
3483   } else {
3484     /* determine buffer space needed for message */
3485     nz = 0;
3486     for (i=0; i<m; i++) {
3487       nz += locrowlens[i];
3488     }
3489     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
3490     mycols = ibuf;
3491     /* receive message of column indices*/
3492     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3493     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
3494     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
3495   }
3496 
3497   /* loop over local rows, determining number of off diagonal entries */
3498   ierr     = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
3499   ierr     = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
3500   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3501   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3502   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3503   rowcount = 0; nzcount = 0;
3504   for (i=0; i<mbs; i++) {
3505     dcount  = 0;
3506     odcount = 0;
3507     for (j=0; j<bs; j++) {
3508       kmax = locrowlens[rowcount];
3509       for (k=0; k<kmax; k++) {
3510         tmp = mycols[nzcount++]/bs;
3511         if (!mask[tmp]) {
3512           mask[tmp] = 1;
3513           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
3514           else masked1[dcount++] = tmp;
3515         }
3516       }
3517       rowcount++;
3518     }
3519 
3520     dlens[i]  = dcount;
3521     odlens[i] = odcount;
3522 
3523     /* zero out the mask elements we set */
3524     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
3525     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
3526   }
3527 
3528   /* create our matrix */
3529   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
3530   ierr = MatSetSizes(A,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3531   ierr = MatSetType(A,type);CHKERRQ(ierr)
3532   ierr = MatMPIBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
3533 
3534   if (!rank) {
3535     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
3536     /* read in my part of the matrix numerical values  */
3537     nz = procsnz[0];
3538     vals = buf;
3539     mycols = ibuf;
3540     if (size == 1)  nz -= extra_rows;
3541     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3542     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
3543 
3544     /* insert into matrix */
3545     jj      = rstart*bs;
3546     for (i=0; i<m; i++) {
3547       ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3548       mycols += locrowlens[i];
3549       vals   += locrowlens[i];
3550       jj++;
3551     }
3552     /* read in other processors (except the last one) and ship out */
3553     for (i=1; i<size-1; i++) {
3554       nz   = procsnz[i];
3555       vals = buf;
3556       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3557       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
3558     }
3559     /* the last proc */
3560     if (size != 1){
3561       nz   = procsnz[i] - extra_rows;
3562       vals = buf;
3563       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3564       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
3565       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
3566     }
3567     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3568   } else {
3569     /* receive numeric values */
3570     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
3571 
3572     /* receive message of values*/
3573     vals   = buf;
3574     mycols = ibuf;
3575     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
3576     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
3577     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
3578 
3579     /* insert into matrix */
3580     jj      = rstart*bs;
3581     for (i=0; i<m; i++) {
3582       ierr    = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3583       mycols += locrowlens[i];
3584       vals   += locrowlens[i];
3585       jj++;
3586     }
3587   }
3588   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
3589   ierr = PetscFree(buf);CHKERRQ(ierr);
3590   ierr = PetscFree(ibuf);CHKERRQ(ierr);
3591   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
3592   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
3593   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
3594   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3595   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3596 
3597   *newmat = A;
3598   PetscFunctionReturn(0);
3599 }
3600 
3601 #undef __FUNCT__
3602 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
3603 /*@
3604    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
3605 
3606    Input Parameters:
3607 .  mat  - the matrix
3608 .  fact - factor
3609 
3610    Collective on Mat
3611 
3612    Level: advanced
3613 
3614   Notes:
3615    This can also be set by the command line option: -mat_use_hash_table <fact>
3616 
3617 .keywords: matrix, hashtable, factor, HT
3618 
3619 .seealso: MatSetOption()
3620 @*/
3621 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
3622 {
3623   PetscErrorCode ierr,(*f)(Mat,PetscReal);
3624 
3625   PetscFunctionBegin;
3626   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);CHKERRQ(ierr);
3627   if (f) {
3628     ierr = (*f)(mat,fact);CHKERRQ(ierr);
3629   }
3630   PetscFunctionReturn(0);
3631 }
3632 
3633 EXTERN_C_BEGIN
3634 #undef __FUNCT__
3635 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
3636 PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
3637 {
3638   Mat_MPIBAIJ *baij;
3639 
3640   PetscFunctionBegin;
3641   baij = (Mat_MPIBAIJ*)mat->data;
3642   baij->ht_fact = fact;
3643   PetscFunctionReturn(0);
3644 }
3645 EXTERN_C_END
3646 
3647 #undef __FUNCT__
3648 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
3649 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3650 {
3651   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
3652   PetscFunctionBegin;
3653   *Ad     = a->A;
3654   *Ao     = a->B;
3655   *colmap = a->garray;
3656   PetscFunctionReturn(0);
3657 }
3658 
3659 /*
3660     Special version for direct calls from Fortran (to eliminate two function call overheads
3661 */
3662 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3663 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3664 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3665 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3666 #endif
3667 
3668 #undef __FUNCT__
3669 #define __FUNCT__ "matmpibiajsetvaluesblocked"
3670 /*@C
3671   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked()
3672 
3673   Collective on Mat
3674 
3675   Input Parameters:
3676 + mat - the matrix
3677 . min - number of input rows
3678 . im - input rows
3679 . nin - number of input columns
3680 . in - input columns
3681 . v - numerical values input
3682 - addvin - INSERT_VALUES or ADD_VALUES
3683 
3684   Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse.
3685 
3686   Level: advanced
3687 
3688 .seealso:   MatSetValuesBlocked()
3689 @*/
3690 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
3691 {
3692   /* convert input arguments to C version */
3693   Mat             mat = *matin;
3694   PetscInt        m = *min, n = *nin;
3695   InsertMode      addv = *addvin;
3696 
3697   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
3698   const MatScalar *value;
3699   MatScalar       *barray=baij->barray;
3700   PetscTruth      roworiented = baij->roworiented;
3701   PetscErrorCode  ierr;
3702   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
3703   PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
3704   PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
3705 
3706   PetscFunctionBegin;
3707   /* tasks normally handled by MatSetValuesBlocked() */
3708   if (mat->insertmode == NOT_SET_VALUES) {
3709     mat->insertmode = addv;
3710   }
3711 #if defined(PETSC_USE_DEBUG)
3712   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
3713   if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
3714 #endif
3715   if (mat->assembled) {
3716     mat->was_assembled = PETSC_TRUE;
3717     mat->assembled     = PETSC_FALSE;
3718   }
3719   ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
3720 
3721 
3722   if(!barray) {
3723     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
3724     baij->barray = barray;
3725   }
3726 
3727   if (roworiented) {
3728     stepval = (n-1)*bs;
3729   } else {
3730     stepval = (m-1)*bs;
3731   }
3732   for (i=0; i<m; i++) {
3733     if (im[i] < 0) continue;
3734 #if defined(PETSC_USE_DEBUG)
3735     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
3736 #endif
3737     if (im[i] >= rstart && im[i] < rend) {
3738       row = im[i] - rstart;
3739       for (j=0; j<n; j++) {
3740         /* If NumCol = 1 then a copy is not required */
3741         if ((roworiented) && (n == 1)) {
3742           barray = (MatScalar*)v + i*bs2;
3743         } else if((!roworiented) && (m == 1)) {
3744           barray = (MatScalar*)v + j*bs2;
3745         } else { /* Here a copy is required */
3746           if (roworiented) {
3747             value = v + i*(stepval+bs)*bs + j*bs;
3748           } else {
3749             value = v + j*(stepval+bs)*bs + i*bs;
3750           }
3751           for (ii=0; ii<bs; ii++,value+=stepval) {
3752             for (jj=0; jj<bs; jj++) {
3753               *barray++  = *value++;
3754             }
3755           }
3756           barray -=bs2;
3757         }
3758 
3759         if (in[j] >= cstart && in[j] < cend){
3760           col  = in[j] - cstart;
3761           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
3762         }
3763         else if (in[j] < 0) continue;
3764 #if defined(PETSC_USE_DEBUG)
3765         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
3766 #endif
3767         else {
3768           if (mat->was_assembled) {
3769             if (!baij->colmap) {
3770               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
3771             }
3772 
3773 #if defined(PETSC_USE_DEBUG)
3774 #if defined (PETSC_USE_CTABLE)
3775             { PetscInt data;
3776               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
3777               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
3778             }
3779 #else
3780             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
3781 #endif
3782 #endif
3783 #if defined (PETSC_USE_CTABLE)
3784 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
3785             col  = (col - 1)/bs;
3786 #else
3787             col = (baij->colmap[in[j]] - 1)/bs;
3788 #endif
3789             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
3790               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
3791               col =  in[j];
3792             }
3793           }
3794           else col = in[j];
3795           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
3796         }
3797       }
3798     } else {
3799       if (!baij->donotstash) {
3800         if (roworiented) {
3801           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
3802         } else {
3803           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
3804         }
3805       }
3806     }
3807   }
3808 
3809   /* task normally handled by MatSetValuesBlocked() */
3810   ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
3811   PetscFunctionReturn(0);
3812 }
3813