xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 3b46a515b032ed3b47b8319ddc2eaec86dcc5ffc)
#define PETSCMAT_DLL

#include "../src/mat/impls/baij/mpi/mpibaij.h"   /*I  "petscmat.h"  I*/

EXTERN PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
EXTERN PetscErrorCode DisAssemble_MPIBAIJ(Mat);
EXTERN PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat,PetscInt,IS[],PetscInt);
EXTERN PetscErrorCode MatGetSubMatrices_MPIBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat *[]);
EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []);
EXTERN PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
EXTERN PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar);

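/*
   Usage sketch (not part of this file): a minimal, assumption-level example of
   the user-facing calls that the routines below implement, for a parallel
   matrix with block size 2 and hypothetical sizes m,n,M,N and preallocation
   counts d_nz,o_nz.  Sizes are in scalar rows/columns and must be divisible
   by the block size.

     Mat A;
     MatCreate(PETSC_COMM_WORLD,&A);
     MatSetSizes(A,m,n,M,N);
     MatSetType(A,MATMPIBAIJ);
     MatMPIBAIJSetPreallocation(A,2,d_nz,PETSC_NULL,o_nz,PETSC_NULL);
     MatSetValuesBlocked(A,1,&brow,1,&bcol,vals,INSERT_VALUES);
     MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
     MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
     MatDestroy(A);
*/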
#undef __FUNCT__
#define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ"
PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;}
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);}
  ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++){
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {va[i] = vb[i]; if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);}
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  if (idxb) {ierr = PetscFree(idxb);CHKERRQ(ierr);}
  ierr = VecDestroy(vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPIBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*
     Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  This is done in a non-scalable way, since the
   length of colmap equals the number of block columns of the global matrix.
   Entries are stored with an offset of 1 so that a value of 0 (left by
   PetscMemzero()) means the block column is not present locally.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIBAIJ_Private"
PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr);
  for (i=0; i<nbs; i++){
    ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}

#define CHUNKSIZE  10

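/*
   The two macros below inline the insertion of a single scalar value into the
   local SeqBAIJ storage: the _A_ variant targets the diagonal block baij->A,
   the _B_ variant the off-diagonal block baij->B.  Each locates the block
   column with a short binary search followed by a linear scan of the block
   row; if the block already exists, the entry inside the bs*bs block is added
   to or overwritten, otherwise the later blocks in the row are shifted up and
   a zeroed block is inserted (reallocating through MatSeqXAIJReallocateAIJ
   when the row is full).
*/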
#define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap  = ap +  bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value;  \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
    a_noinsert:; \
    ailen[brow] = nrow; \
}

#define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap  = ap +  bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value;  \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
    CHKMEMQ;\
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
    b_noinsert:; \
    bilen[brow] = nrow; \
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ"
PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  MatScalar      value;
  PetscTruth     roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig=mat->cmap->rend,bs=mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat            A = baij->A;
  Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)(A)->data;
  PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar      *aa=a->a;

  Mat            B = baij->B;
  Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(B)->data;
  PetscInt       *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar      *ba=b->a;

  PetscInt       *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt       low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar      *ap,*bap;

  PetscFunctionBegin;
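  /* Locally owned rows: entries whose global column falls in the diagonal
     block go into baij->A; remaining local entries go into baij->B with the
     global column translated to B's local numbering through colmap.  Rows
     owned by other processes are stashed and communicated during assembly. */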
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      row = im[i] - rstart_orig;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig){
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
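            /* col < 0 here means the block column is not yet present in B;
               unless new nonzeros are disallowed, fall back to an unassembled
               state (DisAssemble) so it can be inserted, then reload the
               SeqBAIJ pointers that DisAssemble invalidated. */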
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B = baij->B;
              b = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba=b->a;
            } else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  const PetscScalar *value;
  MatScalar         *barray=baij->barray;
  PetscTruth        roworiented = baij->roworiented;
  PetscErrorCode    ierr;
  PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
  PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
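  /* stepval is the scalar distance to skip after copying one bs-long row
     (or column) of a block, i.e. the remainder of a full row of the caller's
     m*bs by n*bs value array; it advances value from one row of a block to
     the next in the copy loop below. */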
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++  = *value++;
            }
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend){
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
        else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined (PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
            }
          }
          else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

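/*
   HASH() is Knuth's multiplicative hashing: HASH_KEY is the fractional part
   of the golden ratio, so the fractional part of (key)*HASH_KEY is well
   scattered over [0,1) and is scaled to a slot index in [0,size).
   Collisions are resolved by linear probing in the lookup loops below.
*/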
#define HASH_KEY 0.6180339887
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscTruth     roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig=mat->rmap->rend,Nbs=baij->Nbs;
  PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
  PetscReal      tmp;
  MatScalar      **HD = baij->hd,value;
#if defined(PETSC_USE_DEBUG)
  PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j=0; j<n; j++) {
        col = in[j];
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        /* Look up into the hash table */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);
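        /* Probe linearly from h1, wrapping around the end of the table; the
           table only maps blocks that already exist, so coming full circle
           without a match is an error. */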
        idx = h1;
#if defined(PETSC_USE_DEBUG)
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#endif
        /* A hash table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscTruth        roworiented = baij->roworiented;
  PetscErrorCode    ierr;
  PetscInt          i,j,ii,jj,row,col;
  PetscInt          rstart=baij->rstartbs;
  PetscInt          rend=mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
  PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  PetscReal         tmp;
  MatScalar         **HD = baij->hd,*baij_a;
  const PetscScalar *v_t,*value;
#if defined(PETSC_USE_DEBUG)
  PetscInt          total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
#endif
    row   = im[i];
    v_t   = v + i*nbs2;
    if (row >= rstart && row < rend) {
      for (j=0; j<n; j++) {
        col = in[j];

        /* Look up into the hash table */
        key = row*Nbs+col+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        total_ct++;
        insert_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
            }
          }
        }
#endif
        baij_a = HD[idx];
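        /* HD[idx] points directly at the bs2 scalars of this block inside the
           SeqBAIJ value array, so the block is updated in place without
           searching the row structure. */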
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t  += bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] = *value++;
              }
            }
          }
        } else {
          value = v + j*(stepval+bs)*bs + i*bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] = *value++;
              }
            }
          }
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIBAIJ"
PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
  PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend){
          col = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIBAIJ"
PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr =  MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
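      /* ||A||_F = sqrt(sum_ij |a_ij|^2): each process sums the squares of all
         locally stored entries (both the A and B parts) and the partial sums
         are combined with an Allreduce. */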
      v = amat->a;
      nz = amat->nz*bs2;
      for (i=0; i<nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v = bmat->a;
      nz = bmat->nz*bs2;
      for (i=0; i<nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      *nrm = sqrt(*nrm);
    } else if (type == NORM_1) { /* max column sum */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
      ierr = PetscMalloc((2*mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
      tmp2 = tmp + mat->cmap->N;
      ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      v = amat->a; jj = amat->j;
      for (i=0; i<amat->nz; i++) {
        for (j=0; j<bs; j++){
          col = bs*(cstart + *jj) + j; /* column index */
          for (row=0; row<bs; row++){
            tmp[col] += PetscAbsScalar(*v);  v++;
          }
        }
        jj++;
      }
      v = bmat->a; jj = bmat->j;
      for (i=0; i<bmat->nz; i++) {
        for (j=0; j<bs; j++){
          col = bs*garray[*jj] + j;
          for (row=0; row<bs; row++){
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      *nrm = 0.0;
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree(tmp);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row sum */
      PetscReal *sums;
      ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr);
      sum = 0.0;
      for (j=0; j<amat->mbs; j++) {
        for (row=0; row<bs; row++) sums[row] = 0.0;
        v = amat->a + bs2*amat->i[j];
        nz = amat->i[j+1]-amat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++){
            for (row=0; row<bs; row++){
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        v = bmat->a + bs2*bmat->i[j];
        nz = bmat->i[j+1]-bmat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++){
            for (row=0; row<bs; row++){
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        for (row=0; row<bs; row++){
          if (sums[row] > sum) sum = sums[row];
        }
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
      ierr = PetscFree(sums);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
    }
  }
  PetscFunctionReturn(0);
}

/*
  Creates the hash table and fills it in.
  This table is created only once.
  If new entries need to be added to the matrix
  then the hash table has to be destroyed and
  recreated.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            A = baij->A,B=baij->B;
  Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
  PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
  PetscErrorCode ierr;
  PetscInt       size,bs2=baij->bs2,rstart=baij->rstartbs;
  PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
  PetscInt       *HT,key;
  MatScalar      **HD;
  PetscReal      tmp;
#if defined(PETSC_USE_INFO)
  PetscInt       ct=0,max=0;
#endif

  PetscFunctionBegin;
  baij->ht_size=(PetscInt)(factor*nz);
  size = baij->ht_size;

  if (baij->ht) {
    PetscFunctionReturn(0);
  }

  /* Allocate Memory for Hash Table */
  ierr     = PetscMalloc((size)*(sizeof(PetscInt)+sizeof(MatScalar*))+1,&baij->hd);CHKERRQ(ierr);
  baij->ht = (PetscInt*)(baij->hd + size);
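  /* A single allocation backs both arrays: HD (pointers to the bs2 blocks)
     comes first and the key array HT is laid out immediately after it. */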
  HD       = baij->hd;
  HT       = baij->ht;

  ierr = PetscMemzero(HD,size*(sizeof(PetscInt)+sizeof(PetscScalar*)));CHKERRQ(ierr);

  /* Loop Over A */
  for (i=0; i<a->mbs; i++) {
    for (j=ai[i]; j<ai[i+1]; j++) {
      row = i+rstart;
      col = aj[j]+cstart;

      key = row*Nbs + col + 1;
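      /* keys are offset by 1 so that a stored key of 0 marks an empty slot */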
      h1  = HASH(size,key,tmp);
      for (k=0; k<size; k++){
        if (!HT[(h1+k)%size]) {
          HT[(h1+k)%size] = key;
          HD[(h1+k)%size] = a->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k> max) max = k;
#endif
    }
  }
  /* Loop Over B */
  for (i=0; i<b->mbs; i++) {
    for (j=bi[i]; j<bi[i+1]; j++) {
      row = i+rstart;
      col = garray[bj[j]];
      key = row*Nbs + col + 1;
      h1  = HASH(size,key,tmp);
      for (k=0; k<size; k++){
        if (!HT[(h1+k)%size]) {
          HT[(h1+k)%size] = key;
          HD[(h1+k)%size] = b->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k> max) max = k;
#endif
    }
  }

  /* Print Summary */
#if defined(PETSC_USE_INFO)
  for (i=0,j=0; i<size; i++) {
    if (HT[i]) {j++;}
  }
  ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (baij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode;
     the bitwise OR over all processes equals ADD_VALUES|INSERT_VALUES exactly
     when both modes occur somewhere */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted values while others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscTruth     r1,r2,r3,other_disassembled;
  MatScalar      *val;
  InsertMode     addv = mat->insertmode;
  PetscMPIInt    n;

  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!baij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column-oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;
    (((Mat_SeqBAIJ*)baij->B->data))->roworiented    = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    ((Mat_SeqBAIJ*)baij->B->data)->roworiented    = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
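  /* If requested, build the hash table after the first final assembly and
     switch the ops table so that all subsequent insertions go through the
     hash-table based MatSetValues variants above, which can only modify
     blocks that already exist in the nonzero structure. */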
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
  baij->rowvalues = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = baij->size,rank = baij->rank;
  PetscInt          bs = mat->rmap->bs;
  PetscTruth        iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
              rank,mat->rmap->N,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs,
              mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar   *a;

    /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ*)baij->B->data;
    ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ"
PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  if (iascii || isdraw || issocket || isbinary) {
    ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIBAIJ"
PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = MatDestroy(baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(baij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (baij->colmap) {ierr = PetscTableDestroy(baij->colmap);CHKERRQ(ierr);}
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
  if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
  ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree(baij->hd);CHKERRQ(ierr);
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(baij);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIBAIJ"
PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  }
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->rmap->n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
  }
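  /* Overlap communication with computation: start gathering the off-process
     entries of xx into lvec, multiply by the diagonal block while the
     messages are in flight, then complete the scatter and add in the
     off-diagonal contribution B*lvec. */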
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIBAIJ"
PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIBAIJ"
PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscTruth     merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually */
    /* inserted in yy until the next line */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* values actually were received in the Begin() but we need to call this nop */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in zz until the next line, which is true for this */
  /* implementation but is not guaranteed in general */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIBAIJ"
PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIBAIJ"
PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;

  PetscFunctionBegin;
  if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
    PetscInt     max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc(max*bs2*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
    mat->rowindices = (PetscInt*)(mat->rowvalues + max*bs2);
  }

  if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows");
  lrow = row - brstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

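  /* Merge the two local rows into one globally numbered row: B entries whose
     mapped global block column lies below cstart come first, then all of A's
     entries (shifted by cstart*bs), then the remaining B entries. */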
  cmap  = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart)
              idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIBAIJ"
PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;

  PetscFunctionBegin;
  if (!baij->getrowactive) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
  }
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIBAIJ"
PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPIBAIJ"
PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
  Mat            A = a->A,B = a->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
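  /* Sum the local counts of the diagonal (A) and off-diagonal (B) blocks into
     isend[], then reduce across the communicator with MAX or SUM as requested. */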
1376   info->block_size     = (PetscReal)matin->rmap->bs;
1377   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1378   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1379   isend[3] = info->memory;  isend[4] = info->mallocs;
1380   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1381   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1382   isend[3] += info->memory;  isend[4] += info->mallocs;
1383   if (flag == MAT_LOCAL) {
1384     info->nz_used      = isend[0];
1385     info->nz_allocated = isend[1];
1386     info->nz_unneeded  = isend[2];
1387     info->memory       = isend[3];
1388     info->mallocs      = isend[4];
1389   } else if (flag == MAT_GLOBAL_MAX) {
1390     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1391     info->nz_used      = irecv[0];
1392     info->nz_allocated = irecv[1];
1393     info->nz_unneeded  = irecv[2];
1394     info->memory       = irecv[3];
1395     info->mallocs      = irecv[4];
1396   } else if (flag == MAT_GLOBAL_SUM) {
1397     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1398     info->nz_used      = irecv[0];
1399     info->nz_allocated = irecv[1];
1400     info->nz_unneeded  = irecv[2];
1401     info->memory       = irecv[3];
1402     info->mallocs      = irecv[4];
1403   } else {
1404     SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1405   }
1406   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1407   info->fill_ratio_needed = 0;
1408   info->factor_mallocs    = 0;
1409   PetscFunctionReturn(0);
1410 }
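/*
   Usage sketch (illustrative, not part of the original source): MatGetInfo()
   on an MPIBAIJ matrix lands in MatGetInfo_MPIBAIJ() above; MAT_LOCAL reports
   this process only, while MAT_GLOBAL_SUM and MAT_GLOBAL_MAX reduce over the
   communicator. A is an assumed assembled matrix.

     MatInfo info;
     ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(PETSC_COMM_WORLD,"nz used %g, allocated %g\n",
                        info.nz_used,info.nz_allocated);CHKERRQ(ierr);
*/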
1411 
1412 #undef __FUNCT__
1413 #define __FUNCT__ "MatSetOption_MPIBAIJ"
1414 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscTruth flg)
1415 {
1416   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1417   PetscErrorCode ierr;
1418 
1419   PetscFunctionBegin;
1420   switch (op) {
1421   case MAT_NEW_NONZERO_LOCATIONS:
1422   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1423   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1424   case MAT_KEEP_NONZERO_PATTERN:
1425   case MAT_NEW_NONZERO_LOCATION_ERR:
1426     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1427     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1428     break;
1429   case MAT_ROW_ORIENTED:
1430     a->roworiented = flg;
1431     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1432     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1433     break;
1434   case MAT_NEW_DIAGONALS:
1435     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1436     break;
1437   case MAT_IGNORE_OFF_PROC_ENTRIES:
1438     a->donotstash = flg;
1439     break;
1440   case MAT_USE_HASH_TABLE:
1441     a->ht_flag = flg;
1442     break;
1443   case MAT_SYMMETRIC:
1444   case MAT_STRUCTURALLY_SYMMETRIC:
1445   case MAT_HERMITIAN:
1446   case MAT_SYMMETRY_ETERNAL:
1447     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1448     break;
1449   default:
1450     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1451   }
1452   PetscFunctionReturn(0);
1453 }
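/*
   Usage sketch (illustrative, not part of the original source): options handled
   by MatSetOption_MPIBAIJ() above are either forwarded to the sequential
   diagonal/off-diagonal blocks or recorded in the parallel data structure,
   e.g. enabling the assembly hash table (sets a->ht_flag) before assembly:

     ierr = MatSetOption(A,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
     ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
*/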
1454 
1455 #undef __FUNCT__
1456 #define __FUNCT__ "MatTranspose_MPIBAIJ"
1457 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
1458 {
1459   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
1460   Mat_SeqBAIJ    *Aloc;
1461   Mat            B;
1462   PetscErrorCode ierr;
1463   PetscInt       M=A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
1464   PetscInt       bs=A->rmap->bs,mbs=baij->mbs;
1465   MatScalar      *a;
1466 
1467   PetscFunctionBegin;
1468   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1469   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1470     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
1471     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1472     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1473     ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1474   } else {
1475     B = *matout;
1476   }
1477 
1478   /* copy over the A part */
1479   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1480   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1481   ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
1482 
1483   for (i=0; i<mbs; i++) {
1484     rvals[0] = bs*(baij->rstartbs + i);
1485     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1486     for (j=ai[i]; j<ai[i+1]; j++) {
1487       col = (baij->cstartbs+aj[j])*bs;
1488       for (k=0; k<bs; k++) {
1489         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1490         col++; a += bs;
1491       }
1492     }
1493   }
1494   /* copy over the B part */
1495   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1496   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1497   for (i=0; i<mbs; i++) {
1498     rvals[0] = bs*(baij->rstartbs + i);
1499     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1500     for (j=ai[i]; j<ai[i+1]; j++) {
1501       col = baij->garray[aj[j]]*bs;
1502       for (k=0; k<bs; k++) {
1503         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1504         col++; a += bs;
1505       }
1506     }
1507   }
1508   ierr = PetscFree(rvals);CHKERRQ(ierr);
1509   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1510   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1511 
1512   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1513     *matout = B;
1514   } else {
1515     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1516   }
1517   PetscFunctionReturn(0);
1518 }
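/*
   Usage sketch (illustrative, not part of the original source): both forms of
   MatTranspose() reach MatTranspose_MPIBAIJ() above; the in-place form (same
   Mat passed back in) is only legal for globally square matrices, as checked
   at the top of the routine, and finishes via MatHeaderCopy().

     Mat At;
     ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);   out-of-place
     ierr = MatTranspose(A,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);      in-place
*/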
1519 
1520 #undef __FUNCT__
1521 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
1522 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1523 {
1524   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1525   Mat            a = baij->A,b = baij->B;
1526   PetscErrorCode ierr;
1527   PetscInt       s1,s2,s3;
1528 
1529   PetscFunctionBegin;
1530   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1531   if (rr) {
1532     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1533     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1534     /* Overlap communication with computation. */
1535     ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1536   }
1537   if (ll) {
1538     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1539     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1540     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1541   }
1542   /* scale  the diagonal block */
1543   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1544 
1545   if (rr) {
1546     /* Do a scatter end and then right scale the off-diagonal block */
1547     ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1548     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1549   }
1550 
1551   PetscFunctionReturn(0);
1552 }
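/*
   Usage sketch (illustrative, not part of the original source): MatDiagonalScale()
   computes A = diag(ll) A diag(rr). In the routine above the right scaling of
   the off-diagonal block must wait for the scatter of rr into baij->lvec, which
   is overlapped with the left scaling. ll and rr are assumed vectors matching
   the local row and column sizes of A, respectively; either may be PETSC_NULL.

     ierr = MatDiagonalScale(A,ll,rr);CHKERRQ(ierr);
     ierr = MatDiagonalScale(A,PETSC_NULL,rr);CHKERRQ(ierr);   column scaling only
*/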
1553 
1554 #undef __FUNCT__
1555 #define __FUNCT__ "MatZeroRows_MPIBAIJ"
1556 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
1557 {
1558   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1559   PetscErrorCode ierr;
1560   PetscMPIInt    imdex,size = l->size,n,rank = l->rank;
1561   PetscInt       i,*owners = A->rmap->range;
1562   PetscInt       *nprocs,j,idx,nsends,row;
1563   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
1564   PetscInt       *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source,lastidx = -1;
1565   PetscInt       *lens,*lrows,*values,rstart_bs=A->rmap->rstart;
1566   MPI_Comm       comm = ((PetscObject)A)->comm;
1567   MPI_Request    *send_waits,*recv_waits;
1568   MPI_Status     recv_status,*send_status;
1569 #if defined(PETSC_DEBUG)
1570   PetscTruth     found = PETSC_FALSE;
1571 #endif
1572 
1573   PetscFunctionBegin;
1574   /*  first count number of contributors to each processor */
1575   ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
1576   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
1577   ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
1578   j = 0;
1579   for (i=0; i<N; i++) {
1580     if (lastidx > (idx = rows[i])) j = 0;
1581     lastidx = idx;
1582     for (; j<size; j++) {
1583       if (idx >= owners[j] && idx < owners[j+1]) {
1584         nprocs[2*j]++;
1585         nprocs[2*j+1] = 1;
1586         owner[i] = j;
1587 #if defined(PETSC_DEBUG)
1588         found = PETSC_TRUE;
1589 #endif
1590         break;
1591       }
1592     }
1593 #if defined(PETSC_DEBUG)
1594     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
1595     found = PETSC_FALSE;
1596 #endif
1597   }
1598   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
1599 
1600   /* inform other processors of number of messages and max length*/
1601   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
1602 
1603   /* post receives:   */
1604   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
1605   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
1606   for (i=0; i<nrecvs; i++) {
1607     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
1608   }
1609 
1610   /* do sends:
1611      1) starts[i] gives the starting index in svalues for stuff going to
1612      the ith processor
1613   */
1614   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
1615   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
1616   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
1617   starts[0]  = 0;
1618   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1619   for (i=0; i<N; i++) {
1620     svalues[starts[owner[i]]++] = rows[i];
1621   }
1622 
1623   starts[0] = 0;
1624   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1625   count = 0;
1626   for (i=0; i<size; i++) {
1627     if (nprocs[2*i+1]) {
1628       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
1629     }
1630   }
1631   ierr = PetscFree(starts);CHKERRQ(ierr);
1632 
1633   base = owners[rank];
1634 
1635   /*  wait on receives */
1636   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
1637   source = lens + nrecvs;
1638   count  = nrecvs; slen = 0;
1639   while (count) {
1640     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
1641     /* unpack receives into our local space */
1642     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
1643     source[imdex]  = recv_status.MPI_SOURCE;
1644     lens[imdex]    = n;
1645     slen          += n;
1646     count--;
1647   }
1648   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1649 
1650   /* move the data into the send scatter */
1651   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
1652   count = 0;
1653   for (i=0; i<nrecvs; i++) {
1654     values = rvalues + i*nmax;
1655     for (j=0; j<lens[i]; j++) {
1656       lrows[count++] = values[j] - base;
1657     }
1658   }
1659   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1660   ierr = PetscFree(lens);CHKERRQ(ierr);
1661   ierr = PetscFree(owner);CHKERRQ(ierr);
1662   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1663 
1664   /* actually zap the local rows */
1665   /*
1666         Zero the required rows. If the "diagonal block" of the matrix
1667      is square and the user wishes to set the diagonal, we use separate
1668      code so that MatSetValues() is not called for each diagonal entry,
1669      which would allocate new memory with many mallocs and slow things down.
1670 
1671        Contributed by: Matthew Knepley
1672   */
1673   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1674   ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0);CHKERRQ(ierr);
1675   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
1676     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag);CHKERRQ(ierr);
1677   } else if (diag != 0.0) {
1678     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr);
1679     if (((Mat_SeqBAIJ*)l->A->data)->nonew) {
1680       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1681 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1682     }
1683     for (i=0; i<slen; i++) {
1684       row  = lrows[i] + rstart_bs;
1685       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
1686     }
1687     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1688     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1689   } else {
1690     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr);
1691   }
1692 
1693   ierr = PetscFree(lrows);CHKERRQ(ierr);
1694 
1695   /* wait on sends */
1696   if (nsends) {
1697     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1698     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1699     ierr = PetscFree(send_status);CHKERRQ(ierr);
1700   }
1701   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1702   ierr = PetscFree(svalues);CHKERRQ(ierr);
1703 
1704   PetscFunctionReturn(0);
1705 }
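/*
   Usage sketch (illustrative, not part of the original source): MatZeroRows()
   takes global row indices; rows owned by other processes are routed to their
   owners by the send/receive phase above. A nonzero diag places that value on
   the diagonal of each zeroed row. The indices below are assumptions.

     PetscInt    rows[2] = {0,5};
     PetscScalar diag    = 1.0;
     ierr = MatZeroRows(A,2,rows,diag);CHKERRQ(ierr);
*/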
1706 
1707 #undef __FUNCT__
1708 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
1709 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1710 {
1711   Mat_MPIBAIJ    *a   = (Mat_MPIBAIJ*)A->data;
1712   PetscErrorCode ierr;
1713 
1714   PetscFunctionBegin;
1715   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1716   PetscFunctionReturn(0);
1717 }
1718 
1719 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
1720 
1721 #undef __FUNCT__
1722 #define __FUNCT__ "MatEqual_MPIBAIJ"
1723 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag)
1724 {
1725   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1726   Mat            a,b,c,d;
1727   PetscTruth     flg;
1728   PetscErrorCode ierr;
1729 
1730   PetscFunctionBegin;
1731   a = matA->A; b = matA->B;
1732   c = matB->A; d = matB->B;
1733 
1734   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1735   if (flg) {
1736     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1737   }
1738   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1739   PetscFunctionReturn(0);
1740 }
1741 
1742 #undef __FUNCT__
1743 #define __FUNCT__ "MatCopy_MPIBAIJ"
1744 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
1745 {
1746   PetscErrorCode ierr;
1747   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ *)A->data;
1748   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ *)B->data;
1749 
1750   PetscFunctionBegin;
1751   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1752   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1753     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1754   } else {
1755     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1756     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1757   }
1758   PetscFunctionReturn(0);
1759 }
1760 
1761 #undef __FUNCT__
1762 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ"
1763 PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A)
1764 {
1765   PetscErrorCode ierr;
1766 
1767   PetscFunctionBegin;
1768   ierr =  MatMPIBAIJSetPreallocation(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1769   PetscFunctionReturn(0);
1770 }
1771 
1772 #include "petscblaslapack.h"
1773 #undef __FUNCT__
1774 #define __FUNCT__ "MatAXPY_MPIBAIJ"
1775 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1776 {
1777   PetscErrorCode ierr;
1778   Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data;
1779   PetscBLASInt   bnz,one=1;
1780   Mat_SeqBAIJ    *x,*y;
1781 
1782   PetscFunctionBegin;
1783   if (str == SAME_NONZERO_PATTERN) {
1784     PetscScalar alpha = a;
1785     x = (Mat_SeqBAIJ *)xx->A->data;
1786     y = (Mat_SeqBAIJ *)yy->A->data;
1787     bnz = PetscBLASIntCast(x->nz);
1788     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1789     x = (Mat_SeqBAIJ *)xx->B->data;
1790     y = (Mat_SeqBAIJ *)yy->B->data;
1791     bnz = PetscBLASIntCast(x->nz);
1792     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1793   } else {
1794     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1795   }
1796   PetscFunctionReturn(0);
1797 }
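/*
   Usage sketch (illustrative, not part of the original source): computing
   Y = a*X + Y. When the caller guarantees SAME_NONZERO_PATTERN, the routine
   above reduces to one BLAS axpy over each block's value array; any other
   MatStructure falls back to the generic MatAXPY_Basic().

     PetscScalar a = 2.0;
     ierr = MatAXPY(Y,a,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
*/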
1798 
1799 #undef __FUNCT__
1800 #define __FUNCT__ "MatRealPart_MPIBAIJ"
1801 PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
1802 {
1803   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ*)A->data;
1804   PetscErrorCode ierr;
1805 
1806   PetscFunctionBegin;
1807   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1808   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1809   PetscFunctionReturn(0);
1810 }
1811 
1812 #undef __FUNCT__
1813 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ"
1814 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
1815 {
1816   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ*)A->data;
1817   PetscErrorCode ierr;
1818 
1819   PetscFunctionBegin;
1820   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1821   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1822   PetscFunctionReturn(0);
1823 }
1824 
1825 #undef __FUNCT__
1826 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ"
1827 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
1828 {
1829   PetscErrorCode ierr;
1830   IS             iscol_local;
1831   PetscInt       csize;
1832 
1833   PetscFunctionBegin;
1834   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
1835   if (call == MAT_REUSE_MATRIX) {
1836     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
1837     if (!iscol_local) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
1838   } else {
1839     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
1840   }
1841   ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
1842   if (call == MAT_INITIAL_MATRIX) {
1843     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
1844     ierr = ISDestroy(iscol_local);CHKERRQ(ierr);
1845   }
1846   PetscFunctionReturn(0);
1847 }
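/*
   Usage sketch (illustrative, not part of the original source): on the first
   call MAT_INITIAL_MATRIX creates the submatrix and composes the gathered
   column IS on it under the key "ISAllGather", so a later MAT_REUSE_MATRIX
   call with the same index sets can skip the gather. isrow and iscol are
   assumed index sets compatible with the block size.

     Mat S;
     ierr = MatGetSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&S);CHKERRQ(ierr);
     ... change values of A, keeping its nonzero pattern ...
     ierr = MatGetSubMatrix(A,isrow,iscol,MAT_REUSE_MATRIX,&S);CHKERRQ(ierr);
*/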
1848 
1849 #undef __FUNCT__
1850 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ_Private"
1851 /*
1852     Not great since it makes two copies of the submatrix: first a sequential
1853   SeqBAIJ copy on each process, then the end result built by concatenating the
1854   local matrices. Writing it directly would be much like MatGetSubMatrices_MPIBAIJ().
1855 */
1856 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
1857 {
1858   PetscErrorCode ierr;
1859   PetscMPIInt    rank,size;
1860   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs;
1861   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
1862   Mat            *local,M,Mreuse;
1863   MatScalar      *vwork,*aa;
1864   MPI_Comm       comm = ((PetscObject)mat)->comm;
1865   Mat_SeqBAIJ    *aij;
1866 
1867 
1868   PetscFunctionBegin;
1869   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1870   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1871 
1872   if (call ==  MAT_REUSE_MATRIX) {
1873     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
1874     if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
1875     local = &Mreuse;
1876     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
1877   } else {
1878     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
1879     Mreuse = *local;
1880     ierr   = PetscFree(local);CHKERRQ(ierr);
1881   }
1882 
1883   /*
1884       m - number of local rows
1885       n - number of columns (same on all processors)
1886       rstart - first row in new global matrix generated
1887   */
1888   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
1889   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
1890   m    = m/bs;
1891   n    = n/bs;
1892 
1893   if (call == MAT_INITIAL_MATRIX) {
1894     aij = (Mat_SeqBAIJ*)(Mreuse)->data;
1895     ii  = aij->i;
1896     jj  = aij->j;
1897 
1898     /*
1899         Determine the number of non-zeros in the diagonal and off-diagonal
1900         portions of the matrix in order to do correct preallocation
1901     */
1902 
1903     /* first get start and end of "diagonal" columns */
1904     if (csize == PETSC_DECIDE) {
1905       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
1906       if (mglobal == n*bs) { /* square matrix */
1907         nlocal = m;
1908       } else {
1909         nlocal = n/size + ((n % size) > rank);
1910       }
1911     } else {
1912       nlocal = csize/bs;
1913     }
1914     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
1915     rstart = rend - nlocal;
1916     if (rank == size - 1 && rend != n) {
1917       SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
1918     }
1919 
1920     /* next, compute all the lengths */
1921     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
1922     olens = dlens + m;
1923     for (i=0; i<m; i++) {
1924       jend = ii[i+1] - ii[i];
1925       olen = 0;
1926       dlen = 0;
1927       for (j=0; j<jend; j++) {
1928         if (*jj < rstart || *jj >= rend) olen++;
1929         else dlen++;
1930         jj++;
1931       }
1932       olens[i] = olen;
1933       dlens[i] = dlen;
1934     }
1935     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
1936     ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr);
1937     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
1938     ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
1939     ierr = PetscFree(dlens);CHKERRQ(ierr);
1940   } else {
1941     PetscInt ml,nl;
1942 
1943     M = *newmat;
1944     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
1945     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
1946     ierr = MatZeroEntries(M);CHKERRQ(ierr);
1947     /*
1948          The next two lines are needed so we may call MatSetValuesBlocked_MPIBAIJ() below directly,
1949        rather than the slower MatSetValuesBlocked().
1950     */
1951     M->was_assembled = PETSC_TRUE;
1952     M->assembled     = PETSC_FALSE;
1953   }
1954   ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
1955   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
1956   aij = (Mat_SeqBAIJ*)(Mreuse)->data;
1957   ii  = aij->i;
1958   jj  = aij->j;
1959   aa  = aij->a;
1960   for (i=0; i<m; i++) {
1961     row   = rstart/bs + i;
1962     nz    = ii[i+1] - ii[i];
1963     cwork = jj;     jj += nz;
1964     vwork = aa;     aa += nz;
1965     ierr = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
1966   }
1967 
1968   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1969   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1970   *newmat = M;
1971 
1972   /* save submatrix used in processor for next request */
1973   if (call ==  MAT_INITIAL_MATRIX) {
1974     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
1975     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
1976   }
1977 
1978   PetscFunctionReturn(0);
1979 }
1980 
1981 #undef __FUNCT__
1982 #define __FUNCT__ "MatPermute_MPIBAIJ"
1983 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B)
1984 {
1985   MPI_Comm       comm,pcomm;
1986   PetscInt       first,local_size,nrows;
1987   const PetscInt *rows;
1988   PetscMPIInt    size;
1989   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1990   PetscErrorCode ierr;
1991 
1992   PetscFunctionBegin;
1993   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1994   /* make a collective version of 'rowp' */
1995   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
1996   if (pcomm==comm) {
1997     crowp = rowp;
1998   } else {
1999     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
2000     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
2001     ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr);
2002     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
2003   }
2004   /* collect the global row permutation and invert it */
2005   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
2006   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
2007   if (pcomm!=comm) {
2008     ierr = ISDestroy(crowp);CHKERRQ(ierr);
2009   }
2010   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
2011   /* get the local target indices */
2012   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
2013   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
2014   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
2015   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr);
2016   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
2017   ierr = ISDestroy(irowp);CHKERRQ(ierr);
2018   /* the column permutation is so much easier;
2019      make a local version of 'colp' and invert it */
2020   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
2021   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
2022   if (size==1) {
2023     lcolp = colp;
2024   } else {
2025     ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
2026     ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
2027     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr);
2028   }
2029   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
2030   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
2031   ierr = ISSetPermutation(icolp);CHKERRQ(ierr);
2032   if (size>1) {
2033     ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
2034     ierr = ISDestroy(lcolp);CHKERRQ(ierr);
2035   }
2036   /* now we just get the submatrix */
2037   ierr = MatGetSubMatrix_MPIBAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
2038   /* clean up */
2039   ierr = ISDestroy(lrowp);CHKERRQ(ierr);
2040   ierr = ISDestroy(icolp);CHKERRQ(ierr);
2041   PetscFunctionReturn(0);
2042 }
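/*
   Usage sketch (illustrative, not part of the original source): rowp and colp
   are assumed index sets describing row and column permutations; the routine
   above gathers and inverts them, then extracts the permuted matrix through
   MatGetSubMatrix_MPIBAIJ_Private().

     Mat P;
     ierr = MatPermute(A,rowp,colp,&P);CHKERRQ(ierr);
*/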
2043 
2044 #undef __FUNCT__
2045 #define __FUNCT__ "MatGetGhosts_MPIBAIJ"
2046 PetscErrorCode PETSCMAT_DLLEXPORT MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
2047 {
2048   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*) mat->data;
2049   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
2050 
2051   PetscFunctionBegin;
2052   if (nghosts) { *nghosts = B->nbs;}
2053   if (ghosts) {*ghosts = baij->garray;}
2054   PetscFunctionReturn(0);
2055 }
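/*
   Usage sketch (illustrative, not part of the original source): the "ghosts"
   returned are the global block indices of the off-process block columns
   (baij->garray), i.e. the vector entries this process must receive from
   others during a matrix-vector product.

     PetscInt       nghosts;
     const PetscInt *ghosts;
     ierr = MatGetGhosts(A,&nghosts,&ghosts);CHKERRQ(ierr);
*/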
2056 
2057 EXTERN PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat);
2058 
2059 #undef __FUNCT__
2060 #define __FUNCT__ "MatFDColoringCreate_MPIBAIJ"
2061 /*
2062     This routine is almost identical to MatFDColoringCreate_MPIAIJ()!
2063 */
2064 PetscErrorCode MatFDColoringCreate_MPIBAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c)
2065 {
2066   Mat_MPIBAIJ            *baij = (Mat_MPIBAIJ*)mat->data;
2067   PetscErrorCode        ierr;
2068   PetscMPIInt           size,*ncolsonproc,*disp,nn;
2069   PetscInt              bs,i,n,nrows,j,k,m,*rows = 0,*A_ci,*A_cj,ncols,col;
2070   const PetscInt        *is;
2071   PetscInt              nis = iscoloring->n,nctot,*cols,*B_ci,*B_cj;
2072   PetscInt              *rowhit,M,cstart,cend,colb;
2073   PetscInt              *columnsforrow,l;
2074   IS                    *isa;
2075   PetscTruth             done,flg;
2076   ISLocalToGlobalMapping map = mat->bmapping;
2077   PetscInt               *ltog = (map ? map->indices : (PetscInt*)PETSC_NULL),ctype = c->ctype;
2078 
2079   PetscFunctionBegin;
2080   if (!mat->assembled) {
2081     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled first; MatAssemblyBegin/End();");
2082   }
2083   if (ctype == IS_COLORING_GHOSTED && !map) SETERRQ(PETSC_ERR_ARG_INCOMP,"When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMappingBlock");
2084 
2085   ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr);
2086 
2087   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2088   M                = mat->rmap->n/bs;
2089   cstart           = mat->cmap->rstart/bs;
2090   cend             = mat->cmap->rend/bs;
2091   c->M             = mat->rmap->N/bs;  /* set the global rows and columns and local rows */
2092   c->N             = mat->cmap->N/bs;
2093   c->m             = mat->rmap->n/bs;
2094   c->rstart        = mat->rmap->rstart/bs;
2095 
2096   c->ncolors       = nis;
2097   ierr             = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr);
2098   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr);
2099   ierr             = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr);
2100   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr);
2101   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr);
2102   ierr = PetscLogObjectMemory(c,5*nis*sizeof(PetscInt));CHKERRQ(ierr);
2103 
2104   /* Allow access to data structures of local part of matrix */
2105   if (!baij->colmap) {
2106     ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
2107   }
2108   ierr = MatGetColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr);
2109   ierr = MatGetColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr);
2110 
2111   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr);
2112   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr);
2113 
2114   for (i=0; i<nis; i++) {
2115     ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr);
2116     ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr);
2117     c->ncolumns[i] = n;
2118     if (n) {
2119       ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr);
2120       ierr = PetscLogObjectMemory(c,n*sizeof(PetscInt));CHKERRQ(ierr);
2121       ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr);
2122     } else {
2123       c->columns[i]  = 0;
2124     }
2125 
2126     if (ctype == IS_COLORING_GLOBAL){
2127       /* Determine the total (parallel) number of columns of this color */
2128       ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
2129       ierr = PetscMalloc(2*size*sizeof(PetscMPIInt),&ncolsonproc);CHKERRQ(ierr);
2130       disp = ncolsonproc + size;
2131 
2132       nn   = PetscMPIIntCast(n);
2133       ierr = MPI_Allgather(&nn,1,MPI_INT,ncolsonproc,1,MPI_INT,((PetscObject)mat)->comm);CHKERRQ(ierr);
2134       nctot = 0; for (j=0; j<size; j++) {nctot += ncolsonproc[j];}
2135       if (!nctot) {
2136         ierr = PetscInfo(mat,"Coloring of matrix has some unneeded colors with no corresponding rows\n");CHKERRQ(ierr);
2137       }
2138 
2139       disp[0] = 0;
2140       for (j=1; j<size; j++) {
2141         disp[j] = disp[j-1] + ncolsonproc[j-1];
2142       }
2143 
2144       /* Get complete list of columns for color on each processor */
2145       ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2146       ierr = MPI_Allgatherv((void*)is,n,MPIU_INT,cols,ncolsonproc,disp,MPIU_INT,((PetscObject)mat)->comm);CHKERRQ(ierr);
2147       ierr = PetscFree(ncolsonproc);CHKERRQ(ierr);
2148     } else if (ctype == IS_COLORING_GHOSTED){
2149       /* Determine local number of columns of this color on this process, including ghost points */
2150       nctot = n;
2151       ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2152       ierr = PetscMemcpy(cols,is,n*sizeof(PetscInt));CHKERRQ(ierr);
2153     } else {
2154       SETERRQ(PETSC_ERR_SUP,"Not provided for this MatFDColoring type");
2155     }
2156 
2157     /*
2158        Mark all rows affected by these columns
2159     */
2160     /* Temporary option to allow for debugging/testing */
2161     flg  = PETSC_FALSE;
2162     ierr = PetscOptionsGetTruth(PETSC_NULL,"-matfdcoloring_slow",&flg,PETSC_NULL);CHKERRQ(ierr);
2163     if (!flg) {/*-----------------------------------------------------------------------------*/
2164       /* crude, fast version */
2165       ierr = PetscMemzero(rowhit,M*sizeof(PetscInt));CHKERRQ(ierr);
2166       /* loop over columns*/
2167       for (j=0; j<nctot; j++) {
2168         if (ctype == IS_COLORING_GHOSTED) {
2169           col = ltog[cols[j]];
2170         } else {
2171           col  = cols[j];
2172         }
2173         if (col >= cstart && col < cend) {
2174           /* column is in diagonal block of matrix */
2175           rows = A_cj + A_ci[col-cstart];
2176           m    = A_ci[col-cstart+1] - A_ci[col-cstart];
2177         } else {
2178 #if defined (PETSC_USE_CTABLE)
2179           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2180           colb--;
2181 #else
2182           colb = baij->colmap[col] - 1;
2183 #endif
2184           if (colb == -1) {
2185             m = 0;
2186           } else {
2187             colb = colb/bs;
2188             rows = B_cj + B_ci[colb];
2189             m    = B_ci[colb+1] - B_ci[colb];
2190           }
2191         }
2192         /* loop over columns marking them in rowhit */
2193         for (k=0; k<m; k++) {
2194           rowhit[*rows++] = col + 1;
2195         }
2196       }
2197 
2198       /* count the number of hits */
2199       nrows = 0;
2200       for (j=0; j<M; j++) {
2201         if (rowhit[j]) nrows++;
2202       }
2203       c->nrows[i]         = nrows;
2204       ierr                = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
2205       ierr                = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
2206       ierr = PetscLogObjectMemory(c,2*(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr);
2207       nrows = 0;
2208       for (j=0; j<M; j++) {
2209         if (rowhit[j]) {
2210           c->rows[i][nrows]           = j;
2211           c->columnsforrow[i][nrows] = rowhit[j] - 1;
2212           nrows++;
2213         }
2214       }
2215     } else {/*-------------------------------------------------------------------------------*/
2216       /* slow version, using rowhit as a linked list */
2217       PetscInt currentcol,fm,mfm;
2218       rowhit[M] = M;
2219       nrows     = 0;
2220       /* loop over columns*/
2221       for (j=0; j<nctot; j++) {
2222         if (ctype == IS_COLORING_GHOSTED) {
2223           col = ltog[cols[j]];
2224         } else {
2225           col  = cols[j];
2226         }
2227         if (col >= cstart && col < cend) {
2228           /* column is in diagonal block of matrix */
2229           rows = A_cj + A_ci[col-cstart];
2230           m    = A_ci[col-cstart+1] - A_ci[col-cstart];
2231         } else {
2232 #if defined (PETSC_USE_CTABLE)
2233           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2234           colb--;
2235 #else
2236           colb = baij->colmap[col] - 1;
2237 #endif
2238           if (colb == -1) {
2239             m = 0;
2240           } else {
2241             colb = colb/bs;
2242             rows = B_cj + B_ci[colb];
2243             m    = B_ci[colb+1] - B_ci[colb];
2244           }
2245         }
2246 
2247         /* loop over columns marking them in rowhit */
2248         fm    = M; /* fm points to first entry in linked list */
2249         for (k=0; k<m; k++) {
2250           currentcol = *rows++;
2251           /* is it already in the list? */
2252           do {
2253             mfm  = fm;
2254             fm   = rowhit[fm];
2255           } while (fm < currentcol);
2256           /* not in list so add it */
2257           if (fm != currentcol) {
2258             nrows++;
2259             columnsforrow[currentcol] = col;
2260             /* next three lines insert new entry into linked list */
2261             rowhit[mfm]               = currentcol;
2262             rowhit[currentcol]        = fm;
2263             fm                        = currentcol;
2264             /* fm points to present position in list since we know the columns are sorted */
2265           } else {
2266             SETERRQ(PETSC_ERR_PLIB,"Invalid coloring of matrix detected");
2267           }
2268         }
2269       }
2270       c->nrows[i]         = nrows;
2271       ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
2272       ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
2273       ierr = PetscLogObjectMemory(c,(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr);
2274       /* now store the linked list of rows into c->rows[i] */
2275       nrows = 0;
2276       fm    = rowhit[M];
2277       do {
2278         c->rows[i][nrows]            = fm;
2279         c->columnsforrow[i][nrows++] = columnsforrow[fm];
2280         fm                           = rowhit[fm];
2281       } while (fm < M);
2282     } /* ---------------------------------------------------------------------------------------*/
2283     ierr = PetscFree(cols);CHKERRQ(ierr);
2284   }
2285 
2286   /* Optimize by adding the vscale and vscaleforrow[][] fields */
2287   /*
2288        vscale will contain the "diagonal" on-processor scalings followed by the off-processor ones
2289   */
2290   if (ctype == IS_COLORING_GLOBAL) {
2291     PetscInt *garray;
2292     ierr = PetscMalloc(baij->B->cmap->n*sizeof(PetscInt),&garray);CHKERRQ(ierr);
2293     for (i=0; i<baij->B->cmap->n/bs; i++) {
2294       for (j=0; j<bs; j++) {
2295         garray[i*bs+j] = bs*baij->garray[i]+j;
2296       }
2297     }
2298     ierr = VecCreateGhost(((PetscObject)mat)->comm,baij->A->rmap->n,PETSC_DETERMINE,baij->B->cmap->n,garray,&c->vscale);CHKERRQ(ierr);
2299     ierr = PetscFree(garray);CHKERRQ(ierr);
2300     CHKMEMQ;
2301     ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
2302     for (k=0; k<c->ncolors; k++) {
2303       ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
2304       for (l=0; l<c->nrows[k]; l++) {
2305         col = c->columnsforrow[k][l];
2306         if (col >= cstart && col < cend) {
2307           /* column is in diagonal block of matrix */
2308           colb = col - cstart;
2309         } else {
2310           /* column  is in "off-processor" part */
2311 #if defined (PETSC_USE_CTABLE)
2312           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2313           colb --;
2314 #else
2315           colb = baij->colmap[col] - 1;
2316 #endif
2317           colb = colb/bs;
2318           colb += cend - cstart;
2319         }
2320         c->vscaleforrow[k][l] = colb;
2321       }
2322     }
2323   } else if (ctype == IS_COLORING_GHOSTED) {
2324     /* Get gtol mapping */
2325     PetscInt N = mat->cmap->N, *gtol;
2326     ierr = PetscMalloc((N+1)*sizeof(PetscInt),&gtol);CHKERRQ(ierr);
2327     for (i=0; i<N; i++) gtol[i] = -1;
2328     for (i=0; i<map->n; i++) gtol[ltog[i]] = i;
2329 
2330     c->vscale = 0; /* will be created in MatFDColoringApply() */
2331     ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
2332     for (k=0; k<c->ncolors; k++) {
2333       ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
2334       for (l=0; l<c->nrows[k]; l++) {
2335         col = c->columnsforrow[k][l];      /* global column index */
2336         c->vscaleforrow[k][l] = gtol[col]; /* local column index */
2337       }
2338     }
2339     ierr = PetscFree(gtol);CHKERRQ(ierr);
2340   }
2341   ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr);
2342 
2343   ierr = PetscFree(rowhit);CHKERRQ(ierr);
2344   ierr = PetscFree(columnsforrow);CHKERRQ(ierr);
2345   ierr = MatRestoreColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr);
2346   ierr = MatRestoreColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr);
2347   CHKMEMQ;
2348   PetscFunctionReturn(0);
2349 }
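/*
   Usage sketch (illustrative, not part of the original source): the routine
   above is normally reached through MatFDColoringCreate() when approximating
   a Jacobian by finite differences; a minimal sequence, assuming J is an
   assembled MPIBAIJ matrix with the desired nonzero pattern:

     ISColoring    iscoloring;
     MatFDColoring fdcoloring;
     ierr = MatGetColoring(J,MATCOLORING_SL,&iscoloring);CHKERRQ(ierr);
     ierr = MatFDColoringCreate(J,iscoloring,&fdcoloring);CHKERRQ(ierr);
     ierr = ISColoringDestroy(iscoloring);CHKERRQ(ierr);
*/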
2350 
2351 #undef __FUNCT__
2352 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIBAIJ"
2353 PetscErrorCode MatGetSeqNonzerostructure_MPIBAIJ(Mat A,Mat *newmat)
2354 {
2355   Mat            B;
2356   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ *)A->data;
2357   Mat_SeqBAIJ    *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data;
2358   Mat_SeqAIJ     *b;
2359   PetscErrorCode ierr;
2360   PetscMPIInt    size,rank,*recvcounts = 0,*displs = 0;
2361   PetscInt       sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs;
2362   PetscInt       m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf;
2363 
2364   PetscFunctionBegin;
2365   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
2366   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
2367 
2368   /* ----------------------------------------------------------------
2369      Tell every processor the number of nonzeros per row
2370   */
2371   ierr = PetscMalloc((A->rmap->N/bs)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
2372   for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) {
2373     lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs];
2374   }
2375   sendcount = A->rmap->rend/bs - A->rmap->rstart/bs;
2376   ierr = PetscMalloc(2*size*sizeof(PetscMPIInt),&recvcounts);CHKERRQ(ierr);
2377   displs     = recvcounts + size;
2378   for (i=0; i<size; i++) {
2379     recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs;
2380     displs[i]     = A->rmap->range[i]/bs;
2381   }
2382 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2383   ierr  = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2384 #else
2385   ierr  = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2386 #endif
2387   /* ---------------------------------------------------------------
2388      Create the sequential matrix of the same type as the local block diagonal
2389   */
2390   ierr  = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
2391   ierr  = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2392   ierr  = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2393   ierr  = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr);
2394   b = (Mat_SeqAIJ *)B->data;
2395 
2396   /*--------------------------------------------------------------------
2397     Copy my part of matrix column indices over
2398   */
2399   sendcount  = ad->nz + bd->nz;
2400   jsendbuf   = b->j + b->i[rstarts[rank]/bs];
2401   a_jsendbuf = ad->j;
2402   b_jsendbuf = bd->j;
2403   n          = A->rmap->rend/bs - A->rmap->rstart/bs;
2404   cnt        = 0;
2405   for (i=0; i<n; i++) {
2406 
2407     /* put in lower diagonal portion */
2408     m = bd->i[i+1] - bd->i[i];
2409     while (m > 0) {
2410       /* is it above diagonal (in bd (compressed) numbering) */
2411       if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break;
2412       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2413       m--;
2414     }
2415 
2416     /* put in diagonal portion */
2417     for (j=ad->i[i]; j<ad->i[i+1]; j++) {
2418       jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++;
2419     }
2420 
2421     /* put in upper diagonal portion */
2422     while (m-- > 0) {
2423       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2424     }
2425   }
2426   if (cnt != sendcount) SETERRQ2(PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt);
2427 
2428   /*--------------------------------------------------------------------
2429     Gather all column indices to all processors
2430   */
2431   for (i=0; i<size; i++) {
2432     recvcounts[i] = 0;
2433     for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) {
2434       recvcounts[i] += lens[j];
2435     }
2436   }
2437   displs[0]  = 0;
2438   for (i=1; i<size; i++) {
2439     displs[i] = displs[i-1] + recvcounts[i-1];
2440   }
2441 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2442   ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2443 #else
2444   ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2445 #endif
2446   /*--------------------------------------------------------------------
2447     Assemble the matrix into useable form (note numerical values not yet set)
2448   */
2449   /* set the b->ilen (length of each row) values */
2450   ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr);
2451   /* set the b->i indices */
2452   b->i[0] = 0;
2453   for (i=1; i<=A->rmap->N/bs; i++) {
2454     b->i[i] = b->i[i-1] + lens[i-1];
2455   }
2456   ierr = PetscFree(lens);CHKERRQ(ierr);
2457   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2458   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2459   ierr = PetscFree(recvcounts);CHKERRQ(ierr);
2460 
2461   if (A->symmetric){
2462     ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2463   } else if (A->hermitian) {
2464     ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr);
2465   } else if (A->structurally_symmetric) {
2466     ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2467   }
2468   *newmat = B;
2469   PetscFunctionReturn(0);
2470 }
2471 
2472 extern PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_BAIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2473 
2474 
2475 /* -------------------------------------------------------------------*/
2476 static struct _MatOps MatOps_Values = {
2477        MatSetValues_MPIBAIJ,
2478        MatGetRow_MPIBAIJ,
2479        MatRestoreRow_MPIBAIJ,
2480        MatMult_MPIBAIJ,
2481 /* 4*/ MatMultAdd_MPIBAIJ,
2482        MatMultTranspose_MPIBAIJ,
2483        MatMultTransposeAdd_MPIBAIJ,
2484        0,
2485        0,
2486        0,
2487 /*10*/ 0,
2488        0,
2489        0,
2490        0,
2491        MatTranspose_MPIBAIJ,
2492 /*15*/ MatGetInfo_MPIBAIJ,
2493        MatEqual_MPIBAIJ,
2494        MatGetDiagonal_MPIBAIJ,
2495        MatDiagonalScale_MPIBAIJ,
2496        MatNorm_MPIBAIJ,
2497 /*20*/ MatAssemblyBegin_MPIBAIJ,
2498        MatAssemblyEnd_MPIBAIJ,
2499        MatSetOption_MPIBAIJ,
2500        MatZeroEntries_MPIBAIJ,
2501 /*24*/ MatZeroRows_MPIBAIJ,
2502        0,
2503        0,
2504        0,
2505        0,
2506 /*29*/ MatSetUpPreallocation_MPIBAIJ,
2507        0,
2508        0,
2509        0,
2510        0,
2511 /*34*/ MatDuplicate_MPIBAIJ,
2512        0,
2513        0,
2514        0,
2515        0,
2516 /*39*/ MatAXPY_MPIBAIJ,
2517        MatGetSubMatrices_MPIBAIJ,
2518        MatIncreaseOverlap_MPIBAIJ,
2519        MatGetValues_MPIBAIJ,
2520        MatCopy_MPIBAIJ,
2521 /*44*/ 0,
2522        MatScale_MPIBAIJ,
2523        0,
2524        0,
2525        0,
2526 /*49*/ 0,
2527        0,
2528        0,
2529        0,
2530        0,
2531 /*54*/ MatFDColoringCreate_MPIBAIJ,
2532        0,
2533        MatSetUnfactored_MPIBAIJ,
2534        MatPermute_MPIBAIJ,
2535        MatSetValuesBlocked_MPIBAIJ,
2536 /*59*/ MatGetSubMatrix_MPIBAIJ,
2537        MatDestroy_MPIBAIJ,
2538        MatView_MPIBAIJ,
2539        0,
2540        0,
2541 /*64*/ 0,
2542        0,
2543        0,
2544        0,
2545        0,
2546 /*69*/ MatGetRowMaxAbs_MPIBAIJ,
2547        0,
2548        0,
2549        0,
2550        0,
2551 /*74*/ 0,
2552        MatFDColoringApply_BAIJ,
2553        0,
2554        0,
2555        0,
2556 /*79*/ 0,
2557        0,
2558        0,
2559        0,
2560        MatLoad_MPIBAIJ,
2561 /*84*/ 0,
2562        0,
2563        0,
2564        0,
2565        0,
2566 /*89*/ 0,
2567        0,
2568        0,
2569        0,
2570        0,
2571 /*94*/ 0,
2572        0,
2573        0,
2574        0,
2575        0,
2576 /*99*/ 0,
2577        0,
2578        0,
2579        0,
2580        0,
2581 /*104*/0,
2582        MatRealPart_MPIBAIJ,
2583        MatImaginaryPart_MPIBAIJ,
2584        0,
2585        0,
2586 /*109*/0,
2587        0,
2588        0,
2589        0,
2590        0,
2591 /*114*/MatGetSeqNonzerostructure_MPIBAIJ,
2592        0,
2593        MatGetGhosts_MPIBAIJ
2594 };
2595 
2596 EXTERN_C_BEGIN
2597 #undef __FUNCT__
2598 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
2599 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
2600 {
2601   PetscFunctionBegin;
2602   *a      = ((Mat_MPIBAIJ *)A->data)->A;
2603   *iscopy = PETSC_FALSE;
2604   PetscFunctionReturn(0);
2605 }
2606 EXTERN_C_END
2607 
2608 EXTERN_C_BEGIN
2609 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*);
2610 EXTERN_C_END
2611 
2612 EXTERN_C_BEGIN
2613 #undef __FUNCT__
2614 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ"
2615 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2616 {
2617   PetscInt       m,rstart,cstart,cend;
2618   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
2619   const PetscInt *JJ=0;
2620   PetscScalar    *values=0;
2621   PetscErrorCode ierr;
2622 
2623   PetscFunctionBegin;
2624 
2625   if (bs < 1) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2626   ierr = PetscMapSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2627   ierr = PetscMapSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2628   ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr);
2629   ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr);
2630   m      = B->rmap->n/bs;
2631   rstart = B->rmap->rstart/bs;
2632   cstart = B->cmap->rstart/bs;
2633   cend   = B->cmap->rend/bs;
2634 
2635   if (ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
2636   ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2637   o_nnz = d_nnz + m;
2638   for (i=0; i<m; i++) {
2639     nz = ii[i+1] - ii[i];
2640     if (nz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
2641     nz_max = PetscMax(nz_max,nz);
2642     JJ  = jj + ii[i];
2643     for (j=0; j<nz; j++) {
2644       if (*JJ >= cstart) break;
2645       JJ++;
2646     }
2647     d = 0;
2648     for (; j<nz; j++) {
2649       if (*JJ++ >= cend) break;
2650       d++;
2651     }
2652     d_nnz[i] = d;
2653     o_nnz[i] = nz - d;
2654   }
2655   ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2656   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2657 
2658   values = (PetscScalar*)V;
2659   if (!values) {
2660     ierr = PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2661     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2662   }
2663   for (i=0; i<m; i++) {
2664     PetscInt          row    = i + rstart;
2665     PetscInt          ncols  = ii[i+1] - ii[i];
2666     const PetscInt    *icols = jj + ii[i];
2667     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2668     ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2669   }
2670 
2671   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2672   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2673   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2674 
2675   PetscFunctionReturn(0);
2676 }
2677 EXTERN_C_END
2678 
2679 #undef __FUNCT__
2680 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR"
2681 /*@C
2682    MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in block AIJ (BAIJ) format.
2683 
2684    Collective on MPI_Comm
2685 
2686    Input Parameters:
2687 +  B - the matrix
2688 .  bs - the block size
2689 .  i - the indices into j for the start of each local block row (starts with zero)
2690 .  j - the block column indices for each local block row (starts with zero); these must be sorted for each row
2691 -  v - optional values in the matrix
2692 
2693    Level: developer
2694 
2695 .keywords: matrix, baij, compressed row, sparse, parallel
2696 
2697 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValuesBlocked(), MatMPIBAIJSetPreallocation(), MatCreateMPIBAIJ(), MATMPIBAIJ
2698 @*/
2699 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2700 {
2701   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]);
2702 
2703   PetscFunctionBegin;
2704   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
2705   if (f) {
2706     ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr);
2707   }
2708   PetscFunctionReturn(0);
2709 }
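/*
   Usage sketch (illustrative, not part of the original source): building an
   MPIBAIJ matrix from block-compressed-row data with bs = 2. The i[] and j[]
   arrays index blocks (global block columns in j), and v supplies bs*bs
   values per block. All sizes and values below are assumptions; comm is an
   assumed MPI communicator.

     Mat         B;
     PetscInt    i[2] = {0,2};              one local block row, two blocks
     PetscInt    j[2] = {0,1};              global block columns 0 and 1
     PetscScalar v[8] = {1,2,3,4,5,6,7,8};

     ierr = MatCreate(comm,&B);CHKERRQ(ierr);
     ierr = MatSetSizes(B,2,4,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
     ierr = MatSetType(B,MATMPIBAIJ);CHKERRQ(ierr);
     ierr = MatMPIBAIJSetPreallocationCSR(B,2,i,j,v);CHKERRQ(ierr);
*/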
2710 
2711 EXTERN_C_BEGIN
2712 #undef __FUNCT__
2713 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
2714 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
2715 {
2716   Mat_MPIBAIJ    *b;
2717   PetscErrorCode ierr;
2718   PetscInt       i, newbs = PetscAbs(bs);
2719 
2720   PetscFunctionBegin;
2721   if (bs < 0) {
2722     ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPIBAIJ matrix","Mat");CHKERRQ(ierr);
2723       ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr);
2724     ierr = PetscOptionsEnd();CHKERRQ(ierr);
2725     bs   = PetscAbs(bs);
2726   }
2727   if ((d_nnz || o_nnz) && newbs != bs) {
2728     SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting d_nnz or o_nnz");
2729   }
2730   bs = newbs;
2731 
2732 
2733   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
2734   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2735   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2736   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2737   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2738 
2739   ierr = PetscMapSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2740   ierr = PetscMapSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2741   ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr);
2742   ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr);
2743 
2744   if (d_nnz) {
2745     for (i=0; i<B->rmap->n/bs; i++) {
2746       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
2747     }
2748   }
2749   if (o_nnz) {
2750     for (i=0; i<B->rmap->n/bs; i++) {
2751       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
2752     }
2753   }
2754 
2755   b = (Mat_MPIBAIJ*)B->data;
2756   b->bs2 = bs*bs;
2757   b->mbs = B->rmap->n/bs;
2758   b->nbs = B->cmap->n/bs;
2759   b->Mbs = B->rmap->N/bs;
2760   b->Nbs = B->cmap->N/bs;
2761 
2762   for (i=0; i<=b->size; i++) {
2763     b->rangebs[i] = B->rmap->range[i]/bs;
2764   }
2765   b->rstartbs = B->rmap->rstart/bs;
2766   b->rendbs   = B->rmap->rend/bs;
2767   b->cstartbs = B->cmap->rstart/bs;
2768   b->cendbs   = B->cmap->rend/bs;
2769 
2770   if (!B->preallocated) {
2771     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2772     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2773     ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
2774     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
2775     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2776     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2777     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
2778     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
2779     ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr);
2780   }
2781 
2782   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2783   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
2784   B->preallocated = PETSC_TRUE;
2785   PetscFunctionReturn(0);
2786 }
2787 EXTERN_C_END
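/*
   Usage sketch (illustrative, not part of the original source): preallocating
   with per-block-row counts. d_nnz[] and o_nnz[] give, for each local block
   row, the number of block columns in the diagonal and off-diagonal parts.
   The sizes and counts below are assumptions; comm is an assumed communicator.

     Mat      B;
     PetscInt d_nnz[2] = {2,1},o_nnz[2] = {1,0};

     ierr = MatCreate(comm,&B);CHKERRQ(ierr);
     ierr = MatSetSizes(B,4,4,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
     ierr = MatSetType(B,MATMPIBAIJ);CHKERRQ(ierr);
     ierr = MatMPIBAIJSetPreallocation(B,2,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
*/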
2788 
2789 EXTERN_C_BEGIN
2790 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
2791 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
2792 EXTERN_C_END
2793 
2794 
2795 EXTERN_C_BEGIN
2796 #undef __FUNCT__
2797 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj"
2798 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPIAdj(Mat B, const MatType newtype,MatReuse reuse,Mat *adj)
2799 {
2800   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
2801   PetscErrorCode ierr;
2802   Mat_SeqBAIJ    *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data;
2803   PetscInt       M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs;
2804   const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;
2805 
2806   PetscFunctionBegin;
2807   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&ii);CHKERRQ(ierr);
2808   ii[0] = 0;
2809   CHKMEMQ;
2810   for (i=0; i<M; i++) {
2811     if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]);
2812     if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]);
2813     ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i];
2814     /* remove one from the count if this row of the diagonal block contains a diagonal entry */
2815     for (j=id[i]; j<id[i+1]; j++) {
2816       if (jd[j] == i) {ii[i+1]--;break;}
2817     }
2818     CHKMEMQ;
2819   }
2820   ierr = PetscMalloc(ii[M]*sizeof(PetscInt),&jj);CHKERRQ(ierr);
2821   cnt = 0;
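  /* merge each block row in ascending global column order: off-diagonal
     columns below rstart first, then the diagonal block (skipping the
     diagonal entry itself), then the remaining off-diagonal columns */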
2822   for (i=0; i<M; i++) {
2823     for (j=io[i]; j<io[i+1]; j++) {
2824       if (garray[jo[j]] > rstart) break;
2825       jj[cnt++] = garray[jo[j]];
2826       CHKMEMQ;
2827     }
2828     for (k=id[i]; k<id[i+1]; k++) {
2829       if (jd[k] != i) {
2830         jj[cnt++] = rstart + jd[k];
2831         CHKMEMQ;
2832       }
2833     }
2834     for (;j<io[i+1]; j++) {
2835       jj[cnt++] = garray[jo[j]];
2836       CHKMEMQ;
2837     }
2838   }
2839   ierr = MatCreateMPIAdj(((PetscObject)B)->comm,M,B->cmap->N/B->rmap->bs,ii,jj,PETSC_NULL,adj);CHKERRQ(ierr);
2840   PetscFunctionReturn(0);
2841 }
2842 EXTERN_C_END
2843 
2844 /*MC
2845    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
2846 
2847    Options Database Keys:
2848 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
2849 . -mat_block_size <bs> - set the blocksize used to store the matrix
2850 - -mat_use_hash_table <fact> - use a hash table when constructing the matrix; <fact> (> 1.0) scales the table size
2851 
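   Example (an illustrative sketch; A, comm, M, N, and the preallocation values are placeholders):
.vb
   Mat A;
   MatCreate(comm,&A);
   MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);
   MatSetType(A,MATMPIBAIJ);
   MatMPIBAIJSetPreallocation(A,2,5,PETSC_NULL,2,PETSC_NULL);
.ve
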
2852   Level: beginner
2853 
2854 .seealso: MatCreateMPIBAIJ
2855 M*/
2856 
2857 EXTERN_C_BEGIN
2858 #undef __FUNCT__
2859 #define __FUNCT__ "MatCreate_MPIBAIJ"
2860 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIBAIJ(Mat B)
2861 {
2862   Mat_MPIBAIJ    *b;
2863   PetscErrorCode ierr;
2864   PetscTruth     flg;
2865 
2866   PetscFunctionBegin;
2867   ierr = PetscNewLog(B,Mat_MPIBAIJ,&b);CHKERRQ(ierr);
2868   B->data = (void*)b;
2869 
2870 
2871   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2872   B->mapping    = 0;
2873   B->assembled  = PETSC_FALSE;
2874 
2875   B->insertmode = NOT_SET_VALUES;
2876   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
2877   ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr);
2878 
2879   /* build local table of row and column ownerships */
2880   ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);
2881 
2882   /* build cache for off array entries formed */
2883   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
2884   b->donotstash  = PETSC_FALSE;
2885   b->colmap      = PETSC_NULL;
2886   b->garray      = PETSC_NULL;
2887   b->roworiented = PETSC_TRUE;
2888 
2889   /* stuff used in block assembly */
2890   b->barray       = 0;
2891 
2892   /* stuff used for matrix vector multiply */
2893   b->lvec         = 0;
2894   b->Mvctx        = 0;
2895 
2896   /* stuff for MatGetRow() */
2897   b->rowindices   = 0;
2898   b->rowvalues    = 0;
2899   b->getrowactive = PETSC_FALSE;
2900 
2901   /* hash table stuff */
2902   b->ht           = 0;
2903   b->hd           = 0;
2904   b->ht_size      = 0;
2905   b->ht_flag      = PETSC_FALSE;
2906   b->ht_fact      = 0;
2907   b->ht_total_ct  = 0;
2908   b->ht_insert_ct = 0;
2909 
2910   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for MPIBAIJ matrix","Mat");CHKERRQ(ierr);
2911     ierr = PetscOptionsTruth("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
2912     if (flg) {
2913       PetscReal fact = 1.39;
2914       ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
2915       ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr);
2916       if (fact <= 1.0) fact = 1.39;
2917       ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
2918       ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
2919     }
2920   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2921 
2922   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",
2923                                      "MatConvert_MPIBAIJ_MPIAdj",
2924                                       MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr);
2925   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2926                                      "MatStoreValues_MPIBAIJ",
2927                                      MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
2928   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2929                                      "MatRetrieveValues_MPIBAIJ",
2930                                      MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
2931   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
2932                                      "MatGetDiagonalBlock_MPIBAIJ",
2933                                      MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
2934   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C",
2935                                      "MatMPIBAIJSetPreallocation_MPIBAIJ",
2936                                      MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
2937   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",
2938                                      "MatMPIBAIJSetPreallocationCSR_MPIBAIJ",
2939                                      MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
2940   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
2941                                      "MatDiagonalScaleLocal_MPIBAIJ",
2942                                      MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
2943   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C",
2944                                      "MatSetHashTableFactor_MPIBAIJ",
2945                                      MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
2946   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);
2947   PetscFunctionReturn(0);
2948 }
2949 EXTERN_C_END
2950 
2951 /*MC
2952    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
2953 
2954    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
2955    and MATMPIBAIJ otherwise.
2956 
2957    Options Database Keys:
2958 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
2959 
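   Example (an illustrative sketch; A and comm are placeholders):
.vb
   Mat A;
   MatCreate(comm,&A);
   MatSetType(A,MATBAIJ);  /* becomes MATSEQBAIJ on one process, MATMPIBAIJ otherwise */
.ve
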
2960   Level: beginner
2961 
2962 .seealso: MatCreateMPIBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
2963 M*/
2964 
2965 EXTERN_C_BEGIN
2966 #undef __FUNCT__
2967 #define __FUNCT__ "MatCreate_BAIJ"
2968 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_BAIJ(Mat A)
2969 {
2970   PetscErrorCode ierr;
2971   PetscMPIInt    size;
2972 
2973   PetscFunctionBegin;
2974   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
2975   if (size == 1) {
2976     ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr);
2977   } else {
2978     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
2979   }
2980   PetscFunctionReturn(0);
2981 }
2982 EXTERN_C_END
2983 
2984 #undef __FUNCT__
2985 #define __FUNCT__ "MatMPIBAIJSetPreallocation"
2986 /*@C
2987    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
2988    (block compressed row).  For good matrix assembly performance
2989    the user should preallocate the matrix storage by setting the parameters
2990    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2991    performance can be increased by more than a factor of 50.
2992 
2993    Collective on Mat
2994 
2995    Input Parameters:
2996 +  B - the matrix
2997 .  bs   - size of block
2998 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2999            submatrix  (same for all local rows)
3000 .  d_nnz - array containing the number of block nonzeros in the various block rows
3001            of the diagonal portion of the local submatrix (possibly different for each block
3002            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
3003 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
3004            submatrix (same for all local rows).
3005 -  o_nnz - array containing the number of nonzeros in the various block rows of the
3006            off-diagonal portion of the local submatrix (possibly different for
3007            each block row) or PETSC_NULL.
3008 
3009    If the *_nnz parameter is given then the *_nz parameter is ignored
3010 
3011    Options Database Keys:
3012 +   -mat_block_size - size of the blocks to use
3013 -   -mat_use_hash_table <fact> - use a hash table when constructing the matrix; <fact> (> 1.0) scales the table size
3014 
3015    Notes:
3016    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
3017    then it must be used on all processors that share the object for that argument.
3018 
3019    Storage Information:
3020    For a square global matrix we define each processor's diagonal portion
3021    to be its local rows and the corresponding columns (a square submatrix);
3022    each processor's off-diagonal portion encompasses the remainder of the
3023    local matrix (a rectangular submatrix).
3024 
3025    The user can specify preallocated storage for the diagonal part of
3026    the local submatrix with either d_nz or d_nnz (not both).  Set
3027    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
3028    memory allocation.  Likewise, specify preallocated storage for the
3029    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3030 
3031    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3032    the figure below we depict these three local rows and all columns (0-11).
3033 
3034 .vb
3035            0 1 2 3 4 5 6 7 8 9 10 11
3036           -------------------
3037    row 3  |  o o o d d d o o o o o o
3038    row 4  |  o o o d d d o o o o o o
3039    row 5  |  o o o d d d o o o o o o
3040           -------------------
3041 .ve
3042 
3043    Thus, any entries in the d locations are stored in the d (diagonal)
3044    submatrix, and any entries in the o locations are stored in the
3045    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3046    stored simply in the MATSEQBAIJ format for compressed row storage.
3047 
3048    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3049    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3050    In general, for PDE problems in which most nonzeros are near the diagonal,
3051    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3052    or you will get TERRIBLE performance; see the users' manual chapter on
3053    matrices.
3054 
3055    You can call MatGetInfo() to get information on how effective the preallocation was;
3056    for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded;
3057    You can also run with the option -info and look for messages with the string
3058    malloc in them to see if additional memory allocation was needed.
3059 
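   Example (an illustrative sketch; M and the preallocation values are placeholders):
.vb
   Mat B;
   MatCreate(PETSC_COMM_WORLD,&B);
   MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M,M);
   MatSetType(B,MATMPIBAIJ);
   MatMPIBAIJSetPreallocation(B,2,5,PETSC_NULL,2,PETSC_NULL);
.ve
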
3060    Level: intermediate
3061 
3062 .keywords: matrix, block, aij, compressed row, sparse, parallel
3063 
3064 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR()
3065 @*/
3066 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3067 {
3068   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
3069 
3070   PetscFunctionBegin;
3071   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3072   if (f) {
3073     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3074   }
3075   PetscFunctionReturn(0);
3076 }
3077 
3078 #undef __FUNCT__
3079 #define __FUNCT__ "MatCreateMPIBAIJ"
3080 /*@C
3081    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
3082    (block compressed row).  For good matrix assembly performance
3083    the user should preallocate the matrix storage by setting the parameters
3084    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3085    performance can be increased by more than a factor of 50.
3086 
3087    Collective on MPI_Comm
3088 
3089    Input Parameters:
3090 +  comm - MPI communicator
3091 .  bs   - size of block
3092 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
3093            This value should be the same as the local size used in creating the
3094            y vector for the matrix-vector product y = Ax.
3095 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
3096            This value should be the same as the local size used in creating the
3097            x vector for the matrix-vector product y = Ax.
3098 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
3099 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
3100 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
3101            submatrix  (same for all local rows)
3102 .  d_nnz - array containing the number of nonzero blocks in the various block rows
3103            of the diagonal portion of the local submatrix (possibly different for each block
3104            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
3105 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3106            submatrix (same for all local rows).
3107 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
3108            off-diagonal portion of the local submatrix (possibly different for
3109            each block row) or PETSC_NULL.
3110 
3111    Output Parameter:
3112 .  A - the matrix
3113 
3114    Options Database Keys:
3115 +   -mat_block_size - size of the blocks to use
3116 -   -mat_use_hash_table <fact> - use a hash table when constructing the matrix; <fact> (> 1.0) scales the table size
3117 
3118    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3119    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3120    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]
3121 
3122    Notes:
3123    If the *_nnz parameter is given then the *_nz parameter is ignored
3124 
3125    A nonzero block is any block that has 1 or more nonzeros in it
3126 
3127    The user MUST specify either the local or global matrix dimensions
3128    (possibly both).
3129 
3130    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
3131    then it must be used on all processors that share the object for that argument.
3132 
3133    Storage Information:
3134    For a square global matrix we define each processor's diagonal portion
3135    to be its local rows and the corresponding columns (a square submatrix);
3136    each processor's off-diagonal portion encompasses the remainder of the
3137    local matrix (a rectangular submatrix).
3138 
3139    The user can specify preallocated storage for the diagonal part of
3140    the local submatrix with either d_nz or d_nnz (not both).  Set
3141    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
3142    memory allocation.  Likewise, specify preallocated storage for the
3143    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3144 
3145    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3146    the figure below we depict these three local rows and all columns (0-11).
3147 
3148 .vb
3149            0 1 2 3 4 5 6 7 8 9 10 11
3150           -------------------
3151    row 3  |  o o o d d d o o o o o o
3152    row 4  |  o o o d d d o o o o o o
3153    row 5  |  o o o d d d o o o o o o
3154           -------------------
3155 .ve
3156 
3157    Thus, any entries in the d locations are stored in the d (diagonal)
3158    submatrix, and any entries in the o locations are stored in the
3159    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3160    stored simply in the MATSEQBAIJ format for compressed row storage.
3161 
3162    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3163    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3164    In general, for PDE problems in which most nonzeros are near the diagonal,
3165    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3166    or you will get TERRIBLE performance; see the users' manual chapter on
3167    matrices.
3168 
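   Example (an illustrative sketch; M and the preallocation values are placeholders):
.vb
   Mat A;
   MatCreateMPIBAIJ(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DECIDE,M,M,5,PETSC_NULL,2,PETSC_NULL,&A);
.ve
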
3169    Level: intermediate
3170 
3171 .keywords: matrix, block, aij, compressed row, sparse, parallel
3172 
3173 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3174 @*/
3175 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3176 {
3177   PetscErrorCode ierr;
3178   PetscMPIInt    size;
3179 
3180   PetscFunctionBegin;
3181   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3182   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3183   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3184   if (size > 1) {
3185     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
3186     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3187   } else {
3188     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3189     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3190   }
3191   PetscFunctionReturn(0);
3192 }
3193 
3194 #undef __FUNCT__
3195 #define __FUNCT__ "MatDuplicate_MPIBAIJ"
3196 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3197 {
3198   Mat            mat;
3199   Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
3200   PetscErrorCode ierr;
3201   PetscInt       len=0;
3202 
3203   PetscFunctionBegin;
3204   *newmat       = 0;
3205   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
3206   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3207   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3208   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3209 
3210   mat->factor       = matin->factor;
3211   mat->preallocated = PETSC_TRUE;
3212   mat->assembled    = PETSC_TRUE;
3213   mat->insertmode   = NOT_SET_VALUES;
3214 
3215   a      = (Mat_MPIBAIJ*)mat->data;
3216   mat->rmap->bs  = matin->rmap->bs;
3217   a->bs2   = oldmat->bs2;
3218   a->mbs   = oldmat->mbs;
3219   a->nbs   = oldmat->nbs;
3220   a->Mbs   = oldmat->Mbs;
3221   a->Nbs   = oldmat->Nbs;
3222 
3223   ierr = PetscMapCopy(((PetscObject)matin)->comm,matin->rmap,mat->rmap);CHKERRQ(ierr);
3224   ierr = PetscMapCopy(((PetscObject)matin)->comm,matin->cmap,mat->cmap);CHKERRQ(ierr);
3225 
3226   a->size         = oldmat->size;
3227   a->rank         = oldmat->rank;
3228   a->donotstash   = oldmat->donotstash;
3229   a->roworiented  = oldmat->roworiented;
3230   a->rowindices   = 0;
3231   a->rowvalues    = 0;
3232   a->getrowactive = PETSC_FALSE;
3233   a->barray       = 0;
3234   a->rstartbs     = oldmat->rstartbs;
3235   a->rendbs       = oldmat->rendbs;
3236   a->cstartbs     = oldmat->cstartbs;
3237   a->cendbs       = oldmat->cendbs;
3238 
3239   /* hash table stuff */
3240   a->ht           = 0;
3241   a->hd           = 0;
3242   a->ht_size      = 0;
3243   a->ht_flag      = oldmat->ht_flag;
3244   a->ht_fact      = oldmat->ht_fact;
3245   a->ht_total_ct  = 0;
3246   a->ht_insert_ct = 0;
3247 
3248   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
3249   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,1,&mat->stash);CHKERRQ(ierr);
3250   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
3251   if (oldmat->colmap) {
3252 #if defined (PETSC_USE_CTABLE)
3253   ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3254 #else
3255   ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3256   ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3257   ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3258 #endif
3259   } else a->colmap = 0;
3260 
3261   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
3262     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3263     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3264     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
3265   } else a->garray = 0;
3266 
3267   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3268   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3269   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3270   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3271 
3272   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3273   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3274   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3275   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3276   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3277   *newmat = mat;
3278 
3279   PetscFunctionReturn(0);
3280 }
3281 
3282 #include "petscsys.h"
3283 
3284 #undef __FUNCT__
3285 #define __FUNCT__ "MatLoad_MPIBAIJ"
3286 PetscErrorCode MatLoad_MPIBAIJ(PetscViewer viewer, const MatType type,Mat *newmat)
3287 {
3288   Mat            A;
3289   PetscErrorCode ierr;
3290   int            fd;
3291   PetscInt       i,nz,j,rstart,rend;
3292   PetscScalar    *vals,*buf;
3293   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3294   MPI_Status     status;
3295   PetscMPIInt    rank,size,maxnz;
3296   PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
3297   PetscInt       *locrowlens = PETSC_NULL,*procsnz = PETSC_NULL,*browners = PETSC_NULL;
3298   PetscInt       jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax;
3299   PetscMPIInt    tag = ((PetscObject)viewer)->tag;
3300   PetscInt       *dlens = PETSC_NULL,*odlens = PETSC_NULL,*mask = PETSC_NULL,*masked1 = PETSC_NULL,*masked2 = PETSC_NULL,rowcount,odcount;
3301   PetscInt       dcount,kmax,k,nzcount,tmp,mend;
3302 
3303   PetscFunctionBegin;
3304   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIBAIJ matrix","Mat");CHKERRQ(ierr);
3305     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
3306   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3307 
3308   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3309   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3310   if (!rank) {
3311     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3312     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
3313     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
3314   }
3315 
3316   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3317   M = header[1]; N = header[2];
3318 
3319   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
3320 
3321   /*
3322      This code adds extra rows to make sure the number of rows is
3323      divisible by the blocksize
3324   */
3325   Mbs        = M/bs;
3326   extra_rows = bs - M + bs*Mbs;
3327   if (extra_rows == bs) extra_rows = 0;
3328   else                  Mbs++;
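  /* illustrative: M=10, bs=4 gives Mbs=2 and extra_rows=4-10+8=2, so Mbs
     becomes 3 and the matrix is padded to 12 rows */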
3329   if (extra_rows && !rank) {
3330     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3331   }
3332 
3333   /* determine ownership of all rows */
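  /* illustrative: Mbs=10 block rows on 4 processes gives mbs = 3,3,2,2 */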
3334   mbs        = Mbs/size + ((Mbs % size) > rank);
3335   m          = mbs*bs;
3336   ierr       = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr);
3337   ierr       = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3338 
3339   /* process 0 needs enough room for process with most rows */
3340   if (!rank) {
3341     mmax = rowners[1];
3342     for (i=2; i<size; i++) {
3343       mmax = PetscMax(mmax,rowners[i]);
3344     }
3345     mmax*=bs;
3346   } else mmax = m;
3347 
3348   rowners[0] = 0;
3349   for (i=2; i<=size; i++)  rowners[i] += rowners[i-1];
3350   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
3351   rstart = rowners[rank];
3352   rend   = rowners[rank+1];
3353 
3354   /* distribute row lengths to all processors */
3355   ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr);
3356   if (!rank) {
3357     mend = m;
3358     if (size == 1) mend = mend - extra_rows;
3359     ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr);
3360     for (j=mend; j<m; j++) locrowlens[j] = 1;
3361     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3362     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3363     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3364     for (j=0; j<m; j++) {
3365       procsnz[0] += locrowlens[j];
3366     }
3367     for (i=1; i<size; i++) {
3368       mend = browners[i+1] - browners[i];
3369       if (i == size-1) mend = mend - extra_rows;
3370       ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr);
3371       for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
3372       /* calculate the number of nonzeros on each processor */
3373       for (j=0; j<browners[i+1]-browners[i]; j++) {
3374         procsnz[i] += rowlengths[j];
3375       }
3376       ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3377     }
3378     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3379   } else {
3380     ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3381   }
3382 
3383   if (!rank) {
3384     /* determine max buffer needed and allocate it */
3385     maxnz = procsnz[0];
3386     for (i=1; i<size; i++) {
3387       maxnz = PetscMax(maxnz,procsnz[i]);
3388     }
3389     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3390 
3391     /* read in my part of the matrix column indices  */
3392     nz     = procsnz[0];
3393     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
3394     mycols = ibuf;
3395     if (size == 1)  nz -= extra_rows;
3396     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3397     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
3398 
3399     /* read in everyone else's rows (except the last process's) and ship them off */
3400     for (i=1; i<size-1; i++) {
3401       nz   = procsnz[i];
3402       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3403       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3404     }
3405     /* read in and ship off the last process's part */
3406     if (size != 1) {
3407       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
3408       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3409       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
3410       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
3411     }
3412     ierr = PetscFree(cols);CHKERRQ(ierr);
3413   } else {
3414     /* determine buffer space needed for message */
3415     nz = 0;
3416     for (i=0; i<m; i++) {
3417       nz += locrowlens[i];
3418     }
3419     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
3420     mycols = ibuf;
3421     /* receive message of column indices */
3422     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3423     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
3424     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
3425   }
3426 
3427   /* loop over local rows, determining number of off diagonal entries */
3428   ierr     = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
3429   ierr     = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
3430   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3431   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3432   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
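  /* mask[] flags block columns already counted in the current block row;
     masked1[]/masked2[] record which diagonal/off-diagonal flags to clear */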
3433   rowcount = 0; nzcount = 0;
3434   for (i=0; i<mbs; i++) {
3435     dcount  = 0;
3436     odcount = 0;
3437     for (j=0; j<bs; j++) {
3438       kmax = locrowlens[rowcount];
3439       for (k=0; k<kmax; k++) {
3440         tmp = mycols[nzcount++]/bs;
3441         if (!mask[tmp]) {
3442           mask[tmp] = 1;
3443           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
3444           else masked1[dcount++] = tmp;
3445         }
3446       }
3447       rowcount++;
3448     }
3449 
3450     dlens[i]  = dcount;
3451     odlens[i] = odcount;
3452 
3453     /* zero out the mask elements we set */
3454     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
3455     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
3456   }
3457 
3458   /* create our matrix */
3459   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
3460   ierr = MatSetSizes(A,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3461   ierr = MatSetType(A,type);CHKERRQ(ierr);
3462   ierr = MatMPIBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
3463 
3464   if (!rank) {
3465     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
3466     /* read in my part of the matrix numerical values  */
3467     nz = procsnz[0];
3468     vals = buf;
3469     mycols = ibuf;
3470     if (size == 1)  nz -= extra_rows;
3471     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3472     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
3473 
3474     /* insert into matrix */
3475     jj      = rstart*bs;
3476     for (i=0; i<m; i++) {
3477       ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3478       mycols += locrowlens[i];
3479       vals   += locrowlens[i];
3480       jj++;
3481     }
3482     /* read in other processors (except the last one) and ship out */
3483     for (i=1; i<size-1; i++) {
3484       nz   = procsnz[i];
3485       vals = buf;
3486       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3487       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
3488     }
3489     /* the last proc */
3490     if (size != 1) {
3491       nz   = procsnz[i] - extra_rows;  /* i == size-1 after the loop above; the extra rows are not on disk */
3492       vals = buf;
3493       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3494       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
3495       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
3496     }
3497     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3498   } else {
3499     /* receive numeric values */
3500     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
3501 
3502     /* receive message of values */
3503     vals   = buf;
3504     mycols = ibuf;
3505     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
3506     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
3507     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
3508 
3509     /* insert into matrix */
3510     jj      = rstart*bs;
3511     for (i=0; i<m; i++) {
3512       ierr    = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3513       mycols += locrowlens[i];
3514       vals   += locrowlens[i];
3515       jj++;
3516     }
3517   }
3518   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
3519   ierr = PetscFree(buf);CHKERRQ(ierr);
3520   ierr = PetscFree(ibuf);CHKERRQ(ierr);
3521   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
3522   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
3523   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
3524   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3525   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3526 
3527   *newmat = A;
3528   PetscFunctionReturn(0);
3529 }
3530 
3531 #undef __FUNCT__
3532 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
3533 /*@
3534    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
3535 
3536    Collective on Mat
3537 
3538    Input Parameters:
3539 +  mat  - the matrix
3540 -  fact - factor (> 1.0) used to compute the size of the hash table
3541 
3542    Level: advanced
3543 
3544   Notes:
3545    This can also be set by the command line option: -mat_use_hash_table <fact>
3546 
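   Example (an illustrative sketch; mat and the factor value are placeholders):
.vb
   MatSetOption(mat,MAT_USE_HASH_TABLE,PETSC_TRUE);
   MatMPIBAIJSetHashTableFactor(mat,1.6);
.ve
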
3547 .keywords: matrix, hashtable, factor, HT
3548 
3549 .seealso: MatSetOption()
3550 @*/
3551 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
3552 {
3553   PetscErrorCode ierr,(*f)(Mat,PetscReal);
3554 
3555   PetscFunctionBegin;
3556   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);CHKERRQ(ierr);
3557   if (f) {
3558     ierr = (*f)(mat,fact);CHKERRQ(ierr);
3559   }
3560   PetscFunctionReturn(0);
3561 }
3562 
3563 EXTERN_C_BEGIN
3564 #undef __FUNCT__
3565 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
3566 PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
3567 {
3568   Mat_MPIBAIJ *baij;
3569 
3570   PetscFunctionBegin;
3571   baij = (Mat_MPIBAIJ*)mat->data;
3572   baij->ht_fact = fact;
3573   PetscFunctionReturn(0);
3574 }
3575 EXTERN_C_END
3576 
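/*
    MatMPIBAIJGetSeqBAIJ - returns the local "diagonal" block Ad, the local
  off-diagonal block Ao, and colmap, the mapping from Ao's local block
  columns to global block columns.  The outputs are references, not copies.
*/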
3577 #undef __FUNCT__
3578 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
3579 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3580 {
3581   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
3582   PetscFunctionBegin;
3583   *Ad     = a->A;
3584   *Ao     = a->B;
3585   *colmap = a->garray;
3586   PetscFunctionReturn(0);
3587 }
3588 
3589 /*
3590     Special version for direct calls from Fortran (to eliminate two function call overheads)
3591 */
3592 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3593 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3594 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3595 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3596 #endif
3597 
3598 #undef __FUNCT__
3599 #define __FUNCT__ "matmpibiajsetvaluesblocked"
3600 /*@C
3601   MatMPIBAIJSetValuesBlocked - Direct Fortran call that replaces a call to MatSetValuesBlocked()
3602 
3603   Collective on Mat
3604 
3605   Input Parameters:
3606 + mat - the matrix
3607 . min - number of input rows
3608 . im - input rows
3609 . nin - number of input columns
3610 . in - input columns
3611 . v - numerical values input
3612 - addvin - INSERT_VALUES or ADD_VALUES
3613 
3614   Notes: This is a complete copy of MatSetValuesBlocked_MPIBAIJ(); the duplication is unfortunate, but it avoids two levels of function call overhead when called directly from Fortran.
3615 
3616   Level: advanced
3617 
3618 .seealso:   MatSetValuesBlocked()
3619 @*/
3620 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
3621 {
3622   /* convert input arguments to C version */
3623   Mat             mat = *matin;
3624   PetscInt        m = *min, n = *nin;
3625   InsertMode      addv = *addvin;
3626 
3627   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
3628   const MatScalar *value;
3629   MatScalar       *barray=baij->barray;
3630   PetscTruth      roworiented = baij->roworiented;
3631   PetscErrorCode  ierr;
3632   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
3633   PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
3634   PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
3635 
3636   PetscFunctionBegin;
3637   /* tasks normally handled by MatSetValuesBlocked() */
3638   if (mat->insertmode == NOT_SET_VALUES) {
3639     mat->insertmode = addv;
3640   }
3641 #if defined(PETSC_USE_DEBUG)
3642   else if (mat->insertmode != addv) {
3643     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
3644   }
3645   if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
3646 #endif
3647   if (mat->assembled) {
3648     mat->was_assembled = PETSC_TRUE;
3649     mat->assembled     = PETSC_FALSE;
3650   }
3651   ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
3652 
3653 
3654   if (!barray) {
3655     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
3656     baij->barray = barray;
3657   }
3658 
3659   if (roworiented) {
3660     stepval = (n-1)*bs;
3661   } else {
3662     stepval = (m-1)*bs;
3663   }
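  /* after copying the bs values of one scalar row of a block, value is
     advanced by stepval to reach that block's next scalar row; illustrative:
     bs=2, n=3, row-oriented gives stepval=(3-1)*2=4 */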
3664   for (i=0; i<m; i++) {
3665     if (im[i] < 0) continue;
3666 #if defined(PETSC_USE_DEBUG)
3667     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
3668 #endif
3669     if (im[i] >= rstart && im[i] < rend) {
3670       row = im[i] - rstart;
3671       for (j=0; j<n; j++) {
3672         /* if there is only one block column (row-oriented) or one block row (column-oriented), v can be used in place without a copy */
3673         if ((roworiented) && (n == 1)) {
3674           barray = (MatScalar*)v + i*bs2;
3675         } else if((!roworiented) && (m == 1)) {
3676           barray = (MatScalar*)v + j*bs2;
3677         } else { /* Here a copy is required */
3678           if (roworiented) {
3679             value = v + i*(stepval+bs)*bs + j*bs;
3680           } else {
3681             value = v + j*(stepval+bs)*bs + i*bs;
3682           }
3683           for (ii=0; ii<bs; ii++,value+=stepval) {
3684             for (jj=0; jj<bs; jj++) {
3685               *barray++  = *value++;
3686             }
3687           }
3688           barray -=bs2;
3689         }
3690 
3691         if (in[j] >= cstart && in[j] < cend){
3692           col  = in[j] - cstart;
3693           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
3694         }
3695         else if (in[j] < 0) continue;
3696 #if defined(PETSC_USE_DEBUG)
3697         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
3698 #endif
3699         else {
3700           if (mat->was_assembled) {
3701             if (!baij->colmap) {
3702               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
3703             }
3704 
3705 #if defined(PETSC_USE_DEBUG)
3706 #if defined (PETSC_USE_CTABLE)
3707             { PetscInt data;
3708               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
3709               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
3710             }
3711 #else
3712             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
3713 #endif
3714 #endif
3715 #if defined (PETSC_USE_CTABLE)
3716             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
3717             col  = (col - 1)/bs;
3718 #else
3719             col = (baij->colmap[in[j]] - 1)/bs;
3720 #endif
3721             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
3722               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
3723               col =  in[j];
3724             }
3725           }
3726           else col = in[j];
3727           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
3728         }
3729       }
3730     } else {
3731       if (!baij->donotstash) {
3732         if (roworiented) {
3733           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
3734         } else {
3735           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
3736         }
3737       }
3738     }
3739   }
3740 
3741   /* task normally handled by MatSetValuesBlocked() */
3742   ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
3743   PetscFunctionReturn(0);
3744 }
3745