xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 7842634f46ad2ee292fa135afd07ef040c80768f)
#define PETSCMAT_DLL

#include "../src/mat/impls/baij/mpi/mpibaij.h"   /*I  "petscmat.h"  I*/
#include "petscblaslapack.h"

EXTERN PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
EXTERN PetscErrorCode DisAssemble_MPIBAIJ(Mat);
EXTERN PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat,PetscInt,IS[],PetscInt);
EXTERN PetscErrorCode MatGetSubMatrices_MPIBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat *[]);
EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar[]);
EXTERN PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
EXTERN PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar);

#undef __FUNCT__
#define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ"
PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       i,*idxb = 0;
  PetscScalar    *va,*vb;
  Vec            vtmp;

  PetscFunctionBegin;
  ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);
  if (idx) {
    for (i=0; i<A->rmap->n; i++) {if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;}
  }

  ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
  if (idx) {ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);}
  ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
  ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);

  for (i=0; i<A->rmap->n; i++) {
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {va[i] = vb[i]; if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);}
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
  if (idxb) {ierr = PetscFree(idxb);CHKERRQ(ierr);}
  ierr = VecDestroy(vtmp);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
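
/*
   Usage sketch (a minimal example, not part of this file): MatGetRowMaxAbs()
   fills a vector with the largest absolute entry of each local row; the
   optional integer array receives the global column index of that entry.
   The vector must share the matrix row layout:

     Vec      rowmax;
     PetscInt *cols;
     ierr = MatGetVecs(mat,PETSC_NULL,&rowmax);CHKERRQ(ierr);
     ierr = PetscMalloc(m*sizeof(PetscInt),&cols);CHKERRQ(ierr);   (m = number of local rows)
     ierr = MatGetRowMaxAbs(mat,rowmax,cols);CHKERRQ(ierr);
*/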

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPIBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
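
/*
   Usage sketch (a minimal example, not part of this file): the two routines
   above let an application cache and later restore all numerical values when
   a matrix is refilled repeatedly with an unchanged nonzero pattern. The
   matrix must be assembled, and new nonzero locations must be disallowed:

     ierr = MatSetOption(mat,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);CHKERRQ(ierr);
     ierr = MatStoreValues(mat);CHKERRQ(ierr);
        ... later, before refilling with the same sparsity pattern ...
     ierr = MatRetrieveValues(mat);CHKERRQ(ierr);
*/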

/*
     Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix.  This is done in a non-scalable way since the
   length of colmap equals the global matrix length.
*/
#undef __FUNCT__
#define __FUNCT__ "CreateColmap_MPIBAIJ_Private"
PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr);
  for (i=0; i<nbs; i++) {
    ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
  }
#else
  ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr);
  ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}
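
/*
   A sketch of how the colmap is consulted (non-CTABLE branch; names as above):
   for a global block column gbcol, colmap[gbcol] is either 0, meaning the
   block column does not occur in the off-diagonal part B, or 1 + bs*lbcol,
   where lbcol is the local block column in B, so a lookup decodes as

     col = baij->colmap[gbcol] - 1;      first point column of that block, or -1
     if (col < 0) ... block column not present in B ...
*/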

#define MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else                    *bap  = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix",row,col); \
    MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N >= _i) {ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \
    rp[_i]                      = bcol; \
    ap[bs2*_i + bs*cidx + ridx] = value; \
    a_noinsert:; \
    ailen[brow] = nrow; \
}

#define MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else                    *bap  = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix",row,col); \
    MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
    CHKMEMQ; \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N >= _i) {ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);} \
    rp[_i]                      = bcol; \
    ap[bs2*_i + bs*cidx + ridx] = value; \
    b_noinsert:; \
    bilen[brow] = nrow; \
}
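
/*
   Both macros above locate the insertion point for block column bcol inside a
   block row in the same way: a truncated binary search narrows [low,high) to
   at most three candidates, then a short linear scan finishes the job. A
   minimal standalone sketch of that search pattern (identifiers illustrative):

     low = 0; high = nrow;
     while (high-low > 3) {
       t = (low+high)/2;
       if (rp[t] > bcol) high = t;
       else              low  = t;
     }
     for (i=low; i<high; i++) {
       if (rp[i] >= bcol) break;        i is now the first slot with rp[i] >= bcol
     }
*/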

#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ"
PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  MatScalar      value;
  PetscTruth     roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig=mat->cmap->rend,bs=mat->rmap->bs;

  /* Some variables required in the macros */
  Mat            A = baij->A;
  Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)(A)->data;
  PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar      *aa=a->a;

  Mat            B = baij->B;
  Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(B)->data;
  PetscInt       *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar      *ba=b->a;

  PetscInt       *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt       low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar      *ap,*bap;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      row = im[i] - rstart_orig;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig) {
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B = baij->B;
              b = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax; bi=b->i; bilen=b->ilen; bj=b->j;
              ba=b->a;
            } else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  const PetscScalar *value;
  MatScalar         *barray=baij->barray;
  PetscTruth        roworiented = baij->roworiented;
  PetscErrorCode    ierr;
  PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
  PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++ = *value++;
            }
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend) {
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
        else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined (PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
            }
          }
          else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

#define HASH_KEY 0.6180339887
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
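
/*
   The HASH macro is multiplicative hashing with the golden-ratio constant:
   the key is scaled by 0.6180339887, the integer part is discarded, and the
   remaining fraction is mapped onto the table size. For example, with
   size = 1000 and key = 12345, tmp = 12345*0.6180339887 = 7629.6296..., so
   the probe index is (PetscInt)(1000*0.6296...) = 629 (up to rounding).
*/
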
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscTruth     roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig=mat->rmap->rend,Nbs=baij->Nbs;
  PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
  PetscReal      tmp;
  MatScalar      **HD = baij->hd,value;
#if defined(PETSC_USE_DEBUG)
  PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j=0; j<n; j++) {
        col = in[j];
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        /* Look up into the hash table */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table",row,col);
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table",row,col);
            }
          }
        }
#endif
        /* A hash table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct  = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscTruth        roworiented = baij->roworiented;
  PetscErrorCode    ierr;
  PetscInt          i,j,ii,jj,row,col;
  PetscInt          rstart=baij->rstartbs;
  PetscInt          rend=baij->rendbs,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
  PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  PetscReal         tmp;
  MatScalar         **HD = baij->hd,*baij_a;
  const PetscScalar *v_t,*value;
#if defined(PETSC_USE_DEBUG)
  PetscInt          total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;
  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_DEBUG)
    if (im[i] < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
#endif
    row = im[i];
    v_t = v + i*nbs2;
    if (row >= rstart && row < rend) {
      for (j=0; j<n; j++) {
        col = in[j];

        /* Look up into the hash table */
        key = row*Nbs+col+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_DEBUG)
        total_ct++;
        insert_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table",row,col);
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table",row,col);
            }
          }
        }
#endif
        baij_a = HD[idx];
        if (roworiented) {
          /* value = v + i*(stepval+bs)*bs + j*bs; */
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t  += bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj] = *value++;
              }
            }
          }
        } else {
          value = v + j*(stepval+bs)*bs + i*bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj] = *value++;
              }
            }
          }
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_DEBUG)
  baij->ht_total_ct  = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPIBAIJ"
PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
  PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          col  = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPIBAIJ"
PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
  PetscReal      sum = 0.0;
  MatScalar      *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      v  = amat->a;
      nz = amat->nz*bs2;
      for (i=0; i<nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      v  = bmat->a;
      nz = bmat->nz*bs2;
      for (i=0; i<nz; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      *nrm = sqrt(*nrm);
    } else if (type == NORM_1) { /* max column sum */
      PetscReal *tmp,*tmp2;
      PetscInt  *jj,*garray=baij->garray,cstart=baij->cstartbs;
      ierr = PetscMalloc2(mat->cmap->N,PetscReal,&tmp,mat->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr);
      ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      v = amat->a; jj = amat->j;
      for (i=0; i<amat->nz; i++) {
        for (j=0; j<bs; j++) {
          col = bs*(cstart + *jj) + j; /* column index */
          for (row=0; row<bs; row++) {
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      v = bmat->a; jj = bmat->j;
      for (i=0; i<bmat->nz; i++) {
        for (j=0; j<bs; j++) {
          col = bs*garray[*jj] + j;
          for (row=0; row<bs; row++) {
            tmp[col] += PetscAbsScalar(*v); v++;
          }
        }
        jj++;
      }
      ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      *nrm = 0.0;
      for (j=0; j<mat->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row sum */
      PetscReal *sums;
      ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr);
      sum  = 0.0;
      for (j=0; j<amat->mbs; j++) {
        for (row=0; row<bs; row++) sums[row] = 0.0;
        v  = amat->a + bs2*amat->i[j];
        nz = amat->i[j+1]-amat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        v  = bmat->a + bs2*bmat->i[j];
        nz = bmat->i[j+1]-bmat->i[j];
        for (i=0; i<nz; i++) {
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              sums[row] += PetscAbsScalar(*v); v++;
            }
          }
        }
        for (row=0; row<bs; row++) {
          if (sums[row] > sum) sum = sums[row];
        }
      }
      ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
      ierr = PetscFree(sums);CHKERRQ(ierr);
    } else SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_SUP,"No support for this norm yet");
  }
  PetscFunctionReturn(0);
}

/*
  Creates the hash table, and sets the table.
  This table is created only once.
  If new entries need to be added to the matrix
  then the hash table has to be destroyed and
  recreated.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat            A = baij->A,B=baij->B;
  Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data,*b=(Mat_SeqBAIJ*)B->data;
  PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
  PetscErrorCode ierr;
  PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
  PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
  PetscInt       *HT,key;
  MatScalar      **HD;
  PetscReal      tmp;
#if defined(PETSC_USE_INFO)
  PetscInt       ct=0,max=0;
#endif

  PetscFunctionBegin;
  if (baij->ht) PetscFunctionReturn(0);

  baij->ht_size = (PetscInt)(factor*nz);
  ht_size       = baij->ht_size;

  /* Allocate memory for the hash table */
  ierr = PetscMalloc2(ht_size,MatScalar*,&baij->hd,ht_size,PetscInt,&baij->ht);CHKERRQ(ierr);
  ierr = PetscMemzero(baij->hd,ht_size*sizeof(MatScalar*));CHKERRQ(ierr);
  ierr = PetscMemzero(baij->ht,ht_size*sizeof(PetscInt));CHKERRQ(ierr);
  HD   = baij->hd;
  HT   = baij->ht;

  /* Loop over A */
  for (i=0; i<a->mbs; i++) {
    for (j=ai[i]; j<ai[i+1]; j++) {
      row = i+rstart;
      col = aj[j]+cstart;

      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++) {
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = a->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }
  /* Loop over B */
  for (i=0; i<b->mbs; i++) {
    for (j=bi[i]; j<bi[i+1]; j++) {
      row = i+rstart;
      col = garray[bj[j]];
      key = row*Nbs + col + 1;
      h1  = HASH(ht_size,key,tmp);
      for (k=0; k<ht_size; k++) {
        if (!HT[(h1+k)%ht_size]) {
          HT[(h1+k)%ht_size] = key;
          HD[(h1+k)%ht_size] = b->a + j*bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }

  /* Print summary */
#if defined(PETSC_USE_INFO)
  for (i=0,j=0; i<ht_size; i++) {
    if (HT[i]) j++;
  }
  ierr = PetscInfo2(mat,"Average search = %5.2f, max search = %D\n",(!j) ? 0.0 : ((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
#endif
  PetscFunctionReturn(0);
}
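
/*
   Usage sketch (a minimal example, not part of this file): the hash-table
   insertion path is requested per matrix with

     ierr = MatSetOption(mat,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);

   (or, assuming the usual options handling, the -mat_use_hash_table option).
   The table itself is built lazily at the end of the first final assembly,
   after which MatSetValues() calls are routed to MatSetValues_MPIBAIJ_HT().
*/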

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (baij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted, others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Block-stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
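
/*
   Usage sketch (a minimal example, not part of this file): the stash logic
   above is what allows any process to set any global entry; off-process
   values are cached locally and communicated during assembly:

     ierr = MatSetValues(mat,1,&row,1,&col,&val,ADD_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/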

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscTruth     r1,r2,r3,other_disassembled;
  MatScalar      *val;
  InsertMode     addv = mat->insertmode;
  PetscMPIInt    n;

  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!baij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the row-oriented flags to PETSC_FALSE, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;
    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);

    baij->roworiented = r1;
    a->roworiented    = r2;
    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, so that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = PetscInfo1(mat,"Average hash table search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  baij->rowvalues = 0;
  PetscFunctionReturn(0);
}
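
/*
   Usage sketch (a minimal example, not part of this file): switching between
   INSERT_VALUES and ADD_VALUES requires a flush assembly in between, which
   the routine above handles via the MAT_FLUSH_ASSEMBLY mode:

     ierr = MatSetValues(mat,1,&i,1,&j,&v,INSERT_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatSetValues(mat,1,&i,1,&j,&v,ADD_VALUES);CHKERRQ(ierr);
*/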

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscMPIInt       size = baij->size,rank = baij->rank;
  PetscInt          bs = mat->rmap->bs;
  PetscTruth        iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
              rank,mat->rmap->N,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs,
              mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D\n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D\n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product:\n");CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
    if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar   *a;

    /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
    ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ*)baij->B->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPIBAIJ"
PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii || isdraw || issocket || isbinary) {
    ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(((PetscObject)mat)->comm,PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPIBAIJ"
PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = MatDestroy(baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(baij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (baij->colmap) {ierr = PetscTableDestroy(baij->colmap);CHKERRQ(ierr);}
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  if (baij->lvec)  {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
  if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(baij);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPIBAIJ"
PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
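
/*
   The multiply above overlaps communication with computation: the scatter of
   the ghost values of xx into a->lvec is started, the diagonal block is
   applied while messages are in flight, and the off-diagonal contribution is
   added once the scatter completes, i.e.

     yy = A_diag * xx_local + B_offdiag * xx_ghost
*/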

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPIBAIJ"
PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPIBAIJ"
PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscTruth     merged;

  PetscFunctionBegin;
  ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  if (!merged) {
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* receive remote parts: note this assumes the values are not actually
       inserted in yy until the next line */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  } else {
    /* do local part */
    ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
    /* send it on its way */
    ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
    /* the values were actually received in the Begin(), but we still need to call this no-op End() */
    ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* do nondiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way */
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  /* do local part */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually
     inserted in zz until the next line, which is true for this implementation
     but may not always hold */
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPIBAIJ"
PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
{
  Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPIBAIJ"
PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;

  PetscFunctionBegin;
  if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
  if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
    PetscInt    max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr);
  }
  lrow = row - brstart;

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
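
/*
   Usage sketch (a minimal example, not part of this file): rows can only be
   accessed one at a time, only locally owned rows may be requested, and every
   MatGetRow() must be paired with a MatRestoreRow():

     const PetscInt    *cols;
     const PetscScalar *vals;
     PetscInt          ncols;
     ierr = MatGetRow(mat,grow,&ncols,&cols,&vals);CHKERRQ(ierr);
        ... use ncols, cols, vals ...
     ierr = MatRestoreRow(mat,grow,&ncols,&cols,&vals);CHKERRQ(ierr);
*/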

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPIBAIJ"
PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;

  PetscFunctionBegin;
  if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPIBAIJ"
PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1347 
1348 #undef __FUNCT__
1349 #define __FUNCT__ "MatGetInfo_MPIBAIJ"
1350 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1351 {
1352   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
1353   Mat            A = a->A,B = a->B;
1354   PetscErrorCode ierr;
1355   PetscReal      isend[5],irecv[5];
1356 
1357   PetscFunctionBegin;
1358   info->block_size     = (PetscReal)matin->rmap->bs;
1359   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1360   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1361   isend[3] = info->memory;  isend[4] = info->mallocs;
1362   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1363   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1364   isend[3] += info->memory;  isend[4] += info->mallocs;
1365   if (flag == MAT_LOCAL) {
1366     info->nz_used      = isend[0];
1367     info->nz_allocated = isend[1];
1368     info->nz_unneeded  = isend[2];
1369     info->memory       = isend[3];
1370     info->mallocs      = isend[4];
1371   } else if (flag == MAT_GLOBAL_MAX) {
1372     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1373     info->nz_used      = irecv[0];
1374     info->nz_allocated = irecv[1];
1375     info->nz_unneeded  = irecv[2];
1376     info->memory       = irecv[3];
1377     info->mallocs      = irecv[4];
1378   } else if (flag == MAT_GLOBAL_SUM) {
1379     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1380     info->nz_used      = irecv[0];
1381     info->nz_allocated = irecv[1];
1382     info->nz_unneeded  = irecv[2];
1383     info->memory       = irecv[3];
1384     info->mallocs      = irecv[4];
1385   } else {
1386     SETERRQ1(((PetscObject)matin)->comm,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1387   }
1388   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1389   info->fill_ratio_needed = 0;
1390   info->factor_mallocs    = 0;
1391   PetscFunctionReturn(0);
1392 }
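
/*
   Usage sketch (illustrative only): querying matrix statistics through the
   public MatGetInfo() interface, which dispatches here for MPIBAIJ. The
   variable names are hypothetical.

     MatInfo info;

     ierr = MatGetInfo(mat,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(PETSC_COMM_WORLD,"nonzeros used %g allocated %g\n",
                        info.nz_used,info.nz_allocated);CHKERRQ(ierr);
*/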
1393 
1394 #undef __FUNCT__
1395 #define __FUNCT__ "MatSetOption_MPIBAIJ"
1396 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscTruth flg)
1397 {
1398   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1399   PetscErrorCode ierr;
1400 
1401   PetscFunctionBegin;
1402   switch (op) {
1403   case MAT_NEW_NONZERO_LOCATIONS:
1404   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1405   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1406   case MAT_KEEP_NONZERO_PATTERN:
1407   case MAT_NEW_NONZERO_LOCATION_ERR:
1408     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1409     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1410     break;
1411   case MAT_ROW_ORIENTED:
1412     a->roworiented = flg;
1413     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1414     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1415     break;
1416   case MAT_NEW_DIAGONALS:
1417     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1418     break;
1419   case MAT_IGNORE_OFF_PROC_ENTRIES:
1420     a->donotstash = flg;
1421     break;
1422   case MAT_USE_HASH_TABLE:
1423     a->ht_flag = flg;
1424     break;
1425   case MAT_SYMMETRIC:
1426   case MAT_STRUCTURALLY_SYMMETRIC:
1427   case MAT_HERMITIAN:
1428   case MAT_SYMMETRY_ETERNAL:
1429     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1430     break;
1431   default:
1432     SETERRQ1(((PetscObject)A)->comm,PETSC_ERR_SUP,"unknown option %d",op);
1433   }
1434   PetscFunctionReturn(0);
1435 }
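
/*
   Usage sketch (illustrative only): options are normally set after
   MatCreate()/MatSetType() and before assembly. For example, a code that
   generates all of its entries locally can skip the off-process stash:

     ierr = MatSetOption(mat,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);

   which simply sets a->donotstash above, so MatSetValues() silently drops
   any entries in rows not owned by this process.
*/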
1436 
1437 #undef __FUNCT__
1438 #define __FUNCT__ "MatTranspose_MPIBAIJ"
1439 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
1440 {
1441   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
1442   Mat_SeqBAIJ    *Aloc;
1443   Mat            B;
1444   PetscErrorCode ierr;
1445   PetscInt       M=A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
1446   PetscInt       bs=A->rmap->bs,mbs=baij->mbs;
1447   MatScalar      *a;
1448 
1449   PetscFunctionBegin;
1450   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1451   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1452     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
1453     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1454     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1455     ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1456   } else {
1457     B = *matout;
1458   }
1459 
1460   /* copy over the A part */
1461   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1462   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1463   ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
1464 
1465   for (i=0; i<mbs; i++) {
1466     rvals[0] = bs*(baij->rstartbs + i);
1467     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1468     for (j=ai[i]; j<ai[i+1]; j++) {
1469       col = (baij->cstartbs+aj[j])*bs;
1470       for (k=0; k<bs; k++) {
1471         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1472         col++; a += bs;
1473       }
1474     }
1475   }
1476   /* copy over the B part */
1477   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1478   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1479   for (i=0; i<mbs; i++) {
1480     rvals[0] = bs*(baij->rstartbs + i);
1481     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1482     for (j=ai[i]; j<ai[i+1]; j++) {
1483       col = baij->garray[aj[j]]*bs;
1484       for (k=0; k<bs; k++) {
1485         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1486         col++; a += bs;
1487       }
1488     }
1489   }
1490   ierr = PetscFree(rvals);CHKERRQ(ierr);
1491   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1492   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1493 
1494   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1495     *matout = B;
1496   } else {
1497     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1498   }
1499   PetscFunctionReturn(0);
1500 }
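
/*
   Usage sketch (illustrative only): out-of-place and in-place transposes
   through the public interface; mat and B are hypothetical names.

     Mat B;
     ierr = MatTranspose(mat,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);    -- new matrix B = mat^T
     ierr = MatTranspose(mat,MAT_REUSE_MATRIX,&mat);CHKERRQ(ierr);    -- in place; mat must be square
*/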
1501 
1502 #undef __FUNCT__
1503 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
1504 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1505 {
1506   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1507   Mat            a = baij->A,b = baij->B;
1508   PetscErrorCode ierr;
1509   PetscInt       s1,s2,s3;
1510 
1511   PetscFunctionBegin;
1512   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1513   if (rr) {
1514     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1515     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1516     /* Overlap communication with computation. */
1517     ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1518   }
1519   if (ll) {
1520     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1521     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1522     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1523   }
1524   /* scale  the diagonal block */
1525   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1526 
1527   if (rr) {
1528     /* Do a scatter end and then right scale the off-diagonal block */
1529     ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1530     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1531   }
1532 
1533   PetscFunctionReturn(0);
1534 }
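
/*
   Usage sketch (illustrative only): forming diag(l) * mat * diag(r). The
   vectors must conform to the local row and column layouts respectively;
   pass PETSC_NULL to skip one side.

     ierr = MatDiagonalScale(mat,l,r);CHKERRQ(ierr);           -- scale both sides
     ierr = MatDiagonalScale(mat,PETSC_NULL,r);CHKERRQ(ierr);  -- columns only
*/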
1535 
1536 #undef __FUNCT__
1537 #define __FUNCT__ "MatZeroRows_MPIBAIJ"
1538 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
1539 {
1540   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1541   PetscErrorCode ierr;
1542   PetscMPIInt    imdex,size = l->size,n,rank = l->rank;
1543   PetscInt       i,*owners = A->rmap->range;
1544   PetscInt       *nprocs,j,idx,nsends,row;
1545   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
1546   PetscInt       *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source,lastidx = -1;
1547   PetscInt       *lens,*lrows,*values,rstart_bs=A->rmap->rstart;
1548   MPI_Comm       comm = ((PetscObject)A)->comm;
1549   MPI_Request    *send_waits,*recv_waits;
1550   MPI_Status     recv_status,*send_status;
1551 #if defined(PETSC_DEBUG)
1552   PetscTruth     found = PETSC_FALSE;
1553 #endif
1554 
1555   PetscFunctionBegin;
1556   /*  first count number of contributors to each processor */
1557   ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
1558   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
1559   ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
1560   j = 0;
1561   for (i=0; i<N; i++) {
1562     if (lastidx > (idx = rows[i])) j = 0;
1563     lastidx = idx;
1564     for (; j<size; j++) {
1565       if (idx >= owners[j] && idx < owners[j+1]) {
1566         nprocs[2*j]++;
1567         nprocs[2*j+1] = 1;
1568         owner[i] = j;
1569 #if defined(PETSC_DEBUG)
1570         found = PETSC_TRUE;
1571 #endif
1572         break;
1573       }
1574     }
1575 #if defined(PETSC_DEBUG)
1576     if (!found) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
1577     found = PETSC_FALSE;
1578 #endif
1579   }
1580   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
1581 
1582   /* inform other processors of number of messages and max length*/
1583   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
1584 
1585   /* post receives:   */
1586   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
1587   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
1588   for (i=0; i<nrecvs; i++) {
1589     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
1590   }
1591 
1592   /* do sends:
1593      1) starts[i] gives the starting index in svalues for stuff going to
1594      the ith processor
1595   */
1596   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
1597   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
1598   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
1599   starts[0]  = 0;
1600   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1601   for (i=0; i<N; i++) {
1602     svalues[starts[owner[i]]++] = rows[i];
1603   }
1604 
1605   starts[0] = 0;
1606   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1607   count = 0;
1608   for (i=0; i<size; i++) {
1609     if (nprocs[2*i+1]) {
1610       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
1611     }
1612   }
1613   ierr = PetscFree(starts);CHKERRQ(ierr);
1614 
1615   base = owners[rank];
1616 
1617   /*  wait on receives */
1618   ierr   = PetscMalloc2(nrecvs+1,PetscInt,&lens,nrecvs+1,PetscInt,&source);CHKERRQ(ierr);
1619   count  = nrecvs;
1620   slen = 0;
1621   while (count) {
1622     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
1623     /* unpack receives into our local space */
1624     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
1625     source[imdex]  = recv_status.MPI_SOURCE;
1626     lens[imdex]    = n;
1627     slen          += n;
1628     count--;
1629   }
1630   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1631 
1632   /* move the data into the send scatter */
1633   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
1634   count = 0;
1635   for (i=0; i<nrecvs; i++) {
1636     values = rvalues + i*nmax;
1637     for (j=0; j<lens[i]; j++) {
1638       lrows[count++] = values[j] - base;
1639     }
1640   }
1641   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1642   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
1643   ierr = PetscFree(owner);CHKERRQ(ierr);
1644   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1645 
1646   /* actually zap the local rows */
1647   /*
1648         Zero the required rows. If the "diagonal block" of the matrix
1649      is square and the user wishes to set the diagonal, we use separate
1650      code so that MatSetValues() is not called for each diagonal entry;
1651      that would allocate new memory each time, causing many mallocs and slowing things down.
1652 
1653   */
1654   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1655   ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0);CHKERRQ(ierr);
1656   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
1657     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag);CHKERRQ(ierr);
1658   } else if (diag != 0.0) {
1659     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr);
1660     if (((Mat_SeqBAIJ*)l->A->data)->nonew) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1661        MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1662     for (i=0; i<slen; i++) {
1663       row  = lrows[i] + rstart_bs;
1664       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
1665     }
1666     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1667     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1668   } else {
1669     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr);
1670   }
1671 
1672   ierr = PetscFree(lrows);CHKERRQ(ierr);
1673 
1674   /* wait on sends */
1675   if (nsends) {
1676     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1677     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1678     ierr = PetscFree(send_status);CHKERRQ(ierr);
1679   }
1680   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1681   ierr = PetscFree(svalues);CHKERRQ(ierr);
1682 
1683   PetscFunctionReturn(0);
1684 }
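
/*
   Usage sketch (illustrative only): zeroing rows, e.g. to impose Dirichlet
   boundary conditions. The rows are global indices and need not be owned
   by the calling process; the communication above routes each row to its
   owner. A nonzero diag keeps the system nonsingular. The indices below
   are hypothetical.

     PetscInt rows[] = {0,5,9};
     ierr = MatZeroRows(mat,3,rows,1.0);CHKERRQ(ierr);
*/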
1685 
1686 #undef __FUNCT__
1687 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
1688 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1689 {
1690   Mat_MPIBAIJ    *a   = (Mat_MPIBAIJ*)A->data;
1691   PetscErrorCode ierr;
1692 
1693   PetscFunctionBegin;
1694   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1695   PetscFunctionReturn(0);
1696 }
1697 
1698 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
1699 
1700 #undef __FUNCT__
1701 #define __FUNCT__ "MatEqual_MPIBAIJ"
1702 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag)
1703 {
1704   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1705   Mat            a,b,c,d;
1706   PetscTruth     flg;
1707   PetscErrorCode ierr;
1708 
1709   PetscFunctionBegin;
1710   a = matA->A; b = matA->B;
1711   c = matB->A; d = matB->B;
1712 
1713   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1714   if (flg) {
1715     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1716   }
1717   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1718   PetscFunctionReturn(0);
1719 }
1720 
1721 #undef __FUNCT__
1722 #define __FUNCT__ "MatCopy_MPIBAIJ"
1723 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
1724 {
1725   PetscErrorCode ierr;
1726   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ *)A->data;
1727   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ *)B->data;
1728 
1729   PetscFunctionBegin;
1730   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1731   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1732     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1733   } else {
1734     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1735     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1736   }
1737   PetscFunctionReturn(0);
1738 }
1739 
1740 #undef __FUNCT__
1741 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ"
1742 PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A)
1743 {
1744   PetscErrorCode ierr;
1745 
1746   PetscFunctionBegin;
1747   ierr =  MatMPIBAIJSetPreallocation(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1748   PetscFunctionReturn(0);
1749 }
1750 
1751 #undef __FUNCT__
1752 #define __FUNCT__ "MatAXPY_MPIBAIJ"
1753 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1754 {
1755   PetscErrorCode ierr;
1756   Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data;
1757   PetscBLASInt   bnz,one=1;
1758   Mat_SeqBAIJ    *x,*y;
1759 
1760   PetscFunctionBegin;
1761   if (str == SAME_NONZERO_PATTERN) {
1762     PetscScalar alpha = a;
1763     x = (Mat_SeqBAIJ *)xx->A->data;
1764     y = (Mat_SeqBAIJ *)yy->A->data;
1765     bnz = PetscBLASIntCast(x->nz);
1766     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1767     x = (Mat_SeqBAIJ *)xx->B->data;
1768     y = (Mat_SeqBAIJ *)yy->B->data;
1769     bnz = PetscBLASIntCast(x->nz);
1770     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1771   } else {
1772     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1773   }
1774   PetscFunctionReturn(0);
1775 }
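
/*
   Usage sketch (illustrative only): Y = Y + a*X. Passing
   SAME_NONZERO_PATTERN takes the fast BLAS axpy path above; with any other
   MatStructure the generic MatAXPY_Basic() is used instead.

     ierr = MatAXPY(Y,2.0,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
*/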
1776 
1777 #undef __FUNCT__
1778 #define __FUNCT__ "MatSetBlockSize_MPIBAIJ"
1779 PetscErrorCode MatSetBlockSize_MPIBAIJ(Mat A,PetscInt bs)
1780 {
1781   Mat_MPIBAIJ    *a   = (Mat_MPIBAIJ*)A->data;
1782   PetscInt rbs,cbs;
1783   PetscErrorCode ierr;
1784 
1785   PetscFunctionBegin;
1786   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1787   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1788   ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr);
1789   ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr);
1790   if (rbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,rbs);
1791   if (cbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,cbs);
1792   PetscFunctionReturn(0);
1793 }
1794 
1795 #undef __FUNCT__
1796 #define __FUNCT__ "MatRealPart_MPIBAIJ"
1797 PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
1798 {
1799   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ*)A->data;
1800   PetscErrorCode ierr;
1801 
1802   PetscFunctionBegin;
1803   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1804   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1805   PetscFunctionReturn(0);
1806 }
1807 
1808 #undef __FUNCT__
1809 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ"
1810 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
1811 {
1812   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ*)A->data;
1813   PetscErrorCode ierr;
1814 
1815   PetscFunctionBegin;
1816   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1817   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1818   PetscFunctionReturn(0);
1819 }
1820 
1821 #undef __FUNCT__
1822 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ"
1823 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
1824 {
1825   PetscErrorCode ierr;
1826   IS             iscol_local;
1827   PetscInt       csize;
1828 
1829   PetscFunctionBegin;
1830   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
1831   if (call == MAT_REUSE_MATRIX) {
1832     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
1833     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
1834   } else {
1835     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
1836   }
1837   ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
1838   if (call == MAT_INITIAL_MATRIX) {
1839     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
1840     ierr = ISDestroy(iscol_local);CHKERRQ(ierr);
1841   }
1842   PetscFunctionReturn(0);
1843 }
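
/*
   Usage sketch (illustrative only): extracting a parallel submatrix via the
   public MatGetSubMatrix(). Each process supplies the rows and columns it
   wants to own in the result; the index sets must respect the block size.
   Here each process simply keeps its own rows, as an identity-like example.

     IS       isrow,iscol;
     Mat      sub;
     PetscInt rstart,rend;

     ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
     ierr = ISCreateStride(PETSC_COMM_WORLD,rend-rstart,rstart,1,&isrow);CHKERRQ(ierr);
     ierr = ISCreateStride(PETSC_COMM_WORLD,rend-rstart,rstart,1,&iscol);CHKERRQ(ierr);
     ierr = MatGetSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);
     ierr = ISDestroy(isrow);CHKERRQ(ierr);
     ierr = ISDestroy(iscol);CHKERRQ(ierr);
*/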
1844 
1845 #undef __FUNCT__
1846 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ_Private"
1847 /*
1848     Not great since it makes two copies of the submatrix: first a SeqBAIJ
1849   copy of the requested rows and columns on each process, and then the final
1850   result assembled by concatenating those local matrices. Writing it directly would be much like MatGetSubMatrices_MPIBAIJ()
1851 */
1852 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
1853 {
1854   PetscErrorCode ierr;
1855   PetscMPIInt    rank,size;
1856   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs;
1857   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
1858   Mat            *local,M,Mreuse;
1859   MatScalar      *vwork,*aa;
1860   MPI_Comm       comm = ((PetscObject)mat)->comm;
1861   Mat_SeqBAIJ    *aij;
1862 
1863 
1864   PetscFunctionBegin;
1865   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1866   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1867 
1868   if (call ==  MAT_REUSE_MATRIX) {
1869     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
1870     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
1871     local = &Mreuse;
1872     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
1873   } else {
1874     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
1875     Mreuse = *local;
1876     ierr   = PetscFree(local);CHKERRQ(ierr);
1877   }
1878 
1879   /*
1880       m - number of local rows
1881       n - number of columns (same on all processors)
1882       rstart - first row in new global matrix generated
1883   */
1884   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
1885   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
1886   m    = m/bs;
1887   n    = n/bs;
1888 
1889   if (call == MAT_INITIAL_MATRIX) {
1890     aij = (Mat_SeqBAIJ*)(Mreuse)->data;
1891     ii  = aij->i;
1892     jj  = aij->j;
1893 
1894     /*
1895         Determine the number of non-zeros in the diagonal and off-diagonal
1896         portions of the matrix in order to do correct preallocation
1897     */
1898 
1899     /* first get start and end of "diagonal" columns */
1900     if (csize == PETSC_DECIDE) {
1901       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
1902       if (mglobal == n*bs) { /* square matrix */
1903         nlocal = m;
1904       } else {
1905         nlocal = n/size + ((n % size) > rank);
1906       }
1907     } else {
1908       nlocal = csize/bs;
1909     }
1910     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
1911     rstart = rend - nlocal;
1912     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
1913 
1914     /* next, compute all the lengths */
1915     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
1916     olens = dlens + m;
1917     for (i=0; i<m; i++) {
1918       jend = ii[i+1] - ii[i];
1919       olen = 0;
1920       dlen = 0;
1921       for (j=0; j<jend; j++) {
1922         if (*jj < rstart || *jj >= rend) olen++;
1923         else dlen++;
1924         jj++;
1925       }
1926       olens[i] = olen;
1927       dlens[i] = dlen;
1928     }
1929     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
1930     ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr);
1931     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
1932     ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
1933     ierr = PetscFree(dlens);CHKERRQ(ierr);
1934   } else {
1935     PetscInt ml,nl;
1936 
1937     M = *newmat;
1938     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
1939     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
1940     ierr = MatZeroEntries(M);CHKERRQ(ierr);
1941     /*
1942          The next two lines are needed so we may call MatSetValuesBlocked_MPIBAIJ() below directly,
1943        rather than the slower MatSetValues().
1944     */
1945     M->was_assembled = PETSC_TRUE;
1946     M->assembled     = PETSC_FALSE;
1947   }
1948   ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
1949   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
1950   aij = (Mat_SeqBAIJ*)(Mreuse)->data;
1951   ii  = aij->i;
1952   jj  = aij->j;
1953   aa  = aij->a;
1954   for (i=0; i<m; i++) {
1955     row   = rstart/bs + i;
1956     nz    = ii[i+1] - ii[i];
1957     cwork = jj;     jj += nz;
1958     vwork = aa;     aa += nz;
1959     ierr = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
1960   }
1961 
1962   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1963   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1964   *newmat = M;
1965 
1966   /* save submatrix used in processor for next request */
1967   if (call ==  MAT_INITIAL_MATRIX) {
1968     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
1969     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
1970   }
1971 
1972   PetscFunctionReturn(0);
1973 }
1974 
1975 #undef __FUNCT__
1976 #define __FUNCT__ "MatPermute_MPIBAIJ"
1977 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B)
1978 {
1979   MPI_Comm       comm,pcomm;
1980   PetscInt       first,local_size,nrows;
1981   const PetscInt *rows;
1982   PetscMPIInt    size;
1983   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1984   PetscErrorCode ierr;
1985 
1986   PetscFunctionBegin;
1987   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
1988   /* make a collective version of 'rowp' */
1989   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
1990   if (pcomm==comm) {
1991     crowp = rowp;
1992   } else {
1993     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
1994     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
1995     ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr);
1996     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
1997   }
1998   /* collect the global row permutation and invert it */
1999   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
2000   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
2001   if (pcomm!=comm) {
2002     ierr = ISDestroy(crowp);CHKERRQ(ierr);
2003   }
2004   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
2005   /* get the local target indices */
2006   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
2007   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
2008   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
2009   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr);
2010   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
2011   ierr = ISDestroy(irowp);CHKERRQ(ierr);
2012   /* the column permutation is so much easier;
2013      make a local version of 'colp' and invert it */
2014   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
2015   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
2016   if (size==1) {
2017     lcolp = colp;
2018   } else {
2019     ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
2020     ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
2021     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr);
2022   }
2023   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
2024   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
2025   ierr = ISSetPermutation(icolp);CHKERRQ(ierr);
2026   if (size>1) {
2027     ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
2028     ierr = ISDestroy(lcolp);CHKERRQ(ierr);
2029   }
2030   /* now we just get the submatrix */
2031   ierr = MatGetSubMatrix_MPIBAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
2032   /* clean up */
2033   ierr = ISDestroy(lrowp);CHKERRQ(ierr);
2034   ierr = ISDestroy(icolp);CHKERRQ(ierr);
2035   PetscFunctionReturn(0);
2036 }
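
/*
   Usage sketch (illustrative only): permuting rows and columns of A into a
   new matrix B; rowp and colp are hypothetical index sets that have been
   marked with ISSetPermutation().

     Mat B;
     ierr = MatPermute(A,rowp,colp,&B);CHKERRQ(ierr);
*/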
2037 
2038 #undef __FUNCT__
2039 #define __FUNCT__ "MatGetGhosts_MPIBAIJ"
2040 PetscErrorCode PETSCMAT_DLLEXPORT MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
2041 {
2042   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*) mat->data;
2043   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
2044 
2045   PetscFunctionBegin;
2046   if (nghosts) { *nghosts = B->nbs;}
2047   if (ghosts) {*ghosts = baij->garray;}
2048   PetscFunctionReturn(0);
2049 }
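
/*
   Usage sketch (illustrative only): retrieving the global indices of the
   ghost (off-process) block columns referenced by this process, e.g. to
   build a ghosted work vector. The returned array is owned by the matrix.

     PetscInt       nghosts;
     const PetscInt *ghosts;
     ierr = MatGetGhosts(mat,&nghosts,&ghosts);CHKERRQ(ierr);
*/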
2050 
2051 EXTERN PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat);
2052 
2053 #undef __FUNCT__
2054 #define __FUNCT__ "MatFDColoringCreate_MPIBAIJ"
2055 /*
2056     This routine is almost identical to MatFDColoringCreate_MPIAIJ()!
2057 */
2058 PetscErrorCode MatFDColoringCreate_MPIBAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c)
2059 {
2060   Mat_MPIBAIJ            *baij = (Mat_MPIBAIJ*)mat->data;
2061   PetscErrorCode        ierr;
2062   PetscMPIInt           size,*ncolsonproc,*disp,nn;
2063   PetscInt              bs,i,n,nrows,j,k,m,*rows = 0,*A_ci,*A_cj,ncols,col;
2064   const PetscInt        *is;
2065   PetscInt              nis = iscoloring->n,nctot,*cols,*B_ci,*B_cj;
2066   PetscInt              *rowhit,M,cstart,cend,colb;
2067   PetscInt              *columnsforrow,l;
2068   IS                    *isa;
2069   PetscTruth             done,flg;
2070   ISLocalToGlobalMapping map = mat->bmapping;
2071   PetscInt               *ltog = (map ? map->indices : (PetscInt*)PETSC_NULL),ctype=c->ctype;
2072 
2073   PetscFunctionBegin;
2074   if (!mat->assembled) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled first; MatAssemblyBegin/End();");
2075   if (ctype == IS_COLORING_GHOSTED && !map) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_INCOMP,"When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMappingBlock");
2076 
2077   ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr);
2078   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2079   M                = mat->rmap->n/bs;
2080   cstart           = mat->cmap->rstart/bs;
2081   cend             = mat->cmap->rend/bs;
2082   c->M             = mat->rmap->N/bs;  /* set the global rows and columns and local rows */
2083   c->N             = mat->cmap->N/bs;
2084   c->m             = mat->rmap->n/bs;
2085   c->rstart        = mat->rmap->rstart/bs;
2086 
2087   c->ncolors       = nis;
2088   ierr             = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr);
2089   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr);
2090   ierr             = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr);
2091   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr);
2092   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr);
2093   ierr = PetscLogObjectMemory(c,5*nis*sizeof(PetscInt));CHKERRQ(ierr);
2094 
2095   /* Allow access to data structures of local part of matrix */
2096   if (!baij->colmap) {
2097     ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
2098   }
2099   ierr = MatGetColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr);
2100   ierr = MatGetColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr);
2101 
2102   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr);
2103   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr);
2104 
2105   for (i=0; i<nis; i++) {
2106     ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr);
2107     ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr);
2108     c->ncolumns[i] = n;
2109     if (n) {
2110       ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr);
2111       ierr = PetscLogObjectMemory(c,n*sizeof(PetscInt));CHKERRQ(ierr);
2112       ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr);
2113     } else {
2114       c->columns[i]  = 0;
2115     }
2116 
2117     if (ctype == IS_COLORING_GLOBAL){
2118       /* Determine the total (parallel) number of columns of this color */
2119       ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
2120       ierr = PetscMalloc2(size,PetscMPIInt,&ncolsonproc,size,PetscMPIInt,&disp);CHKERRQ(ierr);
2121 
2122       nn   = PetscMPIIntCast(n);
2123       ierr = MPI_Allgather(&nn,1,MPI_INT,ncolsonproc,1,MPI_INT,((PetscObject)mat)->comm);CHKERRQ(ierr);
2124       nctot = 0; for (j=0; j<size; j++) {nctot += ncolsonproc[j];}
2125       if (!nctot) {
2126         ierr = PetscInfo(mat,"Coloring of matrix has some unneeded colors with no corresponding rows\n");CHKERRQ(ierr);
2127       }
2128 
2129       disp[0] = 0;
2130       for (j=1; j<size; j++) {
2131         disp[j] = disp[j-1] + ncolsonproc[j-1];
2132       }
2133 
2134       /* Get complete list of columns for color on each processor */
2135       ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2136       ierr = MPI_Allgatherv((void*)is,n,MPIU_INT,cols,ncolsonproc,disp,MPIU_INT,((PetscObject)mat)->comm);CHKERRQ(ierr);
2137       ierr = PetscFree2(ncolsonproc,disp);CHKERRQ(ierr);
2138     } else if (ctype == IS_COLORING_GHOSTED){
2139       /* Determine local number of columns of this color on this process, including ghost points */
2140       nctot = n;
2141       ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2142       ierr = PetscMemcpy(cols,is,n*sizeof(PetscInt));CHKERRQ(ierr);
2143     } else {
2144       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not provided for this MatFDColoring type");
2145     }
2146 
2147     /*
2148        Mark all rows affected by these columns
2149     */
2150     /* Temporary option to allow for debugging/testing */
2151     flg  = PETSC_FALSE;
2152     ierr = PetscOptionsGetTruth(PETSC_NULL,"-matfdcoloring_slow",&flg,PETSC_NULL);CHKERRQ(ierr);
2153     if (!flg) {/*-----------------------------------------------------------------------------*/
2154       /* crude, fast version */
2155       ierr = PetscMemzero(rowhit,M*sizeof(PetscInt));CHKERRQ(ierr);
2156       /* loop over columns*/
2157       for (j=0; j<nctot; j++) {
2158         if (ctype == IS_COLORING_GHOSTED) {
2159           col = ltog[cols[j]];
2160         } else {
2161           col  = cols[j];
2162         }
2163         if (col >= cstart && col < cend) {
2164           /* column is in diagonal block of matrix */
2165           rows = A_cj + A_ci[col-cstart];
2166           m    = A_ci[col-cstart+1] - A_ci[col-cstart];
2167         } else {
2168 #if defined (PETSC_USE_CTABLE)
2169           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2170 	  colb --;
2171 #else
2172           colb = baij->colmap[col] - 1;
2173 #endif
2174           if (colb == -1) {
2175             m = 0;
2176           } else {
2177             colb = colb/bs;
2178             rows = B_cj + B_ci[colb];
2179             m    = B_ci[colb+1] - B_ci[colb];
2180           }
2181         }
2182         /* mark, in rowhit, every row that this column touches */
2183         for (k=0; k<m; k++) {
2184           rowhit[*rows++] = col + 1;
2185         }
2186       }
2187 
2188       /* count the number of hits */
2189       nrows = 0;
2190       for (j=0; j<M; j++) {
2191         if (rowhit[j]) nrows++;
2192       }
2193       c->nrows[i]         = nrows;
2194       ierr                = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
2195       ierr                = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
2196       ierr = PetscLogObjectMemory(c,2*(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr);
2197       nrows = 0;
2198       for (j=0; j<M; j++) {
2199         if (rowhit[j]) {
2200           c->rows[i][nrows]           = j;
2201           c->columnsforrow[i][nrows] = rowhit[j] - 1;
2202           nrows++;
2203         }
2204       }
2205     } else {/*-------------------------------------------------------------------------------*/
2206       /* slow version, using rowhit as a linked list */
2207       PetscInt currentcol,fm,mfm;
2208       rowhit[M] = M;
2209       nrows     = 0;
2210       /* loop over columns*/
2211       for (j=0; j<nctot; j++) {
2212         if (ctype == IS_COLORING_GHOSTED) {
2213           col = ltog[cols[j]];
2214         } else {
2215           col  = cols[j];
2216         }
2217         if (col >= cstart && col < cend) {
2218           /* column is in diagonal block of matrix */
2219           rows = A_cj + A_ci[col-cstart];
2220           m    = A_ci[col-cstart+1] - A_ci[col-cstart];
2221         } else {
2222 #if defined (PETSC_USE_CTABLE)
2223           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2224           colb --;
2225 #else
2226           colb = baij->colmap[col] - 1;
2227 #endif
2228           if (colb == -1) {
2229             m = 0;
2230           } else {
2231             colb = colb/bs;
2232             rows = B_cj + B_ci[colb];
2233             m    = B_ci[colb+1] - B_ci[colb];
2234           }
2235         }
2236 
2237         /* loop over the rows of this column, inserting them into the rowhit linked list */
2238         fm    = M; /* fm points to first entry in linked list */
2239         for (k=0; k<m; k++) {
2240           currentcol = *rows++;
2241 	  /* is it already in the list? */
2242           do {
2243             mfm  = fm;
2244             fm   = rowhit[fm];
2245           } while (fm < currentcol);
2246           /* not in list so add it */
2247           if (fm != currentcol) {
2248             nrows++;
2249             columnsforrow[currentcol] = col;
2250             /* next three lines insert new entry into linked list */
2251             rowhit[mfm]               = currentcol;
2252             rowhit[currentcol]        = fm;
2253             fm                        = currentcol;
2254             /* fm points to present position in list since we know the columns are sorted */
2255           } else {
2256             SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid coloring of matrix detected");
2257           }
2258         }
2259       }
2260       c->nrows[i]         = nrows;
2261       ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
2262       ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
2263       ierr = PetscLogObjectMemory(c,(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr);
2264       /* now store the linked list of rows into c->rows[i] */
2265       nrows = 0;
2266       fm    = rowhit[M];
2267       do {
2268         c->rows[i][nrows]            = fm;
2269         c->columnsforrow[i][nrows++] = columnsforrow[fm];
2270         fm                           = rowhit[fm];
2271       } while (fm < M);
2272     } /* ---------------------------------------------------------------------------------------*/
2273     ierr = PetscFree(cols);CHKERRQ(ierr);
2274   }
2275 
2276   /* Optimize by adding the vscale, and scaleforrow[][] fields */
2277   /*
2278        vscale will contain the "diagonal" on processor scalings followed by the off processor
2279   */
2280   if (ctype == IS_COLORING_GLOBAL) {
2281     PetscInt *garray;
2282     ierr = PetscMalloc(baij->B->cmap->n*sizeof(PetscInt),&garray);CHKERRQ(ierr);
2283     for (i=0; i<baij->B->cmap->n/bs; i++) {
2284       for (j=0; j<bs; j++) {
2285         garray[i*bs+j] = bs*baij->garray[i]+j;
2286       }
2287     }
2288     ierr = VecCreateGhost(((PetscObject)mat)->comm,baij->A->rmap->n,PETSC_DETERMINE,baij->B->cmap->n,garray,&c->vscale);CHKERRQ(ierr);
2289     ierr = PetscFree(garray);CHKERRQ(ierr);
2290     CHKMEMQ;
2291     ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
2292     for (k=0; k<c->ncolors; k++) {
2293       ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
2294       for (l=0; l<c->nrows[k]; l++) {
2295         col = c->columnsforrow[k][l];
2296         if (col >= cstart && col < cend) {
2297           /* column is in diagonal block of matrix */
2298           colb = col - cstart;
2299         } else {
2300           /* column  is in "off-processor" part */
2301 #if defined (PETSC_USE_CTABLE)
2302           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2303           colb --;
2304 #else
2305           colb = baij->colmap[col] - 1;
2306 #endif
2307           colb = colb/bs;
2308           colb += cend - cstart;
2309         }
2310         c->vscaleforrow[k][l] = colb;
2311       }
2312     }
2313   } else if (ctype == IS_COLORING_GHOSTED) {
2314     /* Get gtol mapping */
2315     PetscInt N = mat->cmap->N, *gtol;
2316     ierr = PetscMalloc((N+1)*sizeof(PetscInt),&gtol);CHKERRQ(ierr);
2317     for (i=0; i<N; i++) gtol[i] = -1;
2318     for (i=0; i<map->n; i++) gtol[ltog[i]] = i;
2319 
2320     c->vscale = 0; /* will be created in MatFDColoringApply() */
2321     ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
2322     for (k=0; k<c->ncolors; k++) {
2323       ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
2324       for (l=0; l<c->nrows[k]; l++) {
2325         col = c->columnsforrow[k][l];      /* global column index */
2326         c->vscaleforrow[k][l] = gtol[col]; /* local column index */
2327       }
2328     }
2329     ierr = PetscFree(gtol);CHKERRQ(ierr);
2330   }
2331   ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr);
2332 
2333   ierr = PetscFree(rowhit);CHKERRQ(ierr);
2334   ierr = PetscFree(columnsforrow);CHKERRQ(ierr);
2335   ierr = MatRestoreColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr);
2336   ierr = MatRestoreColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr);
2337   CHKMEMQ;
2338   PetscFunctionReturn(0);
2339 }
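
/*
   Usage sketch (illustrative only): this routine is reached through the
   public coloring interface, roughly as follows; J is a hypothetical
   Jacobian that has already been assembled with the correct nonzero
   pattern.

     ISColoring    iscoloring;
     MatFDColoring fdcoloring;

     ierr = MatGetColoring(J,MATCOLORING_SL,&iscoloring);CHKERRQ(ierr);
     ierr = MatFDColoringCreate(J,iscoloring,&fdcoloring);CHKERRQ(ierr);
     ierr = ISColoringDestroy(iscoloring);CHKERRQ(ierr);
*/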
2340 
2341 #undef __FUNCT__
2342 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIBAIJ"
2343 PetscErrorCode MatGetSeqNonzerostructure_MPIBAIJ(Mat A,Mat *newmat)
2344 {
2345   Mat            B;
2346   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ *)A->data;
2347   Mat_SeqBAIJ    *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data;
2348   Mat_SeqAIJ     *b;
2349   PetscErrorCode ierr;
2350   PetscMPIInt    size,rank,*recvcounts = 0,*displs = 0;
2351   PetscInt       sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs;
2352   PetscInt       m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf;
2353 
2354   PetscFunctionBegin;
2355   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
2356   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
2357 
2358   /* ----------------------------------------------------------------
2359      Tell every processor the number of nonzeros per row
2360   */
2361   ierr = PetscMalloc((A->rmap->N/bs)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
2362   for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) {
2363     lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs];
2364   }
2365   sendcount = A->rmap->rend/bs - A->rmap->rstart/bs;
2366   ierr = PetscMalloc(2*size*sizeof(PetscMPIInt),&recvcounts);CHKERRQ(ierr);
2367   displs     = recvcounts + size;
2368   for (i=0; i<size; i++) {
2369     recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs;
2370     displs[i]     = A->rmap->range[i]/bs;
2371   }
2372 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2373   ierr  = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2374 #else
2375   ierr  = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2376 #endif
2377   /* ---------------------------------------------------------------
2378      Create the sequential matrix of the same type as the local block diagonal
2379   */
2380   ierr  = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
2381   ierr  = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2382   ierr  = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2383   ierr  = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr);
2384   b = (Mat_SeqAIJ *)B->data;
2385 
2386   /*--------------------------------------------------------------------
2387     Copy my part of matrix column indices over
2388   */
2389   sendcount  = ad->nz + bd->nz;
2390   jsendbuf   = b->j + b->i[rstarts[rank]/bs];
2391   a_jsendbuf = ad->j;
2392   b_jsendbuf = bd->j;
2393   n          = A->rmap->rend/bs - A->rmap->rstart/bs;
2394   cnt        = 0;
2395   for (i=0; i<n; i++) {
2396 
2397     /* put in lower diagonal portion */
2398     m = bd->i[i+1] - bd->i[i];
2399     while (m > 0) {
2400       /* is it above diagonal (in bd (compressed) numbering) */
2401       if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break;
2402       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2403       m--;
2404     }
2405 
2406     /* put in diagonal portion */
2407     for (j=ad->i[i]; j<ad->i[i+1]; j++) {
2408       jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++;
2409     }
2410 
2411     /* put in upper diagonal portion */
2412     while (m-- > 0) {
2413       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2414     }
2415   }
2416   if (cnt != sendcount) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt);
2417 
2418   /*--------------------------------------------------------------------
2419     Gather all column indices to all processors
2420   */
2421   for (i=0; i<size; i++) {
2422     recvcounts[i] = 0;
2423     for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) {
2424       recvcounts[i] += lens[j];
2425     }
2426   }
2427   displs[0]  = 0;
2428   for (i=1; i<size; i++) {
2429     displs[i] = displs[i-1] + recvcounts[i-1];
2430   }
2431 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2432   ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2433 #else
2434   ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2435 #endif
2436   /*--------------------------------------------------------------------
2437     Assemble the matrix into usable form (note: the numerical values are not yet set)
2438   */
2439   /* set the b->ilen (length of each row) values */
2440   ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr);
2441   /* set the b->i indices */
2442   b->i[0] = 0;
2443   for (i=1; i<=A->rmap->N/bs; i++) {
2444     b->i[i] = b->i[i-1] + lens[i-1];
2445   }
2446   ierr = PetscFree(lens);CHKERRQ(ierr);
2447   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2448   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2449   ierr = PetscFree(recvcounts);CHKERRQ(ierr);
2450 
2451   if (A->symmetric){
2452     ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2453   } else if (A->hermitian) {
2454     ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr);
2455   } else if (A->structurally_symmetric) {
2456     ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2457   }
2458   *newmat = B;
2459   PetscFunctionReturn(0);
2460 }
2461 
2462 #undef __FUNCT__
2463 #define __FUNCT__ "MatSOR_MPIBAIJ"
2464 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2465 {
2466   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
2467   PetscErrorCode ierr;
2468   Vec            bb1 = 0;
2469 
2470   PetscFunctionBegin;
2471   if (its > 1 || !(flag & SOR_ZERO_INITIAL_GUESS)) {
2472     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2473   }
2474 
2475   if (flag == SOR_APPLY_UPPER) {
2476     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2477     PetscFunctionReturn(0);
2478   }
2479 
2480   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2481     if (flag & SOR_ZERO_INITIAL_GUESS) {
2482       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2483       its--;
2484     }
2485 
2486     while (its--) {
2487       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2488       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2489 
2490       /* update rhs: bb1 = bb - B*x */
2491       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2492       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2493 
2494       /* local sweep */
2495       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2496     }
2497   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
2498     if (flag & SOR_ZERO_INITIAL_GUESS) {
2499       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2500       its--;
2501     }
2502     while (its--) {
2503       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2504       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2505 
2506       /* update rhs: bb1 = bb - B*x */
2507       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2508       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2509 
2510       /* local sweep */
2511       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2512     }
2513   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
2514     if (flag & SOR_ZERO_INITIAL_GUESS) {
2515       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2516       its--;
2517     }
2518     while (its--) {
2519       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2520       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2521 
2522       /* update rhs: bb1 = bb - B*x */
2523       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2524       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2525 
2526       /* local sweep */
2527       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2528     }
2529   } else SETERRQ(((PetscObject)matin)->comm,PETSC_ERR_SUP,"Requested parallel SOR variant is not supported");
2530 
2531   if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);}
2532   PetscFunctionReturn(0);
2533 }
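
/*
   Usage sketch (illustrative only): MatSOR() is normally invoked through
   PCSOR rather than called directly. A minimal direct call, assuming an
   assembled MPIBAIJ matrix mat and conforming vectors b and x; the local
   variants above amount to block Jacobi with SOR applied on each process.

     ierr = MatSOR(mat,b,1.0,SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS,
                   0.0,1,1,x);CHKERRQ(ierr);
*/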
2534 
2535 extern PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_BAIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2536 
2537 
2538 /* -------------------------------------------------------------------*/
2539 static struct _MatOps MatOps_Values = {
2540        MatSetValues_MPIBAIJ,
2541        MatGetRow_MPIBAIJ,
2542        MatRestoreRow_MPIBAIJ,
2543        MatMult_MPIBAIJ,
2544 /* 4*/ MatMultAdd_MPIBAIJ,
2545        MatMultTranspose_MPIBAIJ,
2546        MatMultTransposeAdd_MPIBAIJ,
2547        0,
2548        0,
2549        0,
2550 /*10*/ 0,
2551        0,
2552        0,
2553        MatSOR_MPIBAIJ,
2554        MatTranspose_MPIBAIJ,
2555 /*15*/ MatGetInfo_MPIBAIJ,
2556        MatEqual_MPIBAIJ,
2557        MatGetDiagonal_MPIBAIJ,
2558        MatDiagonalScale_MPIBAIJ,
2559        MatNorm_MPIBAIJ,
2560 /*20*/ MatAssemblyBegin_MPIBAIJ,
2561        MatAssemblyEnd_MPIBAIJ,
2562        MatSetOption_MPIBAIJ,
2563        MatZeroEntries_MPIBAIJ,
2564 /*24*/ MatZeroRows_MPIBAIJ,
2565        0,
2566        0,
2567        0,
2568        0,
2569 /*29*/ MatSetUpPreallocation_MPIBAIJ,
2570        0,
2571        0,
2572        0,
2573        0,
2574 /*34*/ MatDuplicate_MPIBAIJ,
2575        0,
2576        0,
2577        0,
2578        0,
2579 /*39*/ MatAXPY_MPIBAIJ,
2580        MatGetSubMatrices_MPIBAIJ,
2581        MatIncreaseOverlap_MPIBAIJ,
2582        MatGetValues_MPIBAIJ,
2583        MatCopy_MPIBAIJ,
2584 /*44*/ 0,
2585        MatScale_MPIBAIJ,
2586        0,
2587        0,
2588        0,
2589 /*49*/ MatSetBlockSize_MPIBAIJ,
2590        0,
2591        0,
2592        0,
2593        0,
2594 /*54*/ MatFDColoringCreate_MPIBAIJ,
2595        0,
2596        MatSetUnfactored_MPIBAIJ,
2597        MatPermute_MPIBAIJ,
2598        MatSetValuesBlocked_MPIBAIJ,
2599 /*59*/ MatGetSubMatrix_MPIBAIJ,
2600        MatDestroy_MPIBAIJ,
2601        MatView_MPIBAIJ,
2602        0,
2603        0,
2604 /*64*/ 0,
2605        0,
2606        0,
2607        0,
2608        0,
2609 /*69*/ MatGetRowMaxAbs_MPIBAIJ,
2610        0,
2611        0,
2612        0,
2613        0,
2614 /*74*/ 0,
2615        MatFDColoringApply_BAIJ,
2616        0,
2617        0,
2618        0,
2619 /*79*/ 0,
2620        0,
2621        0,
2622        0,
2623        0,
2624 /*84*/ 0,
2625        0,
2626        0,
2627        0,
2628        0,
2629 /*89*/ 0,
2630        0,
2631        0,
2632        0,
2633        0,
2634 /*94*/ 0,
2635        0,
2636        0,
2637        0,
2638        0,
2639 /*99*/ 0,
2640        0,
2641        0,
2642        0,
2643        0,
2644 /*104*/0,
2645        MatRealPart_MPIBAIJ,
2646        MatImaginaryPart_MPIBAIJ,
2647        0,
2648        0,
2649 /*109*/0,
2650        0,
2651        0,
2652        0,
2653        0,
2654 /*114*/MatGetSeqNonzerostructure_MPIBAIJ,
2655        0,
2656        MatGetGhosts_MPIBAIJ,
2657        0,
2658        0,
2659 /*119*/0,
2660        0,
2661        0,
2662        0,
2663        MatLoadnew_MPIBAIJ
2664 };
2665 
2666 EXTERN_C_BEGIN
2667 #undef __FUNCT__
2668 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
2669 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
2670 {
2671   PetscFunctionBegin;
2672   *a      = ((Mat_MPIBAIJ *)A->data)->A;
2673   *iscopy = PETSC_FALSE;
2674   PetscFunctionReturn(0);
2675 }
2676 EXTERN_C_END
2677 
2678 EXTERN_C_BEGIN
2679 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*);
2680 EXTERN_C_END
2681 
2682 EXTERN_C_BEGIN
2683 #undef __FUNCT__
2684 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ"
2685 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2686 {
2687   PetscInt       m,rstart,cstart,cend;
2688   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
2689   const PetscInt *JJ=0;
2690   PetscScalar    *values=0;
2691   PetscErrorCode ierr;
2692 
2693   PetscFunctionBegin;
2694 
2695   if (bs < 1) SETERRQ1(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2696   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2697   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2698   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2699   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2700   m      = B->rmap->n/bs;
2701   rstart = B->rmap->rstart/bs;
2702   cstart = B->cmap->rstart/bs;
2703   cend   = B->cmap->rend/bs;
2704 
2705   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
2706   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
2707   for (i=0; i<m; i++) {
2708     nz = ii[i+1] - ii[i];
2709     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
2710     nz_max = PetscMax(nz_max,nz);
2711     JJ  = jj + ii[i];
2712     for (j=0; j<nz; j++) {
2713       if (*JJ >= cstart) break;
2714       JJ++;
2715     }
2716     d = 0;
2717     for (; j<nz; j++) {
2718       if (*JJ++ >= cend) break;
2719       d++;
2720     }
2721     d_nnz[i] = d;
2722     o_nnz[i] = nz - d;
2723   }
2724   ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2725   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
2726 
2727   values = (PetscScalar*)V;
2728   if (!values) {
2729     ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2730     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2731   }
2732   for (i=0; i<m; i++) {
2733     PetscInt          row    = i + rstart;
2734     PetscInt          ncols  = ii[i+1] - ii[i];
2735     const PetscInt    *icols = jj + ii[i];
2736     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2737     ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2738   }
2739 
2740   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2741   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2742   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2743 
2744   PetscFunctionReturn(0);
2745 }
2746 EXTERN_C_END
2747 
2748 #undef __FUNCT__
2749 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR"
2750 /*@C
2751    MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ
2752    (block compressed row) format.
2753 
2754    Collective on MPI_Comm
2755 
2756    Input Parameters:
2757 +  B - the matrix
2758 .  bs - the block size
2759 .  i - the indices into j for the start of each local block row (starts with zero)
2760 .  j - the global block column indices for each local block row (starts with zero); these must be sorted for each row
2761 -  v - optional values in the matrix
2762 
2763    Level: developer
2764 .keywords: matrix, baij, compressed block row, sparse, parallel
2765 
2766 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIBAIJ(), MPIBAIJ
2767 @*/
2768 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2769 {
2770   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]);
2771 
2772   PetscFunctionBegin;
2773   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
2774   if (f) {
2775     ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr);
2776   }
2777   PetscFunctionReturn(0);
2778 }
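/*
   Illustrative usage sketch (an assumption of this edit, not part of the original source):
   preallocating with CSR block data for bs=2 on a single process that owns two block rows,
   with block columns {0,1} in the first block row and {1} in the second.

      PetscInt    i[] = {0,2,3};     block row k owns entries j[i[k]] .. j[i[k+1]-1]
      PetscInt    j[] = {0,1,1};     sorted block column indices
      PetscScalar v[3*2*2];          one bs*bs block of values per block entry
      ierr = MatCreate(PETSC_COMM_WORLD,&B);CHKERRQ(ierr);
      ierr = MatSetSizes(B,4,4,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
      ierr = MatSetType(B,MATMPIBAIJ);CHKERRQ(ierr);
      ierr = MatMPIBAIJSetPreallocationCSR(B,2,i,j,v);CHKERRQ(ierr);
*/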
2779 
2780 EXTERN_C_BEGIN
2781 #undef __FUNCT__
2782 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
2783 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
2784 {
2785   Mat_MPIBAIJ    *b;
2786   PetscErrorCode ierr;
2787   PetscInt       i, newbs = PetscAbs(bs);
2788 
2789   PetscFunctionBegin;
2790   if (bs < 0) {
2791     ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPIBAIJ matrix","Mat");CHKERRQ(ierr);
2792       ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr);
2793     ierr = PetscOptionsEnd();CHKERRQ(ierr);
2794     bs   = PetscAbs(bs);
2795   }
2796   if ((d_nnz || o_nnz) && newbs != bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting d_nnz or o_nnz");
2797   bs = newbs;
2798 
2799 
2800   if (bs < 1) SETERRQ(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
2801   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2802   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2803   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2804   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2805 
2806   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2807   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2808   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2809   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2810 
2811   if (d_nnz) {
2812     for (i=0; i<B->rmap->n/bs; i++) {
2813       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
2814     }
2815   }
2816   if (o_nnz) {
2817     for (i=0; i<B->rmap->n/bs; i++) {
2818       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
2819     }
2820   }
2821 
2822   b = (Mat_MPIBAIJ*)B->data;
2823   b->bs2 = bs*bs;
2824   b->mbs = B->rmap->n/bs;
2825   b->nbs = B->cmap->n/bs;
2826   b->Mbs = B->rmap->N/bs;
2827   b->Nbs = B->cmap->N/bs;
2828 
2829   for (i=0; i<=b->size; i++) {
2830     b->rangebs[i] = B->rmap->range[i]/bs;
2831   }
2832   b->rstartbs = B->rmap->rstart/bs;
2833   b->rendbs   = B->rmap->rend/bs;
2834   b->cstartbs = B->cmap->rstart/bs;
2835   b->cendbs   = B->cmap->rend/bs;
2836 
2837   if (!B->preallocated) {
2838     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2839     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2840     ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
2841     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
2842     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2843     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2844     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
2845     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
2846     ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr);
2847   }
2848 
2849   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2850   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
2851   B->preallocated = PETSC_TRUE;
2852   PetscFunctionReturn(0);
2853 }
2854 EXTERN_C_END
2855 
2856 EXTERN_C_BEGIN
2857 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
2858 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
2859 EXTERN_C_END
2860 
2861 
2862 EXTERN_C_BEGIN
2863 #undef __FUNCT__
2864 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj"
2865 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPIAdj(Mat B, const MatType newtype,MatReuse reuse,Mat *adj)
2866 {
2867   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
2868   PetscErrorCode ierr;
2869   Mat_SeqBAIJ    *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data;
2870   PetscInt       M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs;
2871   const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;
2872 
2873   PetscFunctionBegin;
2874   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&ii);CHKERRQ(ierr);
2875   ii[0] = 0;
2876   CHKMEMQ;
2877   for (i=0; i<M; i++) {
2878     if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]);
2879     if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]);
2880     ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i];
2881     /* remove one from the count if the matrix row has a diagonal entry */
2882     for (j=id[i]; j<id[i+1]; j++) {
2883       if (jd[j] == i) {ii[i+1]--;break;}
2884     }
2885     CHKMEMQ;
2886   }
2887   ierr = PetscMalloc(ii[M]*sizeof(PetscInt),&jj);CHKERRQ(ierr);
2888   cnt = 0;
2889   for (i=0; i<M; i++) {
2890     for (j=io[i]; j<io[i+1]; j++) {
2891       if (garray[jo[j]] > rstart) break;
2892       jj[cnt++] = garray[jo[j]];
2893       CHKMEMQ;
2894     }
2895     for (k=id[i]; k<id[i+1]; k++) {
2896       if (jd[k] != i) {
2897         jj[cnt++] = rstart + jd[k];
2898         CHKMEMQ;
2899       }
2900     }
2901     for (;j<io[i+1]; j++) {
2902       jj[cnt++] = garray[jo[j]];
2903       CHKMEMQ;
2904     }
2905   }
2906   ierr = MatCreateMPIAdj(((PetscObject)B)->comm,M,B->cmap->N/B->rmap->bs,ii,jj,PETSC_NULL,adj);CHKERRQ(ierr);
2907   PetscFunctionReturn(0);
2908 }
2909 EXTERN_C_END
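/*
   Illustrative sketch (an assumption of this edit): the conversion above is normally
   reached through MatConvert() once the routine has been composed on the matrix
   (see MatCreate_MPIBAIJ() below):

      Mat adj;
      ierr = MatConvert(B,MATMPIADJ,MAT_INITIAL_MATRIX,&adj);CHKERRQ(ierr);
*/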
2910 
2911 EXTERN_C_BEGIN
2912 #if defined(PETSC_HAVE_MUMPS)
2913 extern PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*);
2914 #endif
2915 EXTERN_C_END
2916 
2917 /*MC
2918    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
2919 
2920    Options Database Keys:
2921 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
2922 . -mat_block_size <bs> - set the blocksize used to store the matrix
2923 - -mat_use_hash_table <fact> - use a hash table during matrix assembly to save memory, sized with factor <fact>
2924 
2925   Level: beginner
2926 
2927 .seealso: MatCreateMPIBAIJ()
2928 M*/
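/*
   Example (illustrative; the executable name ex1 is a placeholder): selecting this type,
   a block size, and the hash table option from the options database, for a code that
   calls MatSetFromOptions() on its matrix:

      ./ex1 -mat_type mpibaij -mat_block_size 3 -mat_use_hash_table 1.6
*/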
2929 
2930 EXTERN_C_BEGIN
2931 #undef __FUNCT__
2932 #define __FUNCT__ "MatCreate_MPIBAIJ"
2933 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIBAIJ(Mat B)
2934 {
2935   Mat_MPIBAIJ    *b;
2936   PetscErrorCode ierr;
2937   PetscTruth     flg;
2938 
2939   PetscFunctionBegin;
2940   ierr = PetscNewLog(B,Mat_MPIBAIJ,&b);CHKERRQ(ierr);
2941   B->data = (void*)b;
2942 
2943 
2944   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2945   B->mapping    = 0;
2946   B->assembled  = PETSC_FALSE;
2947 
2948   B->insertmode = NOT_SET_VALUES;
2949   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
2950   ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr);
2951 
2952   /* build local table of row and column ownerships */
2953   ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);
2954 
2955   /* build cache for off array entries formed */
2956   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
2957   b->donotstash  = PETSC_FALSE;
2958   b->colmap      = PETSC_NULL;
2959   b->garray      = PETSC_NULL;
2960   b->roworiented = PETSC_TRUE;
2961 
2962   /* stuff used in block assembly */
2963   b->barray       = 0;
2964 
2965   /* stuff used for matrix vector multiply */
2966   b->lvec         = 0;
2967   b->Mvctx        = 0;
2968 
2969   /* stuff for MatGetRow() */
2970   b->rowindices   = 0;
2971   b->rowvalues    = 0;
2972   b->getrowactive = PETSC_FALSE;
2973 
2974   /* hash table stuff */
2975   b->ht           = 0;
2976   b->hd           = 0;
2977   b->ht_size      = 0;
2978   b->ht_flag      = PETSC_FALSE;
2979   b->ht_fact      = 0;
2980   b->ht_total_ct  = 0;
2981   b->ht_insert_ct = 0;
2982 
2983   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for MPIBAIJ matrix","Mat");CHKERRQ(ierr);
2984     ierr = PetscOptionsTruth("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
2985     if (flg) {
2986       PetscReal fact = 1.39;
2987       ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
2988       ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr);
2989       if (fact <= 1.0) fact = 1.39;
2990       ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
2991       ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
2992     }
2993   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2994 
2995 #if defined(PETSC_HAVE_MUMPS)
2996   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps",MatGetFactor_baij_mumps);CHKERRQ(ierr);
2997 #endif
2998   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",
2999                                      "MatConvert_MPIBAIJ_MPIAdj",
3000                                       MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr);
3001   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3002                                      "MatStoreValues_MPIBAIJ",
3003                                      MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
3004   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3005                                      "MatRetrieveValues_MPIBAIJ",
3006                                      MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
3007   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
3008                                      "MatGetDiagonalBlock_MPIBAIJ",
3009                                      MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
3010   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C",
3011                                      "MatMPIBAIJSetPreallocation_MPIBAIJ",
3012                                      MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
3013   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",
3014                                      "MatMPIBAIJSetPreallocationCSR_MPIBAIJ",
3015                                      MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
3016   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
3017                                      "MatDiagonalScaleLocal_MPIBAIJ",
3018                                      MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
3019   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C",
3020                                      "MatSetHashTableFactor_MPIBAIJ",
3021                                      MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
3022   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);
3023   PetscFunctionReturn(0);
3024 }
3025 EXTERN_C_END
3026 
3027 /*MC
3028    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
3029 
3030    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
3031    and MATMPIBAIJ otherwise.
3032 
3033    Options Database Keys:
3034 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
3035 
3036   Level: beginner
3037 
3038 .seealso: MatCreateMPIBAIJ(), MATSEQBAIJ, MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3039 M*/
3040 
3041 EXTERN_C_BEGIN
3042 #undef __FUNCT__
3043 #define __FUNCT__ "MatCreate_BAIJ"
3044 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_BAIJ(Mat A)
3045 {
3046   PetscErrorCode ierr;
3047   PetscMPIInt    size;
3048 
3049   PetscFunctionBegin;
3050   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
3051   if (size == 1) {
3052     ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr);
3053   } else {
3054     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
3055   }
3056   PetscFunctionReturn(0);
3057 }
3058 EXTERN_C_END
3059 
3060 #undef __FUNCT__
3061 #define __FUNCT__ "MatMPIBAIJSetPreallocation"
3062 /*@C
3063    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
3064    (block compressed row).  For good matrix assembly performance
3065    the user should preallocate the matrix storage by setting the parameters
3066    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3067    performance can be increased by more than a factor of 50.
3068 
3069    Collective on Mat
3070 
3071    Input Parameters:
3072 +  A - the matrix
3073 .  bs   - size of block
3074 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
3075            submatrix  (same for all local rows)
3076 .  d_nnz - array containing the number of block nonzeros in the various block rows
3077            in the diagonal portion of the local submatrix (possibly different for each block
3078            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
3079 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
3080            submatrix (same for all local rows).
3081 -  o_nnz - array containing the number of nonzeros in the various block rows of the
3082            off-diagonal portion of the local submatrix (possibly different for
3083            each block row) or PETSC_NULL.
3084 
3085    If the *_nnz parameter is given then the *_nz parameter is ignored
3086 
3087    Options Database Keys:
3088 +   -mat_block_size - size of the blocks to use
3089 -   -mat_use_hash_table <fact> - use a hash table during matrix assembly to save memory, sized with factor <fact>
3090 
3091    Notes:
3092    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
3093    then it must be used on all processors that share the object for that argument.
3094 
3095    Storage Information:
3096    For a square global matrix we define each processor's diagonal portion
3097    to be its local rows and the corresponding columns (a square submatrix);
3098    each processor's off-diagonal portion encompasses the remainder of the
3099    local matrix (a rectangular submatrix).
3100 
3101    The user can specify preallocated storage for the diagonal part of
3102    the local submatrix with either d_nz or d_nnz (not both).  Set
3103    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
3104    memory allocation.  Likewise, specify preallocated storage for the
3105    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3106 
3107    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3108    the figure below we depict these three local rows and all columns (0-11).
3109 
3110 .vb
3111            0 1 2 3 4 5 6 7 8 9 10 11
3112           -------------------
3113    row 3  |  o o o d d d o o o o o o
3114    row 4  |  o o o d d d o o o o o o
3115    row 5  |  o o o d d d o o o o o o
3116           -------------------
3117 .ve
3118 
3119    Thus, any entries in the d locations are stored in the d (diagonal)
3120    submatrix, and any entries in the o locations are stored in the
3121    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3122    stored simply in the MATSEQBAIJ format for compressed row storage.
3123 
3124    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3125    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3126    In general, for PDE problems in which most nonzeros are near the diagonal,
3127    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3128    or you will get TERRIBLE performance; see the users' manual chapter on
3129    matrices.
3130 
3131    You can call MatGetInfo() to get information on how effective the preallocation was;
3132    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3133    You can also run with the option -info and look for messages with the string
3134    malloc in them to see if additional memory allocation was needed.
3135 
3136    Level: intermediate
3137 
3138 .keywords: matrix, block, aij, compressed row, sparse, parallel
3139 
3140 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR()
3141 @*/
3142 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3143 {
3144   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
3145 
3146   PetscFunctionBegin;
3147   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3148   if (f) {
3149     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3150   }
3151   PetscFunctionReturn(0);
3152 }
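/*
   Illustrative sketch (assuming bs=3 and the three-row figure in the manual page above,
   so this process owns one block row with one diagonal block and three off-diagonal blocks):

      PetscInt d_nnz[1] = {1};     block column 1 (point columns 3-5) is the diagonal block
      PetscInt o_nnz[1] = {3};     block columns 0, 2, 3 (point columns 0-2, 6-8, 9-11) are off-diagonal
      ierr = MatMPIBAIJSetPreallocation(B,3,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
*/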
3153 
3154 #undef __FUNCT__
3155 #define __FUNCT__ "MatCreateMPIBAIJ"
3156 /*@C
3157    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
3158    (block compressed row).  For good matrix assembly performance
3159    the user should preallocate the matrix storage by setting the parameters
3160    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3161    performance can be increased by more than a factor of 50.
3162 
3163    Collective on MPI_Comm
3164 
3165    Input Parameters:
3166 +  comm - MPI communicator
3167 .  bs   - size of block
3168 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
3169            This value should be the same as the local size used in creating the
3170            y vector for the matrix-vector product y = Ax.
3171 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
3172            This value should be the same as the local size used in creating the
3173            x vector for the matrix-vector product y = Ax.
3174 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
3175 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
3176 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
3177            submatrix  (same for all local rows)
3178 .  d_nnz - array containing the number of nonzero blocks in the various block rows
3179            in the diagonal portion of the local submatrix (possibly different for each block
3180            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
3181 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3182            submatrix (same for all local rows).
3183 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
3184            off-diagonal portion of the local submatrix (possibly different for
3185            each block row) or PETSC_NULL.
3186 
3187    Output Parameter:
3188 .  A - the matrix
3189 
3190    Options Database Keys:
3191 +   -mat_block_size - size of the blocks to use
3192 -   -mat_use_hash_table <fact> - use a hash table during matrix assembly to save memory, sized with factor <fact>
3193 
3194    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3195    MatXXXXSetPreallocation() paradigm instead of this routine directly; a sketch follows this routine below.
3196    [MatXXXXSetPreallocation() is, for example, MatSeqBAIJSetPreallocation()]
3197 
3198    Notes:
3199    If the *_nnz parameter is given then the *_nz parameter is ignored
3200 
3201    A nonzero block is any block that has 1 or more nonzeros in it
3202 
3203    The user MUST specify either the local or global matrix dimensions
3204    (possibly both).
3205 
3206    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
3207    then it must be used on all processors that share the object for that argument.
3208 
3209    Storage Information:
3210    For a square global matrix we define each processor's diagonal portion
3211    to be its local rows and the corresponding columns (a square submatrix);
3212    each processor's off-diagonal portion encompasses the remainder of the
3213    local matrix (a rectangular submatrix).
3214 
3215    The user can specify preallocated storage for the diagonal part of
3216    the local submatrix with either d_nz or d_nnz (not both).  Set
3217    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
3218    memory allocation.  Likewise, specify preallocated storage for the
3219    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3220 
3221    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3222    the figure below we depict these three local rows and all columns (0-11).
3223 
3224 .vb
3225            0 1 2 3 4 5 6 7 8 9 10 11
3226           -------------------
3227    row 3  |  o o o d d d o o o o o o
3228    row 4  |  o o o d d d o o o o o o
3229    row 5  |  o o o d d d o o o o o o
3230           -------------------
3231 .ve
3232 
3233    Thus, any entries in the d locations are stored in the d (diagonal)
3234    submatrix, and any entries in the o locations are stored in the
3235    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3236    stored simply in the MATSEQBAIJ format for compressed row storage.
3237 
3238    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3239    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3240    In general, for PDE problems in which most nonzeros are near the diagonal,
3241    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3242    or you will get TERRIBLE performance; see the users' manual chapter on
3243    matrices.
3244 
3245    Level: intermediate
3246 
3247 .keywords: matrix, block, aij, compressed row, sparse, parallel
3248 
3249 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3250 @*/
3251 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3252 {
3253   PetscErrorCode ierr;
3254   PetscMPIInt    size;
3255 
3256   PetscFunctionBegin;
3257   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3258   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3259   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3260   if (size > 1) {
3261     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
3262     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3263   } else {
3264     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3265     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3266   }
3267   PetscFunctionReturn(0);
3268 }
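/*
   Sketch of the recommended creation paradigm referenced in the manual page above
   (sizes, block size, and preallocation counts are placeholders); calling both
   preallocation routines is safe, since each one is a no-op when the matching
   composed function is absent from the matrix type:

      ierr = MatCreate(comm,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
      ierr = MatSetType(A,MATBAIJ);CHKERRQ(ierr);
      ierr = MatSetFromOptions(A);CHKERRQ(ierr);
      ierr = MatMPIBAIJSetPreallocation(A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
      ierr = MatSeqBAIJSetPreallocation(A,bs,d_nz,d_nnz);CHKERRQ(ierr);
*/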
3269 
3270 #undef __FUNCT__
3271 #define __FUNCT__ "MatDuplicate_MPIBAIJ"
3272 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3273 {
3274   Mat            mat;
3275   Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
3276   PetscErrorCode ierr;
3277   PetscInt       len=0;
3278 
3279   PetscFunctionBegin;
3280   *newmat       = 0;
3281   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
3282   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3283   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3284   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3285 
3286   mat->factortype   = matin->factortype;
3287   mat->preallocated = PETSC_TRUE;
3288   mat->assembled    = PETSC_TRUE;
3289   mat->insertmode   = NOT_SET_VALUES;
3290 
3291   a      = (Mat_MPIBAIJ*)mat->data;
3292   mat->rmap->bs  = matin->rmap->bs;
3293   a->bs2   = oldmat->bs2;
3294   a->mbs   = oldmat->mbs;
3295   a->nbs   = oldmat->nbs;
3296   a->Mbs   = oldmat->Mbs;
3297   a->Nbs   = oldmat->Nbs;
3298 
3299   ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3300   ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3301 
3302   a->size         = oldmat->size;
3303   a->rank         = oldmat->rank;
3304   a->donotstash   = oldmat->donotstash;
3305   a->roworiented  = oldmat->roworiented;
3306   a->rowindices   = 0;
3307   a->rowvalues    = 0;
3308   a->getrowactive = PETSC_FALSE;
3309   a->barray       = 0;
3310   a->rstartbs     = oldmat->rstartbs;
3311   a->rendbs       = oldmat->rendbs;
3312   a->cstartbs     = oldmat->cstartbs;
3313   a->cendbs       = oldmat->cendbs;
3314 
3315   /* hash table stuff */
3316   a->ht           = 0;
3317   a->hd           = 0;
3318   a->ht_size      = 0;
3319   a->ht_flag      = oldmat->ht_flag;
3320   a->ht_fact      = oldmat->ht_fact;
3321   a->ht_total_ct  = 0;
3322   a->ht_insert_ct = 0;
3323 
3324   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
3325   if (oldmat->colmap) {
3326 #if defined (PETSC_USE_CTABLE)
3327   ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3328 #else
3329   ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3330   ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3331   ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3332 #endif
3333   } else a->colmap = 0;
3334 
3335   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
3336     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3337     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3338     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
3339   } else a->garray = 0;
3340 
3341   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
3342   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3343   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3344   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3345   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3346 
3347   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3348   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3349   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3350   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3351   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3352   *newmat = mat;
3353 
3354   PetscFunctionReturn(0);
3355 }
3356 
3357 #undef __FUNCT__
3358 #define __FUNCT__ "MatLoad_MPIBAIJ"
3359 PetscErrorCode MatLoad_MPIBAIJ(PetscViewer viewer, const MatType type,Mat *newmat)
3360 {
3361   Mat            A;
3362   PetscErrorCode ierr;
3363   int            fd;
3364   PetscInt       i,nz,j,rstart,rend;
3365   PetscScalar    *vals,*buf;
3366   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3367   MPI_Status     status;
3368   PetscMPIInt    rank,size,maxnz;
3369   PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
3370   PetscInt       *locrowlens = PETSC_NULL,*procsnz = PETSC_NULL,*browners = PETSC_NULL;
3371   PetscInt       jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax;
3372   PetscMPIInt    tag = ((PetscObject)viewer)->tag;
3373   PetscInt       *dlens = PETSC_NULL,*odlens = PETSC_NULL,*mask = PETSC_NULL,*masked1 = PETSC_NULL,*masked2 = PETSC_NULL,rowcount,odcount;
3374   PetscInt       dcount,kmax,k,nzcount,tmp,mend;
3375 
3376   PetscFunctionBegin;
3377   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIBAIJ matrix","Mat");CHKERRQ(ierr);
3378     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
3379   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3380 
3381   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3382   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3383   if (!rank) {
3384     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3385     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
3386     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3387   }
3388 
3389   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3390   M = header[1]; N = header[2];
3391 
3392   if (M != N) SETERRQ(((PetscObject)viewer)->comm,PETSC_ERR_SUP,"Can only do square matrices");
3393 
3394   /*
3395      This code adds extra rows to make sure the number of rows is
3396      divisible by the blocksize
3397   */
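  /* worked example (illustrative): M=10, bs=3 gives Mbs=M/bs=3 and extra_rows=3-10+9=2;
     since extra_rows != bs the block row count Mbs becomes 4, and the padded rows
     later receive a single 1.0 on the diagonal */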
3398   Mbs        = M/bs;
3399   extra_rows = bs - M + bs*Mbs;
3400   if (extra_rows == bs) extra_rows = 0;
3401   else                  Mbs++;
3402   if (extra_rows && !rank) {
3403     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3404   }
3405 
3406   /* determine ownership of all rows */
3407   mbs        = Mbs/size + ((Mbs % size) > rank);
3408   m          = mbs*bs;
3409   ierr       = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr);
3410   ierr       = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3411 
3412   /* process 0 needs enough room for process with most rows */
3413   if (!rank) {
3414     mmax = rowners[1];
3415     for (i=2; i<size; i++) {
3416       mmax = PetscMax(mmax,rowners[i]);
3417     }
3418     mmax*=bs;
3419   } else mmax = m;
3420 
3421   rowners[0] = 0;
3422   for (i=2; i<=size; i++)  rowners[i] += rowners[i-1];
3423   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
3424   rstart = rowners[rank];
3425   rend   = rowners[rank+1];
3426 
3427   /* distribute row lengths to all processors */
3428   ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr);
3429   if (!rank) {
3430     mend = m;
3431     if (size == 1) mend = mend - extra_rows;
3432     ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr);
3433     for (j=mend; j<m; j++) locrowlens[j] = 1;
3434     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3435     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3436     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3437     for (j=0; j<m; j++) {
3438       procsnz[0] += locrowlens[j];
3439     }
3440     for (i=1; i<size; i++) {
3441       mend = browners[i+1] - browners[i];
3442       if (i == size-1) mend = mend - extra_rows;
3443       ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr);
3444       for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
3445       /* calculate the number of nonzeros on each processor */
3446       for (j=0; j<browners[i+1]-browners[i]; j++) {
3447         procsnz[i] += rowlengths[j];
3448       }
3449       ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3450     }
3451     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3452   } else {
3453     ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3454   }
3455 
3456   if (!rank) {
3457     /* determine max buffer needed and allocate it */
3458     maxnz = procsnz[0];
3459     for (i=1; i<size; i++) {
3460       maxnz = PetscMax(maxnz,procsnz[i]);
3461     }
3462     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3463 
3464     /* read in my part of the matrix column indices  */
3465     nz     = procsnz[0];
3466     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
3467     mycols = ibuf;
3468     if (size == 1)  nz -= extra_rows;
3469     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3470     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
3471 
3472     /* read in everyone else's rows (except the last proc's) and ship them off */
3473     for (i=1; i<size-1; i++) {
3474       nz   = procsnz[i];
3475       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3476       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3477     }
3478     /* read in the stuff for the last proc */
3479     if (size != 1) {
3480       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
3481       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3482       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
3483       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
3484     }
3485     ierr = PetscFree(cols);CHKERRQ(ierr);
3486   } else {
3487     /* determine buffer space needed for message */
3488     nz = 0;
3489     for (i=0; i<m; i++) {
3490       nz += locrowlens[i];
3491     }
3492     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
3493     mycols = ibuf;
3494     /* receive message of column indices */
3495     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3496     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
3497     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
3498   }
3499 
3500   /* loop over local rows, determining number of off diagonal entries */
3501   ierr     = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
3502   ierr     = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
3503   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3504   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3505   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3506   rowcount = 0; nzcount = 0;
3507   for (i=0; i<mbs; i++) {
3508     dcount  = 0;
3509     odcount = 0;
3510     for (j=0; j<bs; j++) {
3511       kmax = locrowlens[rowcount];
3512       for (k=0; k<kmax; k++) {
3513         tmp = mycols[nzcount++]/bs;
3514         if (!mask[tmp]) {
3515           mask[tmp] = 1;
3516           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
3517           else masked1[dcount++] = tmp;
3518         }
3519       }
3520       rowcount++;
3521     }
3522 
3523     dlens[i]  = dcount;
3524     odlens[i] = odcount;
3525 
3526     /* zero out the mask elements we set */
3527     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
3528     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
3529   }
3530 
3531   /* create our matrix */
3532   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
3533   ierr = MatSetSizes(A,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3534   ierr = MatSetType(A,type);CHKERRQ(ierr);
3535   ierr = MatMPIBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
3536 
3537   if (!rank) {
3538     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
3539     /* read in my part of the matrix numerical values  */
3540     nz = procsnz[0];
3541     vals = buf;
3542     mycols = ibuf;
3543     if (size == 1)  nz -= extra_rows;
3544     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3545     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
3546 
3547     /* insert into matrix */
3548     jj      = rstart*bs;
3549     for (i=0; i<m; i++) {
3550       ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3551       mycols += locrowlens[i];
3552       vals   += locrowlens[i];
3553       jj++;
3554     }
3555     /* read in other processors (except the last one) and ship out */
3556     for (i=1; i<size-1; i++) {
3557       nz   = procsnz[i];
3558       vals = buf;
3559       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3560       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
3561     }
3562     /* the last proc */
3563     if (size != 1){
3564       nz   = procsnz[i] - extra_rows;
3565       vals = buf;
3566       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3567       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
3568       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
3569     }
3570     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3571   } else {
3572     /* receive numeric values */
3573     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
3574 
3575     /* receive message of values */
3576     vals   = buf;
3577     mycols = ibuf;
3578     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
3579     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
3580     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
3581 
3582     /* insert into matrix */
3583     jj      = rstart*bs;
3584     for (i=0; i<m; i++) {
3585       ierr    = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3586       mycols += locrowlens[i];
3587       vals   += locrowlens[i];
3588       jj++;
3589     }
3590   }
3591   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
3592   ierr = PetscFree(buf);CHKERRQ(ierr);
3593   ierr = PetscFree(ibuf);CHKERRQ(ierr);
3594   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
3595   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
3596   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
3597   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3598   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3599 
3600   *newmat = A;
3601   PetscFunctionReturn(0);
3602 }
3603 
3604 #undef __FUNCT__
3605 #define __FUNCT__ "MatLoadnew_MPIBAIJ"
3606 PetscErrorCode MatLoadnew_MPIBAIJ(PetscViewer viewer, Mat newmat)
3607 {
3608   PetscErrorCode ierr;
3609   int            fd;
3610   PetscInt       i,nz,j,rstart,rend;
3611   PetscScalar    *vals,*buf;
3612   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3613   MPI_Status     status;
3614   PetscMPIInt    rank,size,maxnz;
3615   PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
3616   PetscInt       *locrowlens = PETSC_NULL,*procsnz = PETSC_NULL,*browners = PETSC_NULL;
3617   PetscInt       jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax;
3618   PetscMPIInt    tag = ((PetscObject)viewer)->tag;
3619   PetscInt       *dlens = PETSC_NULL,*odlens = PETSC_NULL,*mask = PETSC_NULL,*masked1 = PETSC_NULL,*masked2 = PETSC_NULL,rowcount,odcount;
3620   PetscInt       dcount,kmax,k,nzcount,tmp,mend,sizesset=1,grows,gcols;
3621 
3622   PetscFunctionBegin;
3623   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIBAIJ matrix","Mat");CHKERRQ(ierr);
3624     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
3625   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3626 
3627   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3628   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3629   if (!rank) {
3630     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3631     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
3632     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3633   }
3634 
3635   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
3636 
3637   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3638   M = header[1]; N = header[2];
3639 
3640   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
3641   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
3642   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
3643 
3644   /* If global sizes are set, check if they are consistent with that given in the file */
3645   if (sizesset) {
3646     ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
3647   }
3648   if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: matrix in file has (%D) and input matrix has (%D)",M,grows);
3649   if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: matrix in file has (%D) and input matrix has (%D)",N,gcols);
3650 
3651   if (M != N) SETERRQ(((PetscObject)viewer)->comm,PETSC_ERR_SUP,"Can only do square matrices");
3652 
3653   /*
3654      This code adds extra rows to make sure the number of rows is
3655      divisible by the blocksize
3656   */
3657   Mbs        = M/bs;
3658   extra_rows = bs - M + bs*Mbs;
3659   if (extra_rows == bs) extra_rows = 0;
3660   else                  Mbs++;
3661   if (extra_rows && !rank) {
3662     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3663   }
3664 
3665   /* determine ownership of all rows */
3666   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
3667     mbs        = Mbs/size + ((Mbs % size) > rank);
3668     m          = mbs*bs;
3669   } else { /* User set */
3670     m          = newmat->rmap->n;
3671     mbs        = m/bs;
3672   }
3673   ierr       = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr);
3674   ierr       = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3675 
3676   /* process 0 needs enough room for process with most rows */
3677   if (!rank) {
3678     mmax = rowners[1];
3679     for (i=2; i<size; i++) {
3680       mmax = PetscMax(mmax,rowners[i]);
3681     }
3682     mmax*=bs;
3683   } else mmax = m;
3684 
3685   rowners[0] = 0;
3686   for (i=2; i<=size; i++)  rowners[i] += rowners[i-1];
3687   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
3688   rstart = rowners[rank];
3689   rend   = rowners[rank+1];
3690 
3691   /* distribute row lengths to all processors */
3692   ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr);
3693   if (!rank) {
3694     mend = m;
3695     if (size == 1) mend = mend - extra_rows;
3696     ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr);
3697     for (j=mend; j<m; j++) locrowlens[j] = 1;
3698     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3699     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3700     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3701     for (j=0; j<m; j++) {
3702       procsnz[0] += locrowlens[j];
3703     }
3704     for (i=1; i<size; i++) {
3705       mend = browners[i+1] - browners[i];
3706       if (i == size-1) mend = mend - extra_rows;
3707       ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr);
3708       for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
3709       /* calculate the number of nonzeros on each processor */
3710       for (j=0; j<browners[i+1]-browners[i]; j++) {
3711         procsnz[i] += rowlengths[j];
3712       }
3713       ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3714     }
3715     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3716   } else {
3717     ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3718   }
3719 
3720   if (!rank) {
3721     /* determine max buffer needed and allocate it */
3722     maxnz = procsnz[0];
3723     for (i=1; i<size; i++) {
3724       maxnz = PetscMax(maxnz,procsnz[i]);
3725     }
3726     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3727 
3728     /* read in my part of the matrix column indices  */
3729     nz     = procsnz[0];
3730     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
3731     mycols = ibuf;
3732     if (size == 1)  nz -= extra_rows;
3733     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3734     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
3735 
3736     /* read in everyone else's rows (except the last proc's) and ship them off */
3737     for (i=1; i<size-1; i++) {
3738       nz   = procsnz[i];
3739       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3740       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3741     }
3742     /* read in the stuff for the last proc */
3743     if (size != 1) {
3744       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
3745       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3746       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
3747       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
3748     }
3749     ierr = PetscFree(cols);CHKERRQ(ierr);
3750   } else {
3751     /* determine buffer space needed for message */
3752     nz = 0;
3753     for (i=0; i<m; i++) {
3754       nz += locrowlens[i];
3755     }
3756     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
3757     mycols = ibuf;
3758     /* receive message of column indices */
3759     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3760     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
3761     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
3762   }
3763 
3764   /* loop over local rows, determining number of off diagonal entries */
3765   ierr     = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
3766   ierr     = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
3767   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3768   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3769   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3770   rowcount = 0; nzcount = 0;
3771   for (i=0; i<mbs; i++) {
3772     dcount  = 0;
3773     odcount = 0;
3774     for (j=0; j<bs; j++) {
3775       kmax = locrowlens[rowcount];
3776       for (k=0; k<kmax; k++) {
3777         tmp = mycols[nzcount++]/bs;
3778         if (!mask[tmp]) {
3779           mask[tmp] = 1;
3780           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
3781           else masked1[dcount++] = tmp;
3782         }
3783       }
3784       rowcount++;
3785     }
3786 
3787     dlens[i]  = dcount;
3788     odlens[i] = odcount;
3789 
3790     /* zero out the mask elements we set */
3791     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
3792     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
3793   }
3794 
3795 
3796   if (!sizesset) {
3797     ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3798   }
3799   ierr = MatMPIBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
3800 
3801   if (!rank) {
3802     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
3803     /* read in my part of the matrix numerical values  */
3804     nz = procsnz[0];
3805     vals = buf;
3806     mycols = ibuf;
3807     if (size == 1)  nz -= extra_rows;
3808     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3809     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
3810 
3811     /* insert into matrix */
3812     jj      = rstart*bs;
3813     for (i=0; i<m; i++) {
3814       ierr = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3815       mycols += locrowlens[i];
3816       vals   += locrowlens[i];
3817       jj++;
3818     }
3819     /* read in other processors (except the last one) and ship out */
3820     for (i=1; i<size-1; i++) {
3821       nz   = procsnz[i];
3822       vals = buf;
3823       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3824       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3825     }
3826     /* the last proc */
3827     if (size != 1){
3828       nz   = procsnz[i] - extra_rows;
3829       vals = buf;
3830       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3831       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
3832       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
3833     }
3834     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3835   } else {
3836     /* receive numeric values */
3837     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
3838 
3839     /* receive message of values */
3840     vals   = buf;
3841     mycols = ibuf;
3842     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
3843     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
3844     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
3845 
3846     /* insert into matrix */
3847     jj      = rstart*bs;
3848     for (i=0; i<m; i++) {
3849       ierr    = MatSetValues_MPIBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3850       mycols += locrowlens[i];
3851       vals   += locrowlens[i];
3852       jj++;
3853     }
3854   }
3855   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
3856   ierr = PetscFree(buf);CHKERRQ(ierr);
3857   ierr = PetscFree(ibuf);CHKERRQ(ierr);
3858   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
3859   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
3860   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
3861   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3862   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3863 
3864   PetscFunctionReturn(0);
3865 }
3866 
3867 #undef __FUNCT__
3868 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
3869 /*@
3870    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
3871 
3872    Input Parameters:
3873 +  mat  - the matrix
3874 -  fact - the factor used to compute the hash table size
3875 
3876    Collective on Mat
3877 
3878    Level: advanced
3879 
3880   Notes:
3881    This can also be set by the command line option: -mat_use_hash_table <fact>
3882 
3883 .keywords: matrix, hashtable, factor, HT
3884 
3885 .seealso: MatSetOption()
3886 @*/
3887 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
3888 {
3889   PetscErrorCode ierr,(*f)(Mat,PetscReal);
3890 
3891   PetscFunctionBegin;
3892   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);CHKERRQ(ierr);
3893   if (f) {
3894     ierr = (*f)(mat,fact);CHKERRQ(ierr);
3895   }
3896   PetscFunctionReturn(0);
3897 }
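/*
   Illustrative usage sketch (the factor value 1.6 is a placeholder; note that when the
   factor is set through the options database instead, values <= 1.0 are reset to the
   default 1.39):

      ierr = MatSetOption(A,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
      ierr = MatMPIBAIJSetHashTableFactor(A,1.6);CHKERRQ(ierr);
*/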
3898 
3899 EXTERN_C_BEGIN
3900 #undef __FUNCT__
3901 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
3902 PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
3903 {
3904   Mat_MPIBAIJ *baij;
3905 
3906   PetscFunctionBegin;
3907   baij = (Mat_MPIBAIJ*)mat->data;
3908   baij->ht_fact = fact;
3909   PetscFunctionReturn(0);
3910 }
3911 EXTERN_C_END
3912 
3913 #undef __FUNCT__
3914 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
3915 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3916 {
3917   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
3918   PetscFunctionBegin;
3919   *Ad     = a->A;
3920   *Ao     = a->B;
3921   *colmap = a->garray;
3922   PetscFunctionReturn(0);
3923 }
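/*
   Illustrative sketch (assuming A is an assembled MPIBAIJ matrix): the returned
   colmap/garray translates a local block column jloc of the off-diagonal part Ao
   into its global block column, as in MatConvert_MPIBAIJ_MPIAdj() above:

      Mat      Ad,Ao;
      PetscInt *garray,jglobal;
      ierr    = MatMPIBAIJGetSeqBAIJ(A,&Ad,&Ao,&garray);CHKERRQ(ierr);
      jglobal = garray[jloc];     jloc is a local block column index into Ao
*/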
3924 
3925 /*
3926     Special version for direct calls from Fortran (to eliminate two function call overheads).
3927 */
3928 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3929 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3930 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3931 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3932 #endif
3933 
3934 #undef __FUNCT__
3935 #define __FUNCT__ "matmpibiajsetvaluesblocked"
3936 /*@C
3937   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked()
3938 
3939   Collective on Mat
3940 
3941   Input Parameters:
3942 + mat - the matrix
3943 . min - number of input rows
3944 . im - input rows
3945 . nin - number of input columns
3946 . in - input columns
3947 . v - numerical values input
3948 - addvin - INSERT_VALUES or ADD_VALUES
3949 
3950   Notes: This is a complete copy of MatSetValuesBlocked_MPIBAIJ(), which is terrible code duplication.
3951 
3952   Level: advanced
3953 
3954 .seealso:   MatSetValuesBlocked()
3955 @*/
3956 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
3957 {
3958   /* convert input arguments to C version */
3959   Mat             mat = *matin;
3960   PetscInt        m = *min, n = *nin;
3961   InsertMode      addv = *addvin;
3962 
3963   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
3964   const MatScalar *value;
3965   MatScalar       *barray=baij->barray;
3966   PetscTruth      roworiented = baij->roworiented;
3967   PetscErrorCode  ierr;
3968   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
3969   PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
3970   PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
3971 
3972   PetscFunctionBegin;
3973   /* tasks normally handled by MatSetValuesBlocked() */
3974   if (mat->insertmode == NOT_SET_VALUES) {
3975     mat->insertmode = addv;
3976   }
3977 #if defined(PETSC_USE_DEBUG)
3978   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
3979   if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
3980 #endif
3981   if (mat->assembled) {
3982     mat->was_assembled = PETSC_TRUE;
3983     mat->assembled     = PETSC_FALSE;
3984   }
3985   ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
3986 
3987 
3988   if (!barray) {
3989     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
3990     baij->barray = barray;
3991   }
3992 
3993   if (roworiented) {
3994     stepval = (n-1)*bs;
3995   } else {
3996     stepval = (m-1)*bs;
3997   }
3998   for (i=0; i<m; i++) {
3999     if (im[i] < 0) continue;
4000 #if defined(PETSC_USE_DEBUG)
4001     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
4002 #endif
4003     if (im[i] >= rstart && im[i] < rend) {
4004       row = im[i] - rstart;
4005       for (j=0; j<n; j++) {
4006         /* If NumCol = 1 then a copy is not required */
4007         if ((roworiented) && (n == 1)) {
4008           barray = (MatScalar*)v + i*bs2;
4009         } else if((!roworiented) && (m == 1)) {
4010           barray = (MatScalar*)v + j*bs2;
4011         } else { /* Here a copy is required */
4012           if (roworiented) {
4013             value = v + i*(stepval+bs)*bs + j*bs;
4014           } else {
4015             value = v + j*(stepval+bs)*bs + i*bs;
4016           }
4017           for (ii=0; ii<bs; ii++,value+=stepval) {
4018             for (jj=0; jj<bs; jj++) {
4019               *barray++  = *value++;
4020             }
4021           }
4022           barray -=bs2;
4023         }
4024 
4025         if (in[j] >= cstart && in[j] < cend){
4026           col  = in[j] - cstart;
4027           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
4028         }
4029         else if (in[j] < 0) continue;
4030 #if defined(PETSC_USE_DEBUG)
4031         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
4032 #endif
4033         else {
4034           if (mat->was_assembled) {
4035             if (!baij->colmap) {
4036               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
4037             }
4038 
4039 #if defined(PETSC_USE_DEBUG)
4040 #if defined (PETSC_USE_CTABLE)
4041             { PetscInt data;
4042               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
4043               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
4044             }
4045 #else
4046             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
4047 #endif
4048 #endif
4049 #if defined (PETSC_USE_CTABLE)
4050 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
4051             col  = (col - 1)/bs;
4052 #else
4053             col = (baij->colmap[in[j]] - 1)/bs;
4054 #endif
4055             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
4056               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
4057               col =  in[j];
4058             }
4059           }
4060           else col = in[j];
4061           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
4062         }
4063       }
4064     } else {
4065       if (!baij->donotstash) {
4066         if (roworiented) {
4067           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
4068         } else {
4069           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
4070         }
4071       }
4072     }
4073   }
4074 
4075   /* task normally handled by MatSetValuesBlocked() */
4076   ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
4077   PetscFunctionReturn(0);
4078 }
4079