xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 41c166b161c7dce5d06e24db58189774b43d9506)
1 #define PETSCMAT_DLL
2 
3 #include "../src/mat/impls/baij/mpi/mpibaij.h"   /*I  "petscmat.h"  I*/
4 
5 EXTERN PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
6 EXTERN PetscErrorCode DisAssemble_MPIBAIJ(Mat);
7 EXTERN PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat,PetscInt,IS[],PetscInt);
8 EXTERN PetscErrorCode MatGetSubMatrices_MPIBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat *[]);
9 EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []);
10 EXTERN PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
11 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
12 EXTERN PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
13 EXTERN PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
14 EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar);
15 
16 #undef __FUNCT__
17 #define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ"
18 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
19 {
20   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
21   PetscErrorCode ierr;
22   PetscInt       i,*idxb = 0;
23   PetscScalar    *va,*vb;
24   Vec            vtmp;
25 
26   PetscFunctionBegin;
27   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
28   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
29   if (idx) {
30     for (i=0; i<A->rmap->n; i++) {if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;}
31   }
32 
33   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
34   if (idx) {ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);}
35   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
36   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
37 
38   for (i=0; i<A->rmap->n; i++){
39     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {va[i] = vb[i]; if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);}
40   }
41 
42   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
43   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
44   if (idxb) {ierr = PetscFree(idxb);CHKERRQ(ierr);}
45   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
46   PetscFunctionReturn(0);
47 }
48 
49 EXTERN_C_BEGIN
50 #undef __FUNCT__
51 #define __FUNCT__ "MatStoreValues_MPIBAIJ"
52 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIBAIJ(Mat mat)
53 {
54   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
55   PetscErrorCode ierr;
56 
57   PetscFunctionBegin;
58   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
59   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
60   PetscFunctionReturn(0);
61 }
62 EXTERN_C_END
63 
64 EXTERN_C_BEGIN
65 #undef __FUNCT__
66 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
67 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIBAIJ(Mat mat)
68 {
69   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
70   PetscErrorCode ierr;
71 
72   PetscFunctionBegin;
73   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
74   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
75   PetscFunctionReturn(0);
76 }
77 EXTERN_C_END
78 
79 /*
80      Local utility routine that creates a mapping from the global column
81    number to the local number in the off-diagonal part of the local
82    storage of the matrix.  This is done in a non scable way since the
83    length of colmap equals the global matrix length.
84 */
85 #undef __FUNCT__
86 #define __FUNCT__ "CreateColmap_MPIBAIJ_Private"
87 PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat)
88 {
89   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
90   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
91   PetscErrorCode ierr;
92   PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;
93 
94   PetscFunctionBegin;
95 #if defined (PETSC_USE_CTABLE)
96   ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr);
97   for (i=0; i<nbs; i++){
98     ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
99   }
100 #else
101   ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr);
102   ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
103   ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
104   for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
105 #endif
106   PetscFunctionReturn(0);
107 }
108 
109 #define CHUNKSIZE  10
110 
111 #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
112 { \
113  \
114     brow = row/bs;  \
115     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
116     rmax = aimax[brow]; nrow = ailen[brow]; \
117       bcol = col/bs; \
118       ridx = row % bs; cidx = col % bs; \
119       low = 0; high = nrow; \
120       while (high-low > 3) { \
121         t = (low+high)/2; \
122         if (rp[t] > bcol) high = t; \
123         else              low  = t; \
124       } \
125       for (_i=low; _i<high; _i++) { \
126         if (rp[_i] > bcol) break; \
127         if (rp[_i] == bcol) { \
128           bap  = ap +  bs2*_i + bs*cidx + ridx; \
129           if (addv == ADD_VALUES) *bap += value;  \
130           else                    *bap  = value;  \
131           goto a_noinsert; \
132         } \
133       } \
134       if (a->nonew == 1) goto a_noinsert; \
135       if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
136       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
137       N = nrow++ - 1;  \
138       /* shift up all the later entries in this row */ \
139       for (ii=N; ii>=_i; ii--) { \
140         rp[ii+1] = rp[ii]; \
141         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
142       } \
143       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
144       rp[_i]                      = bcol;  \
145       ap[bs2*_i + bs*cidx + ridx] = value;  \
146       a_noinsert:; \
147     ailen[brow] = nrow; \
148 }
149 
150 #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
151 { \
152     brow = row/bs;  \
153     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
154     rmax = bimax[brow]; nrow = bilen[brow]; \
155       bcol = col/bs; \
156       ridx = row % bs; cidx = col % bs; \
157       low = 0; high = nrow; \
158       while (high-low > 3) { \
159         t = (low+high)/2; \
160         if (rp[t] > bcol) high = t; \
161         else              low  = t; \
162       } \
163       for (_i=low; _i<high; _i++) { \
164         if (rp[_i] > bcol) break; \
165         if (rp[_i] == bcol) { \
166           bap  = ap +  bs2*_i + bs*cidx + ridx; \
167           if (addv == ADD_VALUES) *bap += value;  \
168           else                    *bap  = value;  \
169           goto b_noinsert; \
170         } \
171       } \
172       if (b->nonew == 1) goto b_noinsert; \
173       if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
174       MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
175       CHKMEMQ;\
176       N = nrow++ - 1;  \
177       /* shift up all the later entries in this row */ \
178       for (ii=N; ii>=_i; ii--) { \
179         rp[ii+1] = rp[ii]; \
180         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
181       } \
182       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
183       rp[_i]                      = bcol;  \
184       ap[bs2*_i + bs*cidx + ridx] = value;  \
185       b_noinsert:; \
186     bilen[brow] = nrow; \
187 }
188 
189 #undef __FUNCT__
190 #define __FUNCT__ "MatSetValues_MPIBAIJ"
191 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
192 {
193   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
194   MatScalar      value;
195   PetscTruth     roworiented = baij->roworiented;
196   PetscErrorCode ierr;
197   PetscInt       i,j,row,col;
198   PetscInt       rstart_orig=mat->rmap->rstart;
199   PetscInt       rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart;
200   PetscInt       cend_orig=mat->cmap->rend,bs=mat->rmap->bs;
201 
202   /* Some Variables required in the macro */
203   Mat            A = baij->A;
204   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)(A)->data;
205   PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
206   MatScalar      *aa=a->a;
207 
208   Mat            B = baij->B;
209   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(B)->data;
210   PetscInt       *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
211   MatScalar      *ba=b->a;
212 
213   PetscInt       *rp,ii,nrow,_i,rmax,N,brow,bcol;
214   PetscInt       low,high,t,ridx,cidx,bs2=a->bs2;
215   MatScalar      *ap,*bap;
216 
217   PetscFunctionBegin;
218   for (i=0; i<m; i++) {
219     if (im[i] < 0) continue;
220 #if defined(PETSC_USE_DEBUG)
221     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
222 #endif
223     if (im[i] >= rstart_orig && im[i] < rend_orig) {
224       row = im[i] - rstart_orig;
225       for (j=0; j<n; j++) {
226         if (in[j] >= cstart_orig && in[j] < cend_orig){
227           col = in[j] - cstart_orig;
228           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
229           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
230           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
231         } else if (in[j] < 0) continue;
232 #if defined(PETSC_USE_DEBUG)
233         else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[i],mat->cmap->N-1);}
234 #endif
235         else {
236           if (mat->was_assembled) {
237             if (!baij->colmap) {
238               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
239             }
240 #if defined (PETSC_USE_CTABLE)
241             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
242             col  = col - 1;
243 #else
244             col = baij->colmap[in[j]/bs] - 1;
245 #endif
246             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
247               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
248               col =  in[j];
249               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
250               B = baij->B;
251               b = (Mat_SeqBAIJ*)(B)->data;
252               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
253               ba=b->a;
254             } else col += in[j]%bs;
255           } else col = in[j];
256           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
257           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
258           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
259         }
260       }
261     } else {
262       if (!baij->donotstash) {
263         if (roworiented) {
264           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
265         } else {
266           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
267         }
268       }
269     }
270   }
271   PetscFunctionReturn(0);
272 }
273 
274 #undef __FUNCT__
275 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
276 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
277 {
278   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
279   const PetscScalar *value;
280   MatScalar         *barray=baij->barray;
281   PetscTruth        roworiented = baij->roworiented;
282   PetscErrorCode    ierr;
283   PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
284   PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
285   PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
286 
287   PetscFunctionBegin;
288   if(!barray) {
289     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
290     baij->barray = barray;
291   }
292 
293   if (roworiented) {
294     stepval = (n-1)*bs;
295   } else {
296     stepval = (m-1)*bs;
297   }
298   for (i=0; i<m; i++) {
299     if (im[i] < 0) continue;
300 #if defined(PETSC_USE_DEBUG)
301     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
302 #endif
303     if (im[i] >= rstart && im[i] < rend) {
304       row = im[i] - rstart;
305       for (j=0; j<n; j++) {
306         /* If NumCol = 1 then a copy is not required */
307         if ((roworiented) && (n == 1)) {
308           barray = (MatScalar*)v + i*bs2;
309         } else if((!roworiented) && (m == 1)) {
310           barray = (MatScalar*)v + j*bs2;
311         } else { /* Here a copy is required */
312           if (roworiented) {
313             value = v + i*(stepval+bs)*bs + j*bs;
314           } else {
315             value = v + j*(stepval+bs)*bs + i*bs;
316           }
317           for (ii=0; ii<bs; ii++,value+=stepval) {
318             for (jj=0; jj<bs; jj++) {
319               *barray++  = *value++;
320             }
321           }
322           barray -=bs2;
323         }
324 
325         if (in[j] >= cstart && in[j] < cend){
326           col  = in[j] - cstart;
327           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
328         }
329         else if (in[j] < 0) continue;
330 #if defined(PETSC_USE_DEBUG)
331         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
332 #endif
333         else {
334           if (mat->was_assembled) {
335             if (!baij->colmap) {
336               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
337             }
338 
339 #if defined(PETSC_USE_DEBUG)
340 #if defined (PETSC_USE_CTABLE)
341             { PetscInt data;
342               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
343               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
344             }
345 #else
346             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
347 #endif
348 #endif
349 #if defined (PETSC_USE_CTABLE)
350 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
351             col  = (col - 1)/bs;
352 #else
353             col = (baij->colmap[in[j]] - 1)/bs;
354 #endif
355             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
356               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
357               col =  in[j];
358             }
359           }
360           else col = in[j];
361           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
362         }
363       }
364     } else {
365       if (!baij->donotstash) {
366         if (roworiented) {
367           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
368         } else {
369           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
370         }
371       }
372     }
373   }
374   PetscFunctionReturn(0);
375 }
376 
377 #define HASH_KEY 0.6180339887
378 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
379 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
380 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
381 #undef __FUNCT__
382 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
383 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
384 {
385   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
386   PetscTruth     roworiented = baij->roworiented;
387   PetscErrorCode ierr;
388   PetscInt       i,j,row,col;
389   PetscInt       rstart_orig=mat->rmap->rstart;
390   PetscInt       rend_orig=mat->rmap->rend,Nbs=baij->Nbs;
391   PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
392   PetscReal      tmp;
393   MatScalar      **HD = baij->hd,value;
394 #if defined(PETSC_USE_DEBUG)
395   PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
396 #endif
397 
398   PetscFunctionBegin;
399 
400   for (i=0; i<m; i++) {
401 #if defined(PETSC_USE_DEBUG)
402     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
403     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
404 #endif
405       row = im[i];
406     if (row >= rstart_orig && row < rend_orig) {
407       for (j=0; j<n; j++) {
408         col = in[j];
409         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
410         /* Look up PetscInto the Hash Table */
411         key = (row/bs)*Nbs+(col/bs)+1;
412         h1  = HASH(size,key,tmp);
413 
414 
415         idx = h1;
416 #if defined(PETSC_USE_DEBUG)
417         insert_ct++;
418         total_ct++;
419         if (HT[idx] != key) {
420           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
421           if (idx == size) {
422             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
423             if (idx == h1) {
424               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
425             }
426           }
427         }
428 #else
429         if (HT[idx] != key) {
430           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
431           if (idx == size) {
432             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
433             if (idx == h1) {
434               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
435             }
436           }
437         }
438 #endif
439         /* A HASH table entry is found, so insert the values at the correct address */
440         if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
441         else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
442       }
443     } else {
444       if (!baij->donotstash) {
445         if (roworiented) {
446           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
447         } else {
448           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
449         }
450       }
451     }
452   }
453 #if defined(PETSC_USE_DEBUG)
454   baij->ht_total_ct = total_ct;
455   baij->ht_insert_ct = insert_ct;
456 #endif
457   PetscFunctionReturn(0);
458 }
459 
460 #undef __FUNCT__
461 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
462 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
463 {
464   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
465   PetscTruth        roworiented = baij->roworiented;
466   PetscErrorCode    ierr;
467   PetscInt          i,j,ii,jj,row,col;
468   PetscInt          rstart=baij->rstartbs;
469   PetscInt          rend=mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
470   PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
471   PetscReal         tmp;
472   MatScalar         **HD = baij->hd,*baij_a;
473   const PetscScalar *v_t,*value;
474 #if defined(PETSC_USE_DEBUG)
475   PetscInt          total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
476 #endif
477 
478   PetscFunctionBegin;
479 
480   if (roworiented) {
481     stepval = (n-1)*bs;
482   } else {
483     stepval = (m-1)*bs;
484   }
485   for (i=0; i<m; i++) {
486 #if defined(PETSC_USE_DEBUG)
487     if (im[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
488     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
489 #endif
490     row   = im[i];
491     v_t   = v + i*nbs2;
492     if (row >= rstart && row < rend) {
493       for (j=0; j<n; j++) {
494         col = in[j];
495 
496         /* Look up into the Hash Table */
497         key = row*Nbs+col+1;
498         h1  = HASH(size,key,tmp);
499 
500         idx = h1;
501 #if defined(PETSC_USE_DEBUG)
502         total_ct++;
503         insert_ct++;
504        if (HT[idx] != key) {
505           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
506           if (idx == size) {
507             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
508             if (idx == h1) {
509               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
510             }
511           }
512         }
513 #else
514         if (HT[idx] != key) {
515           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
516           if (idx == size) {
517             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
518             if (idx == h1) {
519               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
520             }
521           }
522         }
523 #endif
524         baij_a = HD[idx];
525         if (roworiented) {
526           /*value = v + i*(stepval+bs)*bs + j*bs;*/
527           /* value = v + (i*(stepval+bs)+j)*bs; */
528           value = v_t;
529           v_t  += bs;
530           if (addv == ADD_VALUES) {
531             for (ii=0; ii<bs; ii++,value+=stepval) {
532               for (jj=ii; jj<bs2; jj+=bs) {
533                 baij_a[jj]  += *value++;
534               }
535             }
536           } else {
537             for (ii=0; ii<bs; ii++,value+=stepval) {
538               for (jj=ii; jj<bs2; jj+=bs) {
539                 baij_a[jj]  = *value++;
540               }
541             }
542           }
543         } else {
544           value = v + j*(stepval+bs)*bs + i*bs;
545           if (addv == ADD_VALUES) {
546             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
547               for (jj=0; jj<bs; jj++) {
548                 baij_a[jj]  += *value++;
549               }
550             }
551           } else {
552             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
553               for (jj=0; jj<bs; jj++) {
554                 baij_a[jj]  = *value++;
555               }
556             }
557           }
558         }
559       }
560     } else {
561       if (!baij->donotstash) {
562         if (roworiented) {
563           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
564         } else {
565           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
566         }
567       }
568     }
569   }
570 #if defined(PETSC_USE_DEBUG)
571   baij->ht_total_ct = total_ct;
572   baij->ht_insert_ct = insert_ct;
573 #endif
574   PetscFunctionReturn(0);
575 }
576 
577 #undef __FUNCT__
578 #define __FUNCT__ "MatGetValues_MPIBAIJ"
579 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
580 {
581   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
582   PetscErrorCode ierr;
583   PetscInt       bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
584   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
585 
586   PetscFunctionBegin;
587   for (i=0; i<m; i++) {
588     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
589     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
590     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
591       row = idxm[i] - bsrstart;
592       for (j=0; j<n; j++) {
593         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
594         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
595         if (idxn[j] >= bscstart && idxn[j] < bscend){
596           col = idxn[j] - bscstart;
597           ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
598         } else {
599           if (!baij->colmap) {
600             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
601           }
602 #if defined (PETSC_USE_CTABLE)
603           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
604           data --;
605 #else
606           data = baij->colmap[idxn[j]/bs]-1;
607 #endif
608           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
609           else {
610             col  = data + idxn[j]%bs;
611             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
612           }
613         }
614       }
615     } else {
616       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
617     }
618   }
619  PetscFunctionReturn(0);
620 }
621 
622 #undef __FUNCT__
623 #define __FUNCT__ "MatNorm_MPIBAIJ"
624 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
625 {
626   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
627   Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
628   PetscErrorCode ierr;
629   PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
630   PetscReal      sum = 0.0;
631   MatScalar      *v;
632 
633   PetscFunctionBegin;
634   if (baij->size == 1) {
635     ierr =  MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
636   } else {
637     if (type == NORM_FROBENIUS) {
638       v = amat->a;
639       nz = amat->nz*bs2;
640       for (i=0; i<nz; i++) {
641 #if defined(PETSC_USE_COMPLEX)
642         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
643 #else
644         sum += (*v)*(*v); v++;
645 #endif
646       }
647       v = bmat->a;
648       nz = bmat->nz*bs2;
649       for (i=0; i<nz; i++) {
650 #if defined(PETSC_USE_COMPLEX)
651         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
652 #else
653         sum += (*v)*(*v); v++;
654 #endif
655       }
656       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
657       *nrm = sqrt(*nrm);
658     } else if (type == NORM_1) { /* max column sum */
659       PetscReal *tmp,*tmp2;
660       PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
661       ierr = PetscMalloc((2*mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
662       tmp2 = tmp + mat->cmap->N;
663       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
664       v = amat->a; jj = amat->j;
665       for (i=0; i<amat->nz; i++) {
666         for (j=0; j<bs; j++){
667           col = bs*(cstart + *jj) + j; /* column index */
668           for (row=0; row<bs; row++){
669             tmp[col] += PetscAbsScalar(*v);  v++;
670           }
671         }
672         jj++;
673       }
674       v = bmat->a; jj = bmat->j;
675       for (i=0; i<bmat->nz; i++) {
676         for (j=0; j<bs; j++){
677           col = bs*garray[*jj] + j;
678           for (row=0; row<bs; row++){
679             tmp[col] += PetscAbsScalar(*v); v++;
680           }
681         }
682         jj++;
683       }
684       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
685       *nrm = 0.0;
686       for (j=0; j<mat->cmap->N; j++) {
687         if (tmp2[j] > *nrm) *nrm = tmp2[j];
688       }
689       ierr = PetscFree(tmp);CHKERRQ(ierr);
690     } else if (type == NORM_INFINITY) { /* max row sum */
691       PetscReal *sums;
692       ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr)
693       sum = 0.0;
694       for (j=0; j<amat->mbs; j++) {
695         for (row=0; row<bs; row++) sums[row] = 0.0;
696         v = amat->a + bs2*amat->i[j];
697         nz = amat->i[j+1]-amat->i[j];
698         for (i=0; i<nz; i++) {
699           for (col=0; col<bs; col++){
700             for (row=0; row<bs; row++){
701               sums[row] += PetscAbsScalar(*v); v++;
702             }
703           }
704         }
705         v = bmat->a + bs2*bmat->i[j];
706         nz = bmat->i[j+1]-bmat->i[j];
707         for (i=0; i<nz; i++) {
708           for (col=0; col<bs; col++){
709             for (row=0; row<bs; row++){
710               sums[row] += PetscAbsScalar(*v); v++;
711             }
712           }
713         }
714         for (row=0; row<bs; row++){
715           if (sums[row] > sum) sum = sums[row];
716         }
717       }
718       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
719       ierr = PetscFree(sums);CHKERRQ(ierr);
720     } else {
721       SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
722     }
723   }
724   PetscFunctionReturn(0);
725 }
726 
727 /*
728   Creates the hash table, and sets the table
729   This table is created only once.
730   If new entried need to be added to the matrix
731   then the hash table has to be destroyed and
732   recreated.
733 */
734 #undef __FUNCT__
735 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
736 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
737 {
738   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
739   Mat            A = baij->A,B=baij->B;
740   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
741   PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
742   PetscErrorCode ierr;
743   PetscInt       size,bs2=baij->bs2,rstart=baij->rstartbs;
744   PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
745   PetscInt       *HT,key;
746   MatScalar      **HD;
747   PetscReal      tmp;
748 #if defined(PETSC_USE_INFO)
749   PetscInt       ct=0,max=0;
750 #endif
751 
752   PetscFunctionBegin;
753   baij->ht_size=(PetscInt)(factor*nz);
754   size = baij->ht_size;
755 
756   if (baij->ht) {
757     PetscFunctionReturn(0);
758   }
759 
760   /* Allocate Memory for Hash Table */
761   ierr     = PetscMalloc((size)*(sizeof(PetscInt)+sizeof(MatScalar*))+1,&baij->hd);CHKERRQ(ierr);
762   baij->ht = (PetscInt*)(baij->hd + size);
763   HD       = baij->hd;
764   HT       = baij->ht;
765 
766 
767   ierr = PetscMemzero(HD,size*(sizeof(PetscInt)+sizeof(PetscScalar*)));CHKERRQ(ierr);
768 
769 
770   /* Loop Over A */
771   for (i=0; i<a->mbs; i++) {
772     for (j=ai[i]; j<ai[i+1]; j++) {
773       row = i+rstart;
774       col = aj[j]+cstart;
775 
776       key = row*Nbs + col + 1;
777       h1  = HASH(size,key,tmp);
778       for (k=0; k<size; k++){
779         if (!HT[(h1+k)%size]) {
780           HT[(h1+k)%size] = key;
781           HD[(h1+k)%size] = a->a + j*bs2;
782           break;
783 #if defined(PETSC_USE_INFO)
784         } else {
785           ct++;
786 #endif
787         }
788       }
789 #if defined(PETSC_USE_INFO)
790       if (k> max) max = k;
791 #endif
792     }
793   }
794   /* Loop Over B */
795   for (i=0; i<b->mbs; i++) {
796     for (j=bi[i]; j<bi[i+1]; j++) {
797       row = i+rstart;
798       col = garray[bj[j]];
799       key = row*Nbs + col + 1;
800       h1  = HASH(size,key,tmp);
801       for (k=0; k<size; k++){
802         if (!HT[(h1+k)%size]) {
803           HT[(h1+k)%size] = key;
804           HD[(h1+k)%size] = b->a + j*bs2;
805           break;
806 #if defined(PETSC_USE_INFO)
807         } else {
808           ct++;
809 #endif
810         }
811       }
812 #if defined(PETSC_USE_INFO)
813       if (k> max) max = k;
814 #endif
815     }
816   }
817 
818   /* Print Summary */
819 #if defined(PETSC_USE_INFO)
820   for (i=0,j=0; i<size; i++) {
821     if (HT[i]) {j++;}
822   }
823   ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
824 #endif
825   PetscFunctionReturn(0);
826 }
827 
828 #undef __FUNCT__
829 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
830 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
831 {
832   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
833   PetscErrorCode ierr;
834   PetscInt       nstash,reallocs;
835   InsertMode     addv;
836 
837   PetscFunctionBegin;
838   if (baij->donotstash) {
839     PetscFunctionReturn(0);
840   }
841 
842   /* make sure all processors are either in INSERTMODE or ADDMODE */
843   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
844   if (addv == (ADD_VALUES|INSERT_VALUES)) {
845     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
846   }
847   mat->insertmode = addv; /* in case this processor had no cache */
848 
849   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
850   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
851   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
852   ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
853   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
854   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
855   PetscFunctionReturn(0);
856 }
857 
858 #undef __FUNCT__
859 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
860 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
861 {
862   Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
863   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)baij->A->data;
864   PetscErrorCode ierr;
865   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
866   PetscInt       *row,*col;
867   PetscTruth     r1,r2,r3,other_disassembled;
868   MatScalar      *val;
869   InsertMode     addv = mat->insertmode;
870   PetscMPIInt    n;
871 
872   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
873   PetscFunctionBegin;
874   if (!baij->donotstash) {
875     while (1) {
876       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
877       if (!flg) break;
878 
879       for (i=0; i<n;) {
880         /* Now identify the consecutive vals belonging to the same row */
881         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
882         if (j < n) ncols = j-i;
883         else       ncols = n-i;
884         /* Now assemble all these values with a single function call */
885         ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
886         i = j;
887       }
888     }
889     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
890     /* Now process the block-stash. Since the values are stashed column-oriented,
891        set the roworiented flag to column oriented, and after MatSetValues()
892        restore the original flags */
893     r1 = baij->roworiented;
894     r2 = a->roworiented;
895     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
896     baij->roworiented = PETSC_FALSE;
897     a->roworiented    = PETSC_FALSE;
898     (((Mat_SeqBAIJ*)baij->B->data))->roworiented    = PETSC_FALSE; /* b->roworiented */
899     while (1) {
900       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
901       if (!flg) break;
902 
903       for (i=0; i<n;) {
904         /* Now identify the consecutive vals belonging to the same row */
905         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
906         if (j < n) ncols = j-i;
907         else       ncols = n-i;
908         ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
909         i = j;
910       }
911     }
912     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
913     baij->roworiented = r1;
914     a->roworiented    = r2;
915     ((Mat_SeqBAIJ*)baij->B->data)->roworiented    = r3; /* b->roworiented */
916   }
917 
918   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
919   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
920 
921   /* determine if any processor has disassembled, if so we must
922      also disassemble ourselfs, in order that we may reassemble. */
923   /*
924      if nonzero structure of submatrix B cannot change then we know that
925      no processor disassembled thus we can skip this stuff
926   */
927   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
928     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
929     if (mat->was_assembled && !other_disassembled) {
930       ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
931     }
932   }
933 
934   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
935     ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
936   }
937   ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
938   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
939   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
940 
941 #if defined(PETSC_USE_INFO)
942   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
943     ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);
944     baij->ht_total_ct  = 0;
945     baij->ht_insert_ct = 0;
946   }
947 #endif
948   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
949     ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
950     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
951     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
952   }
953 
954   ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
955   baij->rowvalues = 0;
956   PetscFunctionReturn(0);
957 }
958 
959 #undef __FUNCT__
960 #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
961 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
962 {
963   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
964   PetscErrorCode    ierr;
965   PetscMPIInt       size = baij->size,rank = baij->rank;
966   PetscInt          bs = mat->rmap->bs;
967   PetscTruth        iascii,isdraw;
968   PetscViewer       sviewer;
969   PetscViewerFormat format;
970 
971   PetscFunctionBegin;
972   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
973   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
974   if (iascii) {
975     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
976     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
977       MatInfo info;
978       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
979       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
980       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
981               rank,mat->rmap->N,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs,
982               mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
983       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
984       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
985       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
986       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
987       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
988       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
989       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
990       PetscFunctionReturn(0);
991     } else if (format == PETSC_VIEWER_ASCII_INFO) {
992       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
993       PetscFunctionReturn(0);
994     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
995       PetscFunctionReturn(0);
996     }
997   }
998 
999   if (isdraw) {
1000     PetscDraw       draw;
1001     PetscTruth isnull;
1002     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1003     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1004   }
1005 
1006   if (size == 1) {
1007     ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1008     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
1009   } else {
1010     /* assemble the entire matrix onto first processor. */
1011     Mat         A;
1012     Mat_SeqBAIJ *Aloc;
1013     PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
1014     MatScalar   *a;
1015 
1016     /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
1017     /* Perhaps this should be the type of mat? */
1018     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1019     if (!rank) {
1020       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1021     } else {
1022       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1023     }
1024     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
1025     ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1026     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1027 
1028     /* copy over the A part */
1029     Aloc = (Mat_SeqBAIJ*)baij->A->data;
1030     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1031     ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
1032 
1033     for (i=0; i<mbs; i++) {
1034       rvals[0] = bs*(baij->rstartbs + i);
1035       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1036       for (j=ai[i]; j<ai[i+1]; j++) {
1037         col = (baij->cstartbs+aj[j])*bs;
1038         for (k=0; k<bs; k++) {
1039           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1040           col++; a += bs;
1041         }
1042       }
1043     }
1044     /* copy over the B part */
1045     Aloc = (Mat_SeqBAIJ*)baij->B->data;
1046     ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1047     for (i=0; i<mbs; i++) {
1048       rvals[0] = bs*(baij->rstartbs + i);
1049       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1050       for (j=ai[i]; j<ai[i+1]; j++) {
1051         col = baij->garray[aj[j]]*bs;
1052         for (k=0; k<bs; k++) {
1053           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1054           col++; a += bs;
1055         }
1056       }
1057     }
1058     ierr = PetscFree(rvals);CHKERRQ(ierr);
1059     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1060     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1061     /*
1062        Everyone has to call to draw the matrix since the graphics waits are
1063        synchronized across all processors that share the PetscDraw object
1064     */
1065     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1066     if (!rank) {
1067       ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1068       ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1069     }
1070     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1071     ierr = MatDestroy(A);CHKERRQ(ierr);
1072   }
1073   PetscFunctionReturn(0);
1074 }
1075 
1076 #undef __FUNCT__
1077 #define __FUNCT__ "MatView_MPIBAIJ"
1078 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
1079 {
1080   PetscErrorCode ierr;
1081   PetscTruth     iascii,isdraw,issocket,isbinary;
1082 
1083   PetscFunctionBegin;
1084   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1085   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1086   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1087   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1088   if (iascii || isdraw || issocket || isbinary) {
1089     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1090   } else {
1091     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
1092   }
1093   PetscFunctionReturn(0);
1094 }
1095 
1096 #undef __FUNCT__
1097 #define __FUNCT__ "MatDestroy_MPIBAIJ"
1098 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
1099 {
1100   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1101   PetscErrorCode ierr;
1102 
1103   PetscFunctionBegin;
1104 #if defined(PETSC_USE_LOG)
1105   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
1106 #endif
1107   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1108   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
1109   ierr = MatDestroy(baij->A);CHKERRQ(ierr);
1110   ierr = MatDestroy(baij->B);CHKERRQ(ierr);
1111 #if defined (PETSC_USE_CTABLE)
1112   if (baij->colmap) {ierr = PetscTableDestroy(baij->colmap);CHKERRQ(ierr);}
1113 #else
1114   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
1115 #endif
1116   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
1117   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
1118   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
1119   ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
1120   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
1121   ierr = PetscFree(baij->hd);CHKERRQ(ierr);
1122   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
1123   ierr = PetscFree(baij);CHKERRQ(ierr);
1124 
1125   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1126   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1127   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1128   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
1129   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1130   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1131   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
1132   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr);
1133   PetscFunctionReturn(0);
1134 }
1135 
1136 #undef __FUNCT__
1137 #define __FUNCT__ "MatMult_MPIBAIJ"
1138 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1139 {
1140   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1141   PetscErrorCode ierr;
1142   PetscInt       nt;
1143 
1144   PetscFunctionBegin;
1145   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1146   if (nt != A->cmap->n) {
1147     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1148   }
1149   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1150   if (nt != A->rmap->n) {
1151     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
1152   }
1153   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1154   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1155   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1156   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1157   PetscFunctionReturn(0);
1158 }
1159 
1160 #undef __FUNCT__
1161 #define __FUNCT__ "MatMultAdd_MPIBAIJ"
1162 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1163 {
1164   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1165   PetscErrorCode ierr;
1166 
1167   PetscFunctionBegin;
1168   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1169   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1170   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1171   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1172   PetscFunctionReturn(0);
1173 }
1174 
1175 #undef __FUNCT__
1176 #define __FUNCT__ "MatMultTranspose_MPIBAIJ"
1177 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1178 {
1179   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1180   PetscErrorCode ierr;
1181   PetscTruth     merged;
1182 
1183   PetscFunctionBegin;
1184   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1185   /* do nondiagonal part */
1186   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1187   if (!merged) {
1188     /* send it on its way */
1189     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1190     /* do local part */
1191     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1192     /* receive remote parts: note this assumes the values are not actually */
1193     /* inserted in yy until the next line */
1194     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1195   } else {
1196     /* do local part */
1197     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1198     /* send it on its way */
1199     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1200     /* values actually were received in the Begin() but we need to call this nop */
1201     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1202   }
1203   PetscFunctionReturn(0);
1204 }
1205 
1206 #undef __FUNCT__
1207 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
1208 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1209 {
1210   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1211   PetscErrorCode ierr;
1212 
1213   PetscFunctionBegin;
1214   /* do nondiagonal part */
1215   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1216   /* send it on its way */
1217   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1218   /* do local part */
1219   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1220   /* receive remote parts: note this assumes the values are not actually */
1221   /* inserted in yy until the next line, which is true for my implementation*/
1222   /* but is not perhaps always true. */
1223   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1224   PetscFunctionReturn(0);
1225 }
1226 
1227 /*
1228   This only works correctly for square matrices where the subblock A->A is the
1229    diagonal block
1230 */
1231 #undef __FUNCT__
1232 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
1233 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1234 {
1235   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1236   PetscErrorCode ierr;
1237 
1238   PetscFunctionBegin;
1239   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1240   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1241   PetscFunctionReturn(0);
1242 }
1243 
1244 #undef __FUNCT__
1245 #define __FUNCT__ "MatScale_MPIBAIJ"
1246 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
1247 {
1248   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1249   PetscErrorCode ierr;
1250 
1251   PetscFunctionBegin;
1252   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1253   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1254   PetscFunctionReturn(0);
1255 }
1256 
1257 #undef __FUNCT__
1258 #define __FUNCT__ "MatGetRow_MPIBAIJ"
1259 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1260 {
1261   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
1262   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1263   PetscErrorCode ierr;
1264   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1265   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
1266   PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;
1267 
1268   PetscFunctionBegin;
1269   if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1270   mat->getrowactive = PETSC_TRUE;
1271 
1272   if (!mat->rowvalues && (idx || v)) {
1273     /*
1274         allocate enough space to hold information from the longest row.
1275     */
1276     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1277     PetscInt     max = 1,mbs = mat->mbs,tmp;
1278     for (i=0; i<mbs; i++) {
1279       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1280       if (max < tmp) { max = tmp; }
1281     }
1282     ierr = PetscMalloc(max*bs2*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1283     mat->rowindices = (PetscInt*)(mat->rowvalues + max*bs2);
1284   }
1285 
1286   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows")
1287   lrow = row - brstart;
1288 
1289   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1290   if (!v)   {pvA = 0; pvB = 0;}
1291   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1292   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1293   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1294   nztot = nzA + nzB;
1295 
1296   cmap  = mat->garray;
1297   if (v  || idx) {
1298     if (nztot) {
1299       /* Sort by increasing column numbers, assuming A and B already sorted */
1300       PetscInt imark = -1;
1301       if (v) {
1302         *v = v_p = mat->rowvalues;
1303         for (i=0; i<nzB; i++) {
1304           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1305           else break;
1306         }
1307         imark = i;
1308         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1309         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1310       }
1311       if (idx) {
1312         *idx = idx_p = mat->rowindices;
1313         if (imark > -1) {
1314           for (i=0; i<imark; i++) {
1315             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1316           }
1317         } else {
1318           for (i=0; i<nzB; i++) {
1319             if (cmap[cworkB[i]/bs] < cstart)
1320               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1321             else break;
1322           }
1323           imark = i;
1324         }
1325         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1326         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1327       }
1328     } else {
1329       if (idx) *idx = 0;
1330       if (v)   *v   = 0;
1331     }
1332   }
1333   *nz = nztot;
1334   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1335   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1336   PetscFunctionReturn(0);
1337 }
1338 
1339 #undef __FUNCT__
1340 #define __FUNCT__ "MatRestoreRow_MPIBAIJ"
1341 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1342 {
1343   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1344 
1345   PetscFunctionBegin;
1346   if (!baij->getrowactive) {
1347     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1348   }
1349   baij->getrowactive = PETSC_FALSE;
1350   PetscFunctionReturn(0);
1351 }
1352 
1353 #undef __FUNCT__
1354 #define __FUNCT__ "MatZeroEntries_MPIBAIJ"
1355 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1356 {
1357   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1358   PetscErrorCode ierr;
1359 
1360   PetscFunctionBegin;
1361   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1362   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1363   PetscFunctionReturn(0);
1364 }
1365 
1366 #undef __FUNCT__
1367 #define __FUNCT__ "MatGetInfo_MPIBAIJ"
1368 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1369 {
1370   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
1371   Mat            A = a->A,B = a->B;
1372   PetscErrorCode ierr;
1373   PetscReal      isend[5],irecv[5];
1374 
1375   PetscFunctionBegin;
1376   info->block_size     = (PetscReal)matin->rmap->bs;
1377   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1378   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1379   isend[3] = info->memory;  isend[4] = info->mallocs;
1380   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1381   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1382   isend[3] += info->memory;  isend[4] += info->mallocs;
1383   if (flag == MAT_LOCAL) {
1384     info->nz_used      = isend[0];
1385     info->nz_allocated = isend[1];
1386     info->nz_unneeded  = isend[2];
1387     info->memory       = isend[3];
1388     info->mallocs      = isend[4];
1389   } else if (flag == MAT_GLOBAL_MAX) {
1390     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1391     info->nz_used      = irecv[0];
1392     info->nz_allocated = irecv[1];
1393     info->nz_unneeded  = irecv[2];
1394     info->memory       = irecv[3];
1395     info->mallocs      = irecv[4];
1396   } else if (flag == MAT_GLOBAL_SUM) {
1397     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1398     info->nz_used      = irecv[0];
1399     info->nz_allocated = irecv[1];
1400     info->nz_unneeded  = irecv[2];
1401     info->memory       = irecv[3];
1402     info->mallocs      = irecv[4];
1403   } else {
1404     SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1405   }
1406   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1407   info->fill_ratio_needed = 0;
1408   info->factor_mallocs    = 0;
1409   PetscFunctionReturn(0);
1410 }
1411 
1412 #undef __FUNCT__
1413 #define __FUNCT__ "MatSetOption_MPIBAIJ"
1414 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscTruth flg)
1415 {
1416   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1417   PetscErrorCode ierr;
1418 
1419   PetscFunctionBegin;
1420   switch (op) {
1421   case MAT_NEW_NONZERO_LOCATIONS:
1422   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1423   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1424   case MAT_KEEP_NONZERO_PATTERN:
1425   case MAT_NEW_NONZERO_LOCATION_ERR:
1426     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1427     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1428     break;
1429   case MAT_ROW_ORIENTED:
1430     a->roworiented = flg;
1431     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1432     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1433     break;
1434   case MAT_NEW_DIAGONALS:
1435     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1436     break;
1437   case MAT_IGNORE_OFF_PROC_ENTRIES:
1438     a->donotstash = flg;
1439     break;
1440   case MAT_USE_HASH_TABLE:
1441     a->ht_flag = flg;
1442     break;
1443   case MAT_SYMMETRIC:
1444   case MAT_STRUCTURALLY_SYMMETRIC:
1445   case MAT_HERMITIAN:
1446   case MAT_SYMMETRY_ETERNAL:
1447     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1448     break;
1449   default:
1450     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1451   }
1452   PetscFunctionReturn(0);
1453 }
1454 
1455 #undef __FUNCT__
1456 #define __FUNCT__ "MatTranspose_MPIBAIJ("
1457 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
1458 {
1459   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
1460   Mat_SeqBAIJ    *Aloc;
1461   Mat            B;
1462   PetscErrorCode ierr;
1463   PetscInt       M=A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
1464   PetscInt       bs=A->rmap->bs,mbs=baij->mbs;
1465   MatScalar      *a;
1466 
1467   PetscFunctionBegin;
1468   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1469   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1470     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
1471     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1472     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1473     ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1474   } else {
1475     B = *matout;
1476   }
1477 
1478   /* copy over the A part */
1479   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1480   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1481   ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
1482 
1483   for (i=0; i<mbs; i++) {
1484     rvals[0] = bs*(baij->rstartbs + i);
1485     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1486     for (j=ai[i]; j<ai[i+1]; j++) {
1487       col = (baij->cstartbs+aj[j])*bs;
1488       for (k=0; k<bs; k++) {
1489         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1490         col++; a += bs;
1491       }
1492     }
1493   }
1494   /* copy over the B part */
1495   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1496   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1497   for (i=0; i<mbs; i++) {
1498     rvals[0] = bs*(baij->rstartbs + i);
1499     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1500     for (j=ai[i]; j<ai[i+1]; j++) {
1501       col = baij->garray[aj[j]]*bs;
1502       for (k=0; k<bs; k++) {
1503         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1504         col++; a += bs;
1505       }
1506     }
1507   }
1508   ierr = PetscFree(rvals);CHKERRQ(ierr);
1509   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1510   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1511 
1512   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1513     *matout = B;
1514   } else {
1515     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1516   }
1517   PetscFunctionReturn(0);
1518 }
1519 
1520 #undef __FUNCT__
1521 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
1522 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1523 {
1524   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1525   Mat            a = baij->A,b = baij->B;
1526   PetscErrorCode ierr;
1527   PetscInt       s1,s2,s3;
1528 
1529   PetscFunctionBegin;
1530   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1531   if (rr) {
1532     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1533     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1534     /* Overlap communication with computation. */
1535     ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1536   }
1537   if (ll) {
1538     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1539     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1540     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1541   }
1542   /* scale  the diagonal block */
1543   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1544 
1545   if (rr) {
1546     /* Do a scatter end and then right scale the off-diagonal block */
1547     ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1548     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1549   }
1550 
1551   PetscFunctionReturn(0);
1552 }
1553 
1554 #undef __FUNCT__
1555 #define __FUNCT__ "MatZeroRows_MPIBAIJ"
1556 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
1557 {
1558   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1559   PetscErrorCode ierr;
1560   PetscMPIInt    imdex,size = l->size,n,rank = l->rank;
1561   PetscInt       i,*owners = A->rmap->range;
1562   PetscInt       *nprocs,j,idx,nsends,row;
1563   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
1564   PetscInt       *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source,lastidx = -1;
1565   PetscInt       *lens,*lrows,*values,rstart_bs=A->rmap->rstart;
1566   MPI_Comm       comm = ((PetscObject)A)->comm;
1567   MPI_Request    *send_waits,*recv_waits;
1568   MPI_Status     recv_status,*send_status;
1569 #if defined(PETSC_DEBUG)
1570   PetscTruth     found = PETSC_FALSE;
1571 #endif
1572 
1573   PetscFunctionBegin;
1574   /*  first count number of contributors to each processor */
1575   ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
1576   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
1577   ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
1578   j = 0;
1579   for (i=0; i<N; i++) {
1580     if (lastidx > (idx = rows[i])) j = 0;
1581     lastidx = idx;
1582     for (; j<size; j++) {
1583       if (idx >= owners[j] && idx < owners[j+1]) {
1584         nprocs[2*j]++;
1585         nprocs[2*j+1] = 1;
1586         owner[i] = j;
1587 #if defined(PETSC_DEBUG)
1588         found = PETSC_TRUE;
1589 #endif
1590         break;
1591       }
1592     }
1593 #if defined(PETSC_DEBUG)
1594     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
1595     found = PETSC_FALSE;
1596 #endif
1597   }
1598   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
1599 
1600   /* inform other processors of number of messages and max length*/
1601   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
1602 
1603   /* post receives:   */
1604   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
1605   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
1606   for (i=0; i<nrecvs; i++) {
1607     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
1608   }
1609 
1610   /* do sends:
1611      1) starts[i] gives the starting index in svalues for stuff going to
1612      the ith processor
1613   */
1614   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
1615   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
1616   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
1617   starts[0]  = 0;
1618   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1619   for (i=0; i<N; i++) {
1620     svalues[starts[owner[i]]++] = rows[i];
1621   }
1622 
1623   starts[0] = 0;
1624   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1625   count = 0;
1626   for (i=0; i<size; i++) {
1627     if (nprocs[2*i+1]) {
1628       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
1629     }
1630   }
1631   ierr = PetscFree(starts);CHKERRQ(ierr);
1632 
1633   base = owners[rank];
1634 
1635   /*  wait on receives */
1636   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
1637   source = lens + nrecvs;
1638   count  = nrecvs; slen = 0;
1639   while (count) {
1640     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
1641     /* unpack receives into our local space */
1642     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
1643     source[imdex]  = recv_status.MPI_SOURCE;
1644     lens[imdex]    = n;
1645     slen          += n;
1646     count--;
1647   }
1648   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1649 
1650   /* move the data into the send scatter */
1651   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
1652   count = 0;
1653   for (i=0; i<nrecvs; i++) {
1654     values = rvalues + i*nmax;
1655     for (j=0; j<lens[i]; j++) {
1656       lrows[count++] = values[j] - base;
1657     }
1658   }
1659   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1660   ierr = PetscFree(lens);CHKERRQ(ierr);
1661   ierr = PetscFree(owner);CHKERRQ(ierr);
1662   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1663 
1664   /* actually zap the local rows */
1665   /*
1666         Zero the required rows. If the "diagonal block" of the matrix
1667      is square and the user wishes to set the diagonal we use separate
1668      code so that MatSetValues() is not called for each diagonal allocating
1669      new memory, thus calling lots of mallocs and slowing things down.
1670 
1671   */
1672   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1673   ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0);CHKERRQ(ierr);
1674   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
1675     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag);CHKERRQ(ierr);
1676   } else if (diag != 0.0) {
1677     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr);
1678     if (((Mat_SeqBAIJ*)l->A->data)->nonew) {
1679       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1680 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1681     }
1682     for (i=0; i<slen; i++) {
1683       row  = lrows[i] + rstart_bs;
1684       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
1685     }
1686     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1687     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1688   } else {
1689     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr);
1690   }
1691 
1692   ierr = PetscFree(lrows);CHKERRQ(ierr);
1693 
1694   /* wait on sends */
1695   if (nsends) {
1696     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1697     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1698     ierr = PetscFree(send_status);CHKERRQ(ierr);
1699   }
1700   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1701   ierr = PetscFree(svalues);CHKERRQ(ierr);
1702 
1703   PetscFunctionReturn(0);
1704 }
1705 
1706 #undef __FUNCT__
1707 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
1708 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1709 {
1710   Mat_MPIBAIJ    *a   = (Mat_MPIBAIJ*)A->data;
1711   PetscErrorCode ierr;
1712 
1713   PetscFunctionBegin;
1714   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1715   PetscFunctionReturn(0);
1716 }
1717 
1718 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
1719 
1720 #undef __FUNCT__
1721 #define __FUNCT__ "MatEqual_MPIBAIJ"
1722 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag)
1723 {
1724   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1725   Mat            a,b,c,d;
1726   PetscTruth     flg;
1727   PetscErrorCode ierr;
1728 
1729   PetscFunctionBegin;
1730   a = matA->A; b = matA->B;
1731   c = matB->A; d = matB->B;
1732 
1733   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1734   if (flg) {
1735     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1736   }
1737   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1738   PetscFunctionReturn(0);
1739 }
1740 
1741 #undef __FUNCT__
1742 #define __FUNCT__ "MatCopy_MPIBAIJ"
1743 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
1744 {
1745   PetscErrorCode ierr;
1746   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ *)A->data;
1747   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ *)B->data;
1748 
1749   PetscFunctionBegin;
1750   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1751   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1752     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1753   } else {
1754     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1755     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1756   }
1757   PetscFunctionReturn(0);
1758 }
1759 
1760 #undef __FUNCT__
1761 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ"
1762 PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A)
1763 {
1764   PetscErrorCode ierr;
1765 
1766   PetscFunctionBegin;
1767   ierr =  MatMPIBAIJSetPreallocation(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1768   PetscFunctionReturn(0);
1769 }
1770 
1771 #include "petscblaslapack.h"
1772 #undef __FUNCT__
1773 #define __FUNCT__ "MatAXPY_MPIBAIJ"
1774 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1775 {
1776   PetscErrorCode ierr;
1777   Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data;
1778   PetscBLASInt   bnz,one=1;
1779   Mat_SeqBAIJ    *x,*y;
1780 
1781   PetscFunctionBegin;
1782   if (str == SAME_NONZERO_PATTERN) {
1783     PetscScalar alpha = a;
1784     x = (Mat_SeqBAIJ *)xx->A->data;
1785     y = (Mat_SeqBAIJ *)yy->A->data;
1786     bnz = PetscBLASIntCast(x->nz);
1787     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1788     x = (Mat_SeqBAIJ *)xx->B->data;
1789     y = (Mat_SeqBAIJ *)yy->B->data;
1790     bnz = PetscBLASIntCast(x->nz);
1791     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1792   } else {
1793     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1794   }
1795   PetscFunctionReturn(0);
1796 }
1797 
1798 #undef __FUNCT__
1799 #define __FUNCT__ "MatSetBlockSize_MPIBAIJ"
1800 PetscErrorCode MatSetBlockSize_MPIBAIJ(Mat A,PetscInt bs)
1801 {
1802   Mat_MPIBAIJ    *a   = (Mat_MPIBAIJ*)A->data;
1803   PetscErrorCode ierr;
1804 
1805   PetscFunctionBegin;
1806   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1807   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1808   PetscFunctionReturn(0);
1809 }
1810 
1811 #undef __FUNCT__
1812 #define __FUNCT__ "MatRealPart_MPIBAIJ"
1813 PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
1814 {
1815   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ*)A->data;
1816   PetscErrorCode ierr;
1817 
1818   PetscFunctionBegin;
1819   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1820   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1821   PetscFunctionReturn(0);
1822 }
1823 
1824 #undef __FUNCT__
1825 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ"
1826 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
1827 {
1828   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ*)A->data;
1829   PetscErrorCode ierr;
1830 
1831   PetscFunctionBegin;
1832   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1833   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1834   PetscFunctionReturn(0);
1835 }
1836 
1837 #undef __FUNCT__
1838 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ"
1839 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
1840 {
1841   PetscErrorCode ierr;
1842   IS             iscol_local;
1843   PetscInt       csize;
1844 
1845   PetscFunctionBegin;
1846   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
1847   if (call == MAT_REUSE_MATRIX) {
1848     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
1849     if (!iscol_local) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
1850   } else {
1851     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
1852   }
1853   ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
1854   if (call == MAT_INITIAL_MATRIX) {
1855     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
1856     ierr = ISDestroy(iscol_local);CHKERRQ(ierr);
1857   }
1858   PetscFunctionReturn(0);
1859 }
1860 
1861 #undef __FUNCT__
1862 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ"
1863 /*
1864     Not great since it makes two copies of the submatrix, first an SeqBAIJ
1865   in local and then by concatenating the local matrices the end result.
1866   Writing it directly would be much like MatGetSubMatrices_MPIBAIJ()
1867 */
1868 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
1869 {
1870   PetscErrorCode ierr;
1871   PetscMPIInt    rank,size;
1872   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs;
1873   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
1874   Mat            *local,M,Mreuse;
1875   MatScalar      *vwork,*aa;
1876   MPI_Comm       comm = ((PetscObject)mat)->comm;
1877   Mat_SeqBAIJ    *aij;
1878 
1879 
1880   PetscFunctionBegin;
1881   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1882   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1883 
1884   if (call ==  MAT_REUSE_MATRIX) {
1885     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
1886     if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
1887     local = &Mreuse;
1888     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
1889   } else {
1890     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
1891     Mreuse = *local;
1892     ierr   = PetscFree(local);CHKERRQ(ierr);
1893   }
1894 
1895   /*
1896       m - number of local rows
1897       n - number of columns (same on all processors)
1898       rstart - first row in new global matrix generated
1899   */
1900   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
1901   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
1902   m    = m/bs;
1903   n    = n/bs;
1904 
1905   if (call == MAT_INITIAL_MATRIX) {
1906     aij = (Mat_SeqBAIJ*)(Mreuse)->data;
1907     ii  = aij->i;
1908     jj  = aij->j;
1909 
1910     /*
1911         Determine the number of non-zeros in the diagonal and off-diagonal
1912         portions of the matrix in order to do correct preallocation
1913     */
1914 
1915     /* first get start and end of "diagonal" columns */
1916     if (csize == PETSC_DECIDE) {
1917       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
1918       if (mglobal == n*bs) { /* square matrix */
1919 	nlocal = m;
1920       } else {
1921         nlocal = n/size + ((n % size) > rank);
1922       }
1923     } else {
1924       nlocal = csize/bs;
1925     }
1926     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
1927     rstart = rend - nlocal;
1928     if (rank == size - 1 && rend != n) {
1929       SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
1930     }
1931 
1932     /* next, compute all the lengths */
1933     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
1934     olens = dlens + m;
1935     for (i=0; i<m; i++) {
1936       jend = ii[i+1] - ii[i];
1937       olen = 0;
1938       dlen = 0;
1939       for (j=0; j<jend; j++) {
1940         if (*jj < rstart || *jj >= rend) olen++;
1941         else dlen++;
1942         jj++;
1943       }
1944       olens[i] = olen;
1945       dlens[i] = dlen;
1946     }
1947     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
1948     ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr);
1949     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
1950     ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
1951     ierr = PetscFree(dlens);CHKERRQ(ierr);
1952   } else {
1953     PetscInt ml,nl;
1954 
1955     M = *newmat;
1956     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
1957     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
1958     ierr = MatZeroEntries(M);CHKERRQ(ierr);
1959     /*
1960          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
1961        rather than the slower MatSetValues().
1962     */
1963     M->was_assembled = PETSC_TRUE;
1964     M->assembled     = PETSC_FALSE;
1965   }
1966   ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
1967   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
1968   aij = (Mat_SeqBAIJ*)(Mreuse)->data;
1969   ii  = aij->i;
1970   jj  = aij->j;
1971   aa  = aij->a;
1972   for (i=0; i<m; i++) {
1973     row   = rstart/bs + i;
1974     nz    = ii[i+1] - ii[i];
1975     cwork = jj;     jj += nz;
1976     vwork = aa;     aa += nz;
1977     ierr = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
1978   }
1979 
1980   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1981   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1982   *newmat = M;
1983 
1984   /* save submatrix used in processor for next request */
1985   if (call ==  MAT_INITIAL_MATRIX) {
1986     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
1987     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
1988   }
1989 
1990   PetscFunctionReturn(0);
1991 }
1992 
1993 #undef __FUNCT__
1994 #define __FUNCT__ "MatPermute_MPIBAIJ"
1995 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B)
1996 {
1997   MPI_Comm       comm,pcomm;
1998   PetscInt       first,local_size,nrows;
1999   const PetscInt *rows;
2000   PetscMPIInt    size;
2001   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
2002   PetscErrorCode ierr;
2003 
2004   PetscFunctionBegin;
2005   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2006   /* make a collective version of 'rowp' */
2007   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
2008   if (pcomm==comm) {
2009     crowp = rowp;
2010   } else {
2011     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
2012     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
2013     ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr);
2014     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
2015   }
2016   /* collect the global row permutation and invert it */
2017   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
2018   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
2019   if (pcomm!=comm) {
2020     ierr = ISDestroy(crowp);CHKERRQ(ierr);
2021   }
2022   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
2023   /* get the local target indices */
2024   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
2025   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
2026   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
2027   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr);
2028   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
2029   ierr = ISDestroy(irowp);CHKERRQ(ierr);
2030   /* the column permutation is so much easier;
2031      make a local version of 'colp' and invert it */
2032   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
2033   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
2034   if (size==1) {
2035     lcolp = colp;
2036   } else {
2037     ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
2038     ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
2039     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr);
2040   }
2041   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
2042   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
2043   ierr = ISSetPermutation(icolp);CHKERRQ(ierr);
2044   if (size>1) {
2045     ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
2046     ierr = ISDestroy(lcolp);CHKERRQ(ierr);
2047   }
2048   /* now we just get the submatrix */
2049   ierr = MatGetSubMatrix_MPIBAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
2050   /* clean up */
2051   ierr = ISDestroy(lrowp);CHKERRQ(ierr);
2052   ierr = ISDestroy(icolp);CHKERRQ(ierr);
2053   PetscFunctionReturn(0);
2054 }
2055 
2056 #undef __FUNCT__
2057 #define __FUNCT__ "MatGetGhosts_MPIBAIJ"
2058 PetscErrorCode PETSCMAT_DLLEXPORT MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
2059 {
2060   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*) mat->data;
2061   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
2062 
2063   PetscFunctionBegin;
2064   if (nghosts) { *nghosts = B->nbs;}
2065   if (ghosts) {*ghosts = baij->garray;}
2066   PetscFunctionReturn(0);
2067 }
2068 
2069 EXTERN PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat);
2070 
2071 #undef __FUNCT__
2072 #define __FUNCT__ "MatFDColoringCreate_MPIBAIJ"
2073 /*
2074     This routine is almost identical to MatFDColoringCreate_MPIBAIJ()!
2075 */
2076 PetscErrorCode MatFDColoringCreate_MPIBAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c)
2077 {
2078   Mat_MPIBAIJ            *baij = (Mat_MPIBAIJ*)mat->data;
2079   PetscErrorCode        ierr;
2080   PetscMPIInt           size,*ncolsonproc,*disp,nn;
2081   PetscInt              bs,i,n,nrows,j,k,m,*rows = 0,*A_ci,*A_cj,ncols,col;
2082   const PetscInt        *is;
2083   PetscInt              nis = iscoloring->n,nctot,*cols,*B_ci,*B_cj;
2084   PetscInt              *rowhit,M,cstart,cend,colb;
2085   PetscInt              *columnsforrow,l;
2086   IS                    *isa;
2087   PetscTruth             done,flg;
2088   ISLocalToGlobalMapping map = mat->bmapping;
2089   PetscInt               *ltog = (map ? map->indices : (PetscInt*) PETSC_NULL) ,ctype=c->ctype;
2090 
2091   PetscFunctionBegin;
2092   if (!mat->assembled) {
2093     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled first; MatAssemblyBegin/End();");
2094   }
2095   if (ctype == IS_COLORING_GHOSTED && !map) SETERRQ(PETSC_ERR_ARG_INCOMP,"When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMappingBlock");
2096 
2097   ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr);
2098 
2099   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2100   M                = mat->rmap->n/bs;
2101   cstart           = mat->cmap->rstart/bs;
2102   cend             = mat->cmap->rend/bs;
2103   c->M             = mat->rmap->N/bs;  /* set the global rows and columns and local rows */
2104   c->N             = mat->cmap->N/bs;
2105   c->m             = mat->rmap->n/bs;
2106   c->rstart        = mat->rmap->rstart/bs;
2107 
2108   c->ncolors       = nis;
2109   ierr             = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr);
2110   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr);
2111   ierr             = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr);
2112   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr);
2113   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr);
2114   ierr = PetscLogObjectMemory(c,5*nis*sizeof(PetscInt));CHKERRQ(ierr);
2115 
2116   /* Allow access to data structures of local part of matrix */
2117   if (!baij->colmap) {
2118     ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
2119   }
2120   ierr = MatGetColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr);
2121   ierr = MatGetColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr);
2122 
2123   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr);
2124   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr);
2125 
2126   for (i=0; i<nis; i++) {
2127     ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr);
2128     ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr);
2129     c->ncolumns[i] = n;
2130     if (n) {
2131       ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr);
2132       ierr = PetscLogObjectMemory(c,n*sizeof(PetscInt));CHKERRQ(ierr);
2133       ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr);
2134     } else {
2135       c->columns[i]  = 0;
2136     }
2137 
2138     if (ctype == IS_COLORING_GLOBAL){
2139       /* Determine the total (parallel) number of columns of this color */
2140       ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
2141       ierr = PetscMalloc(2*size*sizeof(PetscInt*),&ncolsonproc);CHKERRQ(ierr);
2142       disp = ncolsonproc + size;
2143 
2144       nn   = PetscMPIIntCast(n);
2145       ierr = MPI_Allgather(&nn,1,MPI_INT,ncolsonproc,1,MPI_INT,((PetscObject)mat)->comm);CHKERRQ(ierr);
2146       nctot = 0; for (j=0; j<size; j++) {nctot += ncolsonproc[j];}
2147       if (!nctot) {
2148         ierr = PetscInfo(mat,"Coloring of matrix has some unneeded colors with no corresponding rows\n");CHKERRQ(ierr);
2149       }
2150 
2151       disp[0] = 0;
2152       for (j=1; j<size; j++) {
2153         disp[j] = disp[j-1] + ncolsonproc[j-1];
2154       }
2155 
2156       /* Get complete list of columns for color on each processor */
2157       ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2158       ierr = MPI_Allgatherv((void*)is,n,MPIU_INT,cols,ncolsonproc,disp,MPIU_INT,((PetscObject)mat)->comm);CHKERRQ(ierr);
2159       ierr = PetscFree(ncolsonproc);CHKERRQ(ierr);
2160     } else if (ctype == IS_COLORING_GHOSTED){
2161       /* Determine local number of columns of this color on this process, including ghost points */
2162       nctot = n;
2163       ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2164       ierr = PetscMemcpy(cols,is,n*sizeof(PetscInt));CHKERRQ(ierr);
2165     } else {
2166       SETERRQ(PETSC_ERR_SUP,"Not provided for this MatFDColoring type");
2167     }
2168 
2169     /*
2170        Mark all rows affect by these columns
2171     */
2172     /* Temporary option to allow for debugging/testing */
2173     flg  = PETSC_FALSE;
2174     ierr = PetscOptionsGetTruth(PETSC_NULL,"-matfdcoloring_slow",&flg,PETSC_NULL);CHKERRQ(ierr);
2175     if (!flg) {/*-----------------------------------------------------------------------------*/
2176       /* crude, fast version */
2177       ierr = PetscMemzero(rowhit,M*sizeof(PetscInt));CHKERRQ(ierr);
2178       /* loop over columns*/
2179       for (j=0; j<nctot; j++) {
2180         if (ctype == IS_COLORING_GHOSTED) {
2181           col = ltog[cols[j]];
2182         } else {
2183           col  = cols[j];
2184         }
2185         if (col >= cstart && col < cend) {
2186           /* column is in diagonal block of matrix */
2187           rows = A_cj + A_ci[col-cstart];
2188           m    = A_ci[col-cstart+1] - A_ci[col-cstart];
2189         } else {
2190 #if defined (PETSC_USE_CTABLE)
2191           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr)
2192 	  colb --;
2193 #else
2194           colb = baij->colmap[col] - 1;
2195 #endif
2196           if (colb == -1) {
2197             m = 0;
2198           } else {
2199             colb = colb/bs;
2200             rows = B_cj + B_ci[colb];
2201             m    = B_ci[colb+1] - B_ci[colb];
2202           }
2203         }
2204         /* loop over columns marking them in rowhit */
2205         for (k=0; k<m; k++) {
2206           rowhit[*rows++] = col + 1;
2207         }
2208       }
2209 
2210       /* count the number of hits */
2211       nrows = 0;
2212       for (j=0; j<M; j++) {
2213         if (rowhit[j]) nrows++;
2214       }
2215       c->nrows[i]         = nrows;
2216       ierr                = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
2217       ierr                = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
2218       ierr = PetscLogObjectMemory(c,2*(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr);
2219       nrows = 0;
2220       for (j=0; j<M; j++) {
2221         if (rowhit[j]) {
2222           c->rows[i][nrows]           = j;
2223           c->columnsforrow[i][nrows] = rowhit[j] - 1;
2224           nrows++;
2225         }
2226       }
2227     } else {/*-------------------------------------------------------------------------------*/
2228       /* slow version, using rowhit as a linked list */
2229       PetscInt currentcol,fm,mfm;
2230       rowhit[M] = M;
2231       nrows     = 0;
2232       /* loop over columns*/
2233       for (j=0; j<nctot; j++) {
2234         if (ctype == IS_COLORING_GHOSTED) {
2235           col = ltog[cols[j]];
2236         } else {
2237           col  = cols[j];
2238         }
2239         if (col >= cstart && col < cend) {
2240           /* column is in diagonal block of matrix */
2241           rows = A_cj + A_ci[col-cstart];
2242           m    = A_ci[col-cstart+1] - A_ci[col-cstart];
2243         } else {
2244 #if defined (PETSC_USE_CTABLE)
2245           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2246           colb --;
2247 #else
2248           colb = baij->colmap[col] - 1;
2249 #endif
2250           if (colb == -1) {
2251             m = 0;
2252           } else {
2253             colb = colb/bs;
2254             rows = B_cj + B_ci[colb];
2255             m    = B_ci[colb+1] - B_ci[colb];
2256           }
2257         }
2258 
2259         /* loop over columns marking them in rowhit */
2260         fm    = M; /* fm points to first entry in linked list */
2261         for (k=0; k<m; k++) {
2262           currentcol = *rows++;
2263 	  /* is it already in the list? */
2264           do {
2265             mfm  = fm;
2266             fm   = rowhit[fm];
2267           } while (fm < currentcol);
2268           /* not in list so add it */
2269           if (fm != currentcol) {
2270             nrows++;
2271             columnsforrow[currentcol] = col;
2272             /* next three lines insert new entry into linked list */
2273             rowhit[mfm]               = currentcol;
2274             rowhit[currentcol]        = fm;
2275             fm                        = currentcol;
2276             /* fm points to present position in list since we know the columns are sorted */
2277           } else {
2278             SETERRQ(PETSC_ERR_PLIB,"Invalid coloring of matrix detected");
2279           }
2280         }
2281       }
2282       c->nrows[i]         = nrows;
2283       ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
2284       ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
2285       ierr = PetscLogObjectMemory(c,(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr);
2286       /* now store the linked list of rows into c->rows[i] */
2287       nrows = 0;
2288       fm    = rowhit[M];
2289       do {
2290         c->rows[i][nrows]            = fm;
2291         c->columnsforrow[i][nrows++] = columnsforrow[fm];
2292         fm                           = rowhit[fm];
2293       } while (fm < M);
2294     } /* ---------------------------------------------------------------------------------------*/
2295     ierr = PetscFree(cols);CHKERRQ(ierr);
2296   }
2297 
2298   /* Optimize by adding the vscale, and scaleforrow[][] fields */
2299   /*
2300        vscale will contain the "diagonal" on processor scalings followed by the off processor
2301   */
2302   if (ctype == IS_COLORING_GLOBAL) {
2303     PetscInt *garray;
2304     ierr = PetscMalloc(baij->B->cmap->n*sizeof(PetscInt),&garray);CHKERRQ(ierr);
2305     for (i=0; i<baij->B->cmap->n/bs; i++) {
2306       for (j=0; j<bs; j++) {
2307         garray[i*bs+j] = bs*baij->garray[i]+j;
2308       }
2309     }
2310     ierr = VecCreateGhost(((PetscObject)mat)->comm,baij->A->rmap->n,PETSC_DETERMINE,baij->B->cmap->n,garray,&c->vscale);CHKERRQ(ierr);
2311     ierr = PetscFree(garray);CHKERRQ(ierr);
2312     CHKMEMQ;
2313     ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
2314     for (k=0; k<c->ncolors; k++) {
2315       ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
2316       for (l=0; l<c->nrows[k]; l++) {
2317         col = c->columnsforrow[k][l];
2318         if (col >= cstart && col < cend) {
2319           /* column is in diagonal block of matrix */
2320           colb = col - cstart;
2321         } else {
2322           /* column  is in "off-processor" part */
2323 #if defined (PETSC_USE_CTABLE)
2324           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2325           colb --;
2326 #else
2327           colb = baij->colmap[col] - 1;
2328 #endif
2329           colb = colb/bs;
2330           colb += cend - cstart;
2331         }
2332         c->vscaleforrow[k][l] = colb;
2333       }
2334     }
2335   } else if (ctype == IS_COLORING_GHOSTED) {
2336     /* Get gtol mapping */
2337     PetscInt N = mat->cmap->N, *gtol;
2338     ierr = PetscMalloc((N+1)*sizeof(PetscInt),&gtol);CHKERRQ(ierr);
2339     for (i=0; i<N; i++) gtol[i] = -1;
2340     for (i=0; i<map->n; i++) gtol[ltog[i]] = i;
2341 
2342     c->vscale = 0; /* will be created in MatFDColoringApply() */
2343     ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
2344     for (k=0; k<c->ncolors; k++) {
2345       ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
2346       for (l=0; l<c->nrows[k]; l++) {
2347         col = c->columnsforrow[k][l];      /* global column index */
2348         c->vscaleforrow[k][l] = gtol[col]; /* local column index */
2349       }
2350     }
2351     ierr = PetscFree(gtol);CHKERRQ(ierr);
2352   }
2353   ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr);
2354 
2355   ierr = PetscFree(rowhit);CHKERRQ(ierr);
2356   ierr = PetscFree(columnsforrow);CHKERRQ(ierr);
2357   ierr = MatRestoreColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr);
2358   ierr = MatRestoreColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr);
2359     CHKMEMQ;
2360   PetscFunctionReturn(0);
2361 }
2362 
2363 #undef __FUNCT__
2364 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIBAIJ"
2365 PetscErrorCode MatGetSeqNonzerostructure_MPIBAIJ(Mat A,Mat *newmat)
2366 {
2367   Mat            B;
2368   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ *)A->data;
2369   Mat_SeqBAIJ    *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data;
2370   Mat_SeqAIJ     *b;
2371   PetscErrorCode ierr;
2372   PetscMPIInt    size,rank,*recvcounts = 0,*displs = 0;
2373   PetscInt       sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs;
2374   PetscInt       m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf;
2375 
2376   PetscFunctionBegin;
2377   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
2378   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
2379 
2380   /* ----------------------------------------------------------------
2381      Tell every processor the number of nonzeros per row
2382   */
2383   ierr = PetscMalloc((A->rmap->N/bs)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
2384   for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) {
2385     lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs];
2386   }
2387   sendcount = A->rmap->rend/bs - A->rmap->rstart/bs;
2388   ierr = PetscMalloc(2*size*sizeof(PetscMPIInt),&recvcounts);CHKERRQ(ierr);
2389   displs     = recvcounts + size;
2390   for (i=0; i<size; i++) {
2391     recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs;
2392     displs[i]     = A->rmap->range[i]/bs;
2393   }
2394 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2395   ierr  = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2396 #else
2397   ierr  = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2398 #endif
2399   /* ---------------------------------------------------------------
2400      Create the sequential matrix of the same type as the local block diagonal
2401   */
2402   ierr  = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
2403   ierr  = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2404   ierr  = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2405   ierr  = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr);
2406   b = (Mat_SeqAIJ *)B->data;
2407 
2408   /*--------------------------------------------------------------------
2409     Copy my part of matrix column indices over
2410   */
2411   sendcount  = ad->nz + bd->nz;
2412   jsendbuf   = b->j + b->i[rstarts[rank]/bs];
2413   a_jsendbuf = ad->j;
2414   b_jsendbuf = bd->j;
2415   n          = A->rmap->rend/bs - A->rmap->rstart/bs;
2416   cnt        = 0;
2417   for (i=0; i<n; i++) {
2418 
2419     /* put in lower diagonal portion */
2420     m = bd->i[i+1] - bd->i[i];
2421     while (m > 0) {
2422       /* is it above diagonal (in bd (compressed) numbering) */
2423       if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break;
2424       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2425       m--;
2426     }
2427 
2428     /* put in diagonal portion */
2429     for (j=ad->i[i]; j<ad->i[i+1]; j++) {
2430       jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++;
2431     }
2432 
2433     /* put in upper diagonal portion */
2434     while (m-- > 0) {
2435       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2436     }
2437   }
2438   if (cnt != sendcount) SETERRQ2(PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt);
2439 
2440   /*--------------------------------------------------------------------
2441     Gather all column indices to all processors
2442   */
2443   for (i=0; i<size; i++) {
2444     recvcounts[i] = 0;
2445     for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) {
2446       recvcounts[i] += lens[j];
2447     }
2448   }
2449   displs[0]  = 0;
2450   for (i=1; i<size; i++) {
2451     displs[i] = displs[i-1] + recvcounts[i-1];
2452   }
2453 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2454   ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2455 #else
2456   ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2457 #endif
2458   /*--------------------------------------------------------------------
2459     Assemble the matrix into useable form (note numerical values not yet set)
2460   */
2461   /* set the b->ilen (length of each row) values */
2462   ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr);
2463   /* set the b->i indices */
2464   b->i[0] = 0;
2465   for (i=1; i<=A->rmap->N/bs; i++) {
2466     b->i[i] = b->i[i-1] + lens[i-1];
2467   }
2468   ierr = PetscFree(lens);CHKERRQ(ierr);
2469   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2470   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2471   ierr = PetscFree(recvcounts);CHKERRQ(ierr);
2472 
2473   if (A->symmetric){
2474     ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2475   } else if (A->hermitian) {
2476     ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr);
2477   } else if (A->structurally_symmetric) {
2478     ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2479   }
2480   *newmat = B;
2481   PetscFunctionReturn(0);
2482 }
2483 
2484 #undef __FUNCT__
2485 #define __FUNCT__ "MatSOR_MPIBAIJ"
2486 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2487 {
2488   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
2489   PetscErrorCode ierr;
2490   Vec            bb1 = 0;
2491 
2492   PetscFunctionBegin;
2493   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) {
2494     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2495   }
2496 
2497   if (flag == SOR_APPLY_UPPER) {
2498     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2499     PetscFunctionReturn(0);
2500   }
2501 
2502   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2503     if (flag & SOR_ZERO_INITIAL_GUESS) {
2504       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2505       its--;
2506     }
2507 
2508     while (its--) {
2509       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2510       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2511 
2512       /* update rhs: bb1 = bb - B*x */
2513       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2514       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2515 
2516       /* local sweep */
2517       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2518     }
2519   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
2520     if (flag & SOR_ZERO_INITIAL_GUESS) {
2521       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2522       its--;
2523     }
2524     while (its--) {
2525       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2526       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2527 
2528       /* update rhs: bb1 = bb - B*x */
2529       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2530       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2531 
2532       /* local sweep */
2533       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2534     }
2535   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
2536     if (flag & SOR_ZERO_INITIAL_GUESS) {
2537       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2538       its--;
2539     }
2540     while (its--) {
2541       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2542       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2543 
2544       /* update rhs: bb1 = bb - B*x */
2545       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2546       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2547 
2548       /* local sweep */
2549       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2550     }
2551   } else {
2552     SETERRQ(PETSC_ERR_SUP,"Parallel version of SOR requested not supported");
2553   }
2554 
2555   if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);}
2556   PetscFunctionReturn(0);
2557 }
2558 
2559 extern PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_BAIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2560 
2561 
2562 /* -------------------------------------------------------------------*/
2563 static struct _MatOps MatOps_Values = {
2564        MatSetValues_MPIBAIJ,
2565        MatGetRow_MPIBAIJ,
2566        MatRestoreRow_MPIBAIJ,
2567        MatMult_MPIBAIJ,
2568 /* 4*/ MatMultAdd_MPIBAIJ,
2569        MatMultTranspose_MPIBAIJ,
2570        MatMultTransposeAdd_MPIBAIJ,
2571        0,
2572        0,
2573        0,
2574 /*10*/ 0,
2575        0,
2576        0,
2577        MatSOR_MPIBAIJ,
2578        MatTranspose_MPIBAIJ,
2579 /*15*/ MatGetInfo_MPIBAIJ,
2580        MatEqual_MPIBAIJ,
2581        MatGetDiagonal_MPIBAIJ,
2582        MatDiagonalScale_MPIBAIJ,
2583        MatNorm_MPIBAIJ,
2584 /*20*/ MatAssemblyBegin_MPIBAIJ,
2585        MatAssemblyEnd_MPIBAIJ,
2586        MatSetOption_MPIBAIJ,
2587        MatZeroEntries_MPIBAIJ,
2588 /*24*/ MatZeroRows_MPIBAIJ,
2589        0,
2590        0,
2591        0,
2592        0,
2593 /*29*/ MatSetUpPreallocation_MPIBAIJ,
2594        0,
2595        0,
2596        0,
2597        0,
2598 /*34*/ MatDuplicate_MPIBAIJ,
2599        0,
2600        0,
2601        0,
2602        0,
2603 /*39*/ MatAXPY_MPIBAIJ,
2604        MatGetSubMatrices_MPIBAIJ,
2605        MatIncreaseOverlap_MPIBAIJ,
2606        MatGetValues_MPIBAIJ,
2607        MatCopy_MPIBAIJ,
2608 /*44*/ 0,
2609        MatScale_MPIBAIJ,
2610        0,
2611        0,
2612        0,
2613 /*49*/ MatSetBlockSize_MPIBAIJ,
2614        0,
2615        0,
2616        0,
2617        0,
2618 /*54*/ MatFDColoringCreate_MPIBAIJ,
2619        0,
2620        MatSetUnfactored_MPIBAIJ,
2621        MatPermute_MPIBAIJ,
2622        MatSetValuesBlocked_MPIBAIJ,
2623 /*59*/ MatGetSubMatrix_MPIBAIJ,
2624        MatDestroy_MPIBAIJ,
2625        MatView_MPIBAIJ,
2626        0,
2627        0,
2628 /*64*/ 0,
2629        0,
2630        0,
2631        0,
2632        0,
2633 /*69*/ MatGetRowMaxAbs_MPIBAIJ,
2634        0,
2635        0,
2636        0,
2637        0,
2638 /*74*/ 0,
2639        MatFDColoringApply_BAIJ,
2640        0,
2641        0,
2642        0,
2643 /*79*/ 0,
2644        0,
2645        0,
2646        0,
2647        MatLoad_MPIBAIJ,
2648 /*84*/ 0,
2649        0,
2650        0,
2651        0,
2652        0,
2653 /*89*/ 0,
2654        0,
2655        0,
2656        0,
2657        0,
2658 /*94*/ 0,
2659        0,
2660        0,
2661        0,
2662        0,
2663 /*99*/ 0,
2664        0,
2665        0,
2666        0,
2667        0,
2668 /*104*/0,
2669        MatRealPart_MPIBAIJ,
2670        MatImaginaryPart_MPIBAIJ,
2671        0,
2672        0,
2673 /*109*/0,
2674        0,
2675        0,
2676        0,
2677        0,
2678 /*114*/MatGetSeqNonzerostructure_MPIBAIJ,
2679        0,
2680        MatGetGhosts_MPIBAIJ
2681 };
2682 
2683 EXTERN_C_BEGIN
2684 #undef __FUNCT__
2685 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
2686 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
2687 {
2688   PetscFunctionBegin;
2689   *a      = ((Mat_MPIBAIJ *)A->data)->A;
2690   *iscopy = PETSC_FALSE;
2691   PetscFunctionReturn(0);
2692 }
2693 EXTERN_C_END
2694 
2695 EXTERN_C_BEGIN
2696 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*);
2697 EXTERN_C_END
2698 
2699 EXTERN_C_BEGIN
2700 #undef __FUNCT__
2701 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ"
2702 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2703 {
2704   PetscInt       m,rstart,cstart,cend;
2705   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
2706   const PetscInt *JJ=0;
2707   PetscScalar    *values=0;
2708   PetscErrorCode ierr;
2709 
2710   PetscFunctionBegin;
2711 
2712   if (bs < 1) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2713   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2714   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2715   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2716   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2717   m      = B->rmap->n/bs;
2718   rstart = B->rmap->rstart/bs;
2719   cstart = B->cmap->rstart/bs;
2720   cend   = B->cmap->rend/bs;
2721 
2722   if (ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
2723   ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2724   o_nnz = d_nnz + m;
2725   for (i=0; i<m; i++) {
2726     nz = ii[i+1] - ii[i];
2727     if (nz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
2728     nz_max = PetscMax(nz_max,nz);
2729     JJ  = jj + ii[i];
2730     for (j=0; j<nz; j++) {
2731       if (*JJ >= cstart) break;
2732       JJ++;
2733     }
2734     d = 0;
2735     for (; j<nz; j++) {
2736       if (*JJ++ >= cend) break;
2737       d++;
2738     }
2739     d_nnz[i] = d;
2740     o_nnz[i] = nz - d;
2741   }
2742   ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2743   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2744 
2745   values = (PetscScalar*)V;
2746   if (!values) {
2747     ierr = PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2748     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2749   }
2750   for (i=0; i<m; i++) {
2751     PetscInt          row    = i + rstart;
2752     PetscInt          ncols  = ii[i+1] - ii[i];
2753     const PetscInt    *icols = jj + ii[i];
2754     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2755     ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2756   }
2757 
2758   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2759   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2760   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2761 
2762   PetscFunctionReturn(0);
2763 }
2764 EXTERN_C_END
2765 
2766 #undef __FUNCT__
2767 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR"
2768 /*@C
2769    MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
2770    (the default parallel PETSc format).
2771 
2772    Collective on MPI_Comm
2773 
2774    Input Parameters:
2775 +  A - the matrix
2776 .  i - the indices into j for the start of each local row (starts with zero)
2777 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2778 -  v - optional values in the matrix
2779 
2780    Level: developer
2781 
2782 .keywords: matrix, aij, compressed row, sparse, parallel
2783 
2784 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ
2785 @*/
2786 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2787 {
2788   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]);
2789 
2790   PetscFunctionBegin;
2791   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
2792   if (f) {
2793     ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr);
2794   }
2795   PetscFunctionReturn(0);
2796 }
2797 
2798 EXTERN_C_BEGIN
2799 #undef __FUNCT__
2800 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
2801 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
2802 {
2803   Mat_MPIBAIJ    *b;
2804   PetscErrorCode ierr;
2805   PetscInt       i, newbs = PetscAbs(bs);
2806 
2807   PetscFunctionBegin;
2808   if (bs < 0) {
2809     ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPIBAIJ matrix","Mat");CHKERRQ(ierr);
2810       ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr);
2811     ierr = PetscOptionsEnd();CHKERRQ(ierr);
2812     bs   = PetscAbs(bs);
2813   }
2814   if ((d_nnz || o_nnz) && newbs != bs) {
2815     SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting d_nnz or o_nnz");
2816   }
2817   bs = newbs;
2818 
2819 
2820   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
2821   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2822   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2823   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2824   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2825 
2826   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2827   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2828   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2829   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2830 
2831   if (d_nnz) {
2832     for (i=0; i<B->rmap->n/bs; i++) {
2833       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]);
2834     }
2835   }
2836   if (o_nnz) {
2837     for (i=0; i<B->rmap->n/bs; i++) {
2838       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]);
2839     }
2840   }
2841 
2842   b = (Mat_MPIBAIJ*)B->data;
2843   b->bs2 = bs*bs;
2844   b->mbs = B->rmap->n/bs;
2845   b->nbs = B->cmap->n/bs;
2846   b->Mbs = B->rmap->N/bs;
2847   b->Nbs = B->cmap->N/bs;
2848 
2849   for (i=0; i<=b->size; i++) {
2850     b->rangebs[i] = B->rmap->range[i]/bs;
2851   }
2852   b->rstartbs = B->rmap->rstart/bs;
2853   b->rendbs   = B->rmap->rend/bs;
2854   b->cstartbs = B->cmap->rstart/bs;
2855   b->cendbs   = B->cmap->rend/bs;
2856 
2857   if (!B->preallocated) {
2858     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2859     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2860     ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
2861     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
2862     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2863     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2864     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
2865     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
2866     ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr);
2867   }
2868 
2869   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2870   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
2871   B->preallocated = PETSC_TRUE;
2872   PetscFunctionReturn(0);
2873 }
2874 EXTERN_C_END
2875 
2876 EXTERN_C_BEGIN
2877 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
2878 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
2879 EXTERN_C_END
2880 
2881 
2882 EXTERN_C_BEGIN
2883 #undef __FUNCT__
2884 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj"
2885 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPIAdj(Mat B, const MatType newtype,MatReuse reuse,Mat *adj)
2886 {
2887   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
2888   PetscErrorCode ierr;
2889   Mat_SeqBAIJ    *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data;
2890   PetscInt       M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs;
2891   const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;
2892 
2893   PetscFunctionBegin;
2894   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&ii);CHKERRQ(ierr);
2895   ii[0] = 0;
2896   CHKMEMQ;
2897   for (i=0; i<M; i++) {
2898     if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]);
2899     if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]);
2900     ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i];
2901     /* remove one from count of matrix has diagonal */
2902     for (j=id[i]; j<id[i+1]; j++) {
2903       if (jd[j] == i) {ii[i+1]--;break;}
2904     }
2905   CHKMEMQ;
2906   }
2907   ierr = PetscMalloc(ii[M]*sizeof(PetscInt),&jj);CHKERRQ(ierr);
2908   cnt = 0;
2909   for (i=0; i<M; i++) {
2910     for (j=io[i]; j<io[i+1]; j++) {
2911       if (garray[jo[j]] > rstart) break;
2912       jj[cnt++] = garray[jo[j]];
2913   CHKMEMQ;
2914     }
2915     for (k=id[i]; k<id[i+1]; k++) {
2916       if (jd[k] != i) {
2917         jj[cnt++] = rstart + jd[k];
2918   CHKMEMQ;
2919       }
2920     }
2921     for (;j<io[i+1]; j++) {
2922       jj[cnt++] = garray[jo[j]];
2923   CHKMEMQ;
2924     }
2925   }
2926   ierr = MatCreateMPIAdj(((PetscObject)B)->comm,M,B->cmap->N/B->rmap->bs,ii,jj,PETSC_NULL,adj);CHKERRQ(ierr);
2927   PetscFunctionReturn(0);
2928 }
2929 EXTERN_C_END
2930 
2931 /*MC
2932    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
2933 
2934    Options Database Keys:
2935 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
2936 . -mat_block_size <bs> - set the blocksize used to store the matrix
2937 - -mat_use_hash_table <fact>
2938 
2939   Level: beginner
2940 
2941 .seealso: MatCreateMPIBAIJ
2942 M*/
2943 
2944 EXTERN_C_BEGIN
2945 #undef __FUNCT__
2946 #define __FUNCT__ "MatCreate_MPIBAIJ"
2947 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIBAIJ(Mat B)
2948 {
2949   Mat_MPIBAIJ    *b;
2950   PetscErrorCode ierr;
2951   PetscTruth     flg;
2952 
2953   PetscFunctionBegin;
2954   ierr = PetscNewLog(B,Mat_MPIBAIJ,&b);CHKERRQ(ierr);
2955   B->data = (void*)b;
2956 
2957 
2958   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2959   B->mapping    = 0;
2960   B->assembled  = PETSC_FALSE;
2961 
2962   B->insertmode = NOT_SET_VALUES;
2963   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
2964   ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr);
2965 
2966   /* build local table of row and column ownerships */
2967   ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);
2968 
2969   /* build cache for off array entries formed */
2970   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
2971   b->donotstash  = PETSC_FALSE;
2972   b->colmap      = PETSC_NULL;
2973   b->garray      = PETSC_NULL;
2974   b->roworiented = PETSC_TRUE;
2975 
2976   /* stuff used in block assembly */
2977   b->barray       = 0;
2978 
2979   /* stuff used for matrix vector multiply */
2980   b->lvec         = 0;
2981   b->Mvctx        = 0;
2982 
2983   /* stuff for MatGetRow() */
2984   b->rowindices   = 0;
2985   b->rowvalues    = 0;
2986   b->getrowactive = PETSC_FALSE;
2987 
2988   /* hash table stuff */
2989   b->ht           = 0;
2990   b->hd           = 0;
2991   b->ht_size      = 0;
2992   b->ht_flag      = PETSC_FALSE;
2993   b->ht_fact      = 0;
2994   b->ht_total_ct  = 0;
2995   b->ht_insert_ct = 0;
2996 
2997   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr);
2998     ierr = PetscOptionsTruth("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
2999     if (flg) {
3000       PetscReal fact = 1.39;
3001       ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
3002       ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr);
3003       if (fact <= 1.0) fact = 1.39;
3004       ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
3005       ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
3006     }
3007   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3008 
3009   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",
3010                                      "MatConvert_MPIBAIJ_MPIAdj",
3011                                       MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr);
3012   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3013                                      "MatStoreValues_MPIBAIJ",
3014                                      MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
3015   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3016                                      "MatRetrieveValues_MPIBAIJ",
3017                                      MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
3018   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
3019                                      "MatGetDiagonalBlock_MPIBAIJ",
3020                                      MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
3021   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C",
3022                                      "MatMPIBAIJSetPreallocation_MPIBAIJ",
3023                                      MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
3024   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",
3025 				     "MatMPIBAIJSetPreallocationCSR_MPIBAIJ",
3026 				     MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
3027   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
3028                                      "MatDiagonalScaleLocal_MPIBAIJ",
3029                                      MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
3030   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C",
3031                                      "MatSetHashTableFactor_MPIBAIJ",
3032                                      MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
3033   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);
3034   PetscFunctionReturn(0);
3035 }
3036 EXTERN_C_END
3037 
3038 /*MC
3039    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
3040 
3041    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
3042    and MATMPIBAIJ otherwise.
3043 
3044    Options Database Keys:
3045 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
3046 
3047   Level: beginner
3048 
3049 .seealso: MatCreateMPIBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3050 M*/
3051 
3052 EXTERN_C_BEGIN
3053 #undef __FUNCT__
3054 #define __FUNCT__ "MatCreate_BAIJ"
3055 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_BAIJ(Mat A)
3056 {
3057   PetscErrorCode ierr;
3058   PetscMPIInt    size;
3059 
3060   PetscFunctionBegin;
3061   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
3062   if (size == 1) {
3063     ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr);
3064   } else {
3065     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
3066   }
3067   PetscFunctionReturn(0);
3068 }
3069 EXTERN_C_END
3070 
3071 #undef __FUNCT__
3072 #define __FUNCT__ "MatMPIBAIJSetPreallocation"
3073 /*@C
3074    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
3075    (block compressed row).  For good matrix assembly performance
3076    the user should preallocate the matrix storage by setting the parameters
3077    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3078    performance can be increased by more than a factor of 50.
3079 
3080    Collective on Mat
3081 
3082    Input Parameters:
3083 +  A - the matrix
3084 .  bs   - size of blockk
3085 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
3086            submatrix  (same for all local rows)
3087 .  d_nnz - array containing the number of block nonzeros in the various block rows
3088            of the in diagonal portion of the local (possibly different for each block
3089            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
3090 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
3091            submatrix (same for all local rows).
3092 -  o_nnz - array containing the number of nonzeros in the various block rows of the
3093            off-diagonal portion of the local submatrix (possibly different for
3094            each block row) or PETSC_NULL.
3095 
3096    If the *_nnz parameter is given then the *_nz parameter is ignored
3097 
3098    Options Database Keys:
3099 +   -mat_block_size - size of the blocks to use
3100 -   -mat_use_hash_table <fact>
3101 
3102    Notes:
3103    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
3104    than it must be used on all processors that share the object for that argument.
3105 
3106    Storage Information:
3107    For a square global matrix we define each processor's diagonal portion
3108    to be its local rows and the corresponding columns (a square submatrix);
3109    each processor's off-diagonal portion encompasses the remainder of the
3110    local matrix (a rectangular submatrix).
3111 
3112    The user can specify preallocated storage for the diagonal part of
3113    the local submatrix with either d_nz or d_nnz (not both).  Set
3114    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
3115    memory allocation.  Likewise, specify preallocated storage for the
3116    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3117 
3118    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3119    the figure below we depict these three local rows and all columns (0-11).
3120 
3121 .vb
3122            0 1 2 3 4 5 6 7 8 9 10 11
3123           -------------------
3124    row 3  |  o o o d d d o o o o o o
3125    row 4  |  o o o d d d o o o o o o
3126    row 5  |  o o o d d d o o o o o o
3127           -------------------
3128 .ve
3129 
3130    Thus, any entries in the d locations are stored in the d (diagonal)
3131    submatrix, and any entries in the o locations are stored in the
3132    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3133    stored simply in the MATSEQBAIJ format for compressed row storage.
3134 
3135    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3136    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3137    In general, for PDE problems in which most nonzeros are near the diagonal,
3138    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3139    or you will get TERRIBLE performance; see the users' manual chapter on
3140    matrices.
3141 
3142    You can call MatGetInfo() to get information on how effective the preallocation was;
3143    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3144    You can also run with the option -info and look for messages with the string
3145    malloc in them to see if additional memory allocation was needed.
3146 
3147    Level: intermediate
3148 
3149 .keywords: matrix, block, aij, compressed row, sparse, parallel
3150 
3151 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR()
3152 @*/
3153 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3154 {
3155   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
3156 
3157   PetscFunctionBegin;
3158   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3159   if (f) {
3160     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3161   }
3162   PetscFunctionReturn(0);
3163 }
3164 
3165 #undef __FUNCT__
3166 #define __FUNCT__ "MatCreateMPIBAIJ"
3167 /*@C
3168    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
3169    (block compressed row).  For good matrix assembly performance
3170    the user should preallocate the matrix storage by setting the parameters
3171    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3172    performance can be increased by more than a factor of 50.
3173 
3174    Collective on MPI_Comm
3175 
3176    Input Parameters:
3177 +  comm - MPI communicator
3178 .  bs   - size of blockk
3179 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3180            This value should be the same as the local size used in creating the
3181            y vector for the matrix-vector product y = Ax.
3182 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
3183            This value should be the same as the local size used in creating the
3184            x vector for the matrix-vector product y = Ax.
3185 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3186 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3187 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
3188            submatrix  (same for all local rows)
3189 .  d_nnz - array containing the number of nonzero blocks in the various block rows
3190            of the in diagonal portion of the local (possibly different for each block
3191            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
3192 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3193            submatrix (same for all local rows).
3194 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
3195            off-diagonal portion of the local submatrix (possibly different for
3196            each block row) or PETSC_NULL.
3197 
3198    Output Parameter:
3199 .  A - the matrix
3200 
3201    Options Database Keys:
3202 +   -mat_block_size - size of the blocks to use
3203 -   -mat_use_hash_table <fact>
3204 
3205    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3206    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3207    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3208 
3209    Notes:
3210    If the *_nnz parameter is given then the *_nz parameter is ignored
3211 
3212    A nonzero block is any block that as 1 or more nonzeros in it
3213 
3214    The user MUST specify either the local or global matrix dimensions
3215    (possibly both).
3216 
3217    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
3218    than it must be used on all processors that share the object for that argument.
3219 
3220    Storage Information:
3221    For a square global matrix we define each processor's diagonal portion
3222    to be its local rows and the corresponding columns (a square submatrix);
3223    each processor's off-diagonal portion encompasses the remainder of the
3224    local matrix (a rectangular submatrix).
3225 
3226    The user can specify preallocated storage for the diagonal part of
3227    the local submatrix with either d_nz or d_nnz (not both).  Set
3228    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
3229    memory allocation.  Likewise, specify preallocated storage for the
3230    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3231 
3232    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3233    the figure below we depict these three local rows and all columns (0-11).
3234 
3235 .vb
3236            0 1 2 3 4 5 6 7 8 9 10 11
3237           -------------------
3238    row 3  |  o o o d d d o o o o o o
3239    row 4  |  o o o d d d o o o o o o
3240    row 5  |  o o o d d d o o o o o o
3241           -------------------
3242 .ve
3243 
3244    Thus, any entries in the d locations are stored in the d (diagonal)
3245    submatrix, and any entries in the o locations are stored in the
3246    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3247    stored simply in the MATSEQBAIJ format for compressed row storage.
3248 
3249    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3250    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3251    In general, for PDE problems in which most nonzeros are near the diagonal,
3252    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3253    or you will get TERRIBLE performance; see the users' manual chapter on
3254    matrices.
3255 
3256    Level: intermediate
3257 
3258 .keywords: matrix, block, aij, compressed row, sparse, parallel
3259 
3260 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3261 @*/
3262 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3263 {
3264   PetscErrorCode ierr;
3265   PetscMPIInt    size;
3266 
3267   PetscFunctionBegin;
3268   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3269   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3270   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3271   if (size > 1) {
3272     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
3273     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3274   } else {
3275     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3276     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3277   }
3278   PetscFunctionReturn(0);
3279 }
3280 
3281 #undef __FUNCT__
3282 #define __FUNCT__ "MatDuplicate_MPIBAIJ"
3283 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3284 {
3285   Mat            mat;
3286   Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
3287   PetscErrorCode ierr;
3288   PetscInt       len=0;
3289 
3290   PetscFunctionBegin;
3291   *newmat       = 0;
3292   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
3293   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3294   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3295   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3296 
3297   mat->factor       = matin->factor;
3298   mat->preallocated = PETSC_TRUE;
3299   mat->assembled    = PETSC_TRUE;
3300   mat->insertmode   = NOT_SET_VALUES;
3301 
3302   a      = (Mat_MPIBAIJ*)mat->data;
3303   mat->rmap->bs  = matin->rmap->bs;
3304   a->bs2   = oldmat->bs2;
3305   a->mbs   = oldmat->mbs;
3306   a->nbs   = oldmat->nbs;
3307   a->Mbs   = oldmat->Mbs;
3308   a->Nbs   = oldmat->Nbs;
3309 
3310   ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3311   ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3312 
3313   a->size         = oldmat->size;
3314   a->rank         = oldmat->rank;
3315   a->donotstash   = oldmat->donotstash;
3316   a->roworiented  = oldmat->roworiented;
3317   a->rowindices   = 0;
3318   a->rowvalues    = 0;
3319   a->getrowactive = PETSC_FALSE;
3320   a->barray       = 0;
3321   a->rstartbs     = oldmat->rstartbs;
3322   a->rendbs       = oldmat->rendbs;
3323   a->cstartbs     = oldmat->cstartbs;
3324   a->cendbs       = oldmat->cendbs;
3325 
3326   /* hash table stuff */
3327   a->ht           = 0;
3328   a->hd           = 0;
3329   a->ht_size      = 0;
3330   a->ht_flag      = oldmat->ht_flag;
3331   a->ht_fact      = oldmat->ht_fact;
3332   a->ht_total_ct  = 0;
3333   a->ht_insert_ct = 0;
3334 
3335   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
3336   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,1,&mat->stash);CHKERRQ(ierr);
3337   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
3338   if (oldmat->colmap) {
3339 #if defined (PETSC_USE_CTABLE)
3340   ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3341 #else
3342   ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3343   ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3344   ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3345 #endif
3346   } else a->colmap = 0;
3347 
3348   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
3349     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3350     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3351     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
3352   } else a->garray = 0;
3353 
3354   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3355   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3356   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3357   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3358 
3359   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3360   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3361   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3362   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3363   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3364   *newmat = mat;
3365 
3366   PetscFunctionReturn(0);
3367 }
3368 
3369 #include "petscsys.h"
3370 
3371 #undef __FUNCT__
3372 #define __FUNCT__ "MatLoad_MPIBAIJ"
3373 PetscErrorCode MatLoad_MPIBAIJ(PetscViewer viewer, const MatType type,Mat *newmat)
3374 {
3375   Mat            A;
3376   PetscErrorCode ierr;
3377   int            fd;
3378   PetscInt       i,nz,j,rstart,rend;
3379   PetscScalar    *vals,*buf;
3380   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3381   MPI_Status     status;
3382   PetscMPIInt    rank,size,maxnz;
3383   PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
3384   PetscInt       *locrowlens = PETSC_NULL,*procsnz = PETSC_NULL,*browners = PETSC_NULL;
3385   PetscInt       jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax;
3386   PetscMPIInt    tag = ((PetscObject)viewer)->tag;
3387   PetscInt       *dlens = PETSC_NULL,*odlens = PETSC_NULL,*mask = PETSC_NULL,*masked1 = PETSC_NULL,*masked2 = PETSC_NULL,rowcount,odcount;
3388   PetscInt       dcount,kmax,k,nzcount,tmp,mend;
3389 
3390   PetscFunctionBegin;
3391   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr);
3392     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
3393   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3394 
3395   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3396   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3397   if (!rank) {
3398     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3399     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
3400     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3401   }
3402 
3403   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3404   M = header[1]; N = header[2];
3405 
3406   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
3407 
3408   /*
3409      This code adds extra rows to make sure the number of rows is
3410      divisible by the blocksize
3411   */
3412   Mbs        = M/bs;
3413   extra_rows = bs - M + bs*Mbs;
3414   if (extra_rows == bs) extra_rows = 0;
3415   else                  Mbs++;
3416   if (extra_rows && !rank) {
3417     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3418   }
3419 
3420   /* determine ownership of all rows */
3421   mbs        = Mbs/size + ((Mbs % size) > rank);
3422   m          = mbs*bs;
3423   ierr       = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr);
3424   ierr       = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3425 
3426   /* process 0 needs enough room for process with most rows */
3427   if (!rank) {
3428     mmax = rowners[1];
3429     for (i=2; i<size; i++) {
3430       mmax = PetscMax(mmax,rowners[i]);
3431     }
3432     mmax*=bs;
3433   } else mmax = m;
3434 
3435   rowners[0] = 0;
3436   for (i=2; i<=size; i++)  rowners[i] += rowners[i-1];
3437   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
3438   rstart = rowners[rank];
3439   rend   = rowners[rank+1];
3440 
3441   /* distribute row lengths to all processors */
3442   ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr);
3443   if (!rank) {
3444     mend = m;
3445     if (size == 1) mend = mend - extra_rows;
3446     ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr);
3447     for (j=mend; j<m; j++) locrowlens[j] = 1;
3448     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3449     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3450     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3451     for (j=0; j<m; j++) {
3452       procsnz[0] += locrowlens[j];
3453     }
3454     for (i=1; i<size; i++) {
3455       mend = browners[i+1] - browners[i];
3456       if (i == size-1) mend = mend - extra_rows;
3457       ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr);
3458       for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
3459       /* calculate the number of nonzeros on each processor */
3460       for (j=0; j<browners[i+1]-browners[i]; j++) {
3461         procsnz[i] += rowlengths[j];
3462       }
3463       ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3464     }
3465     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3466   } else {
3467     ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3468   }
3469 
3470   if (!rank) {
3471     /* determine max buffer needed and allocate it */
3472     maxnz = procsnz[0];
3473     for (i=1; i<size; i++) {
3474       maxnz = PetscMax(maxnz,procsnz[i]);
3475     }
3476     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3477 
3478     /* read in my part of the matrix column indices  */
3479     nz     = procsnz[0];
3480     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
3481     mycols = ibuf;
3482     if (size == 1)  nz -= extra_rows;
3483     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3484     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
3485 
3486     /* read in every ones (except the last) and ship off */
3487     for (i=1; i<size-1; i++) {
3488       nz   = procsnz[i];
3489       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3490       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3491     }
3492     /* read in the stuff for the last proc */
3493     if (size != 1) {
3494       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
3495       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3496       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
3497       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
3498     }
3499     ierr = PetscFree(cols);CHKERRQ(ierr);
3500   } else {
3501     /* determine buffer space needed for message */
3502     nz = 0;
3503     for (i=0; i<m; i++) {
3504       nz += locrowlens[i];
3505     }
3506     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
3507     mycols = ibuf;
3508     /* receive message of column indices*/
3509     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3510     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
3511     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
3512   }
3513 
3514   /* loop over local rows, determining number of off diagonal entries */
3515   ierr     = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
3516   ierr     = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
3517   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3518   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3519   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3520   rowcount = 0; nzcount = 0;
3521   for (i=0; i<mbs; i++) {
3522     dcount  = 0;
3523     odcount = 0;
3524     for (j=0; j<bs; j++) {
3525       kmax = locrowlens[rowcount];
3526       for (k=0; k<kmax; k++) {
3527         tmp = mycols[nzcount++]/bs;
3528         if (!mask[tmp]) {
3529           mask[tmp] = 1;
3530           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
3531           else masked1[dcount++] = tmp;
3532         }
3533       }
3534       rowcount++;
3535     }
3536 
3537     dlens[i]  = dcount;
3538     odlens[i] = odcount;
3539 
3540     /* zero out the mask elements we set */
3541     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
3542     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
3543   }
3544 
3545   /* create our matrix */
3546   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
3547   ierr = MatSetSizes(A,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3548   ierr = MatSetType(A,type);CHKERRQ(ierr)
3549   ierr = MatMPIBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
3550 
3551   if (!rank) {
3552     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
3553     /* read in my part of the matrix numerical values  */
3554     nz = procsnz[0];
3555     vals = buf;
3556     mycols = ibuf;
3557     if (size == 1)  nz -= extra_rows;
3558     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3559     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
3560 
3561     /* insert into matrix */
3562     jj      = rstart*bs;
3563     for (i=0; i<m; i++) {
3564       ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3565       mycols += locrowlens[i];
3566       vals   += locrowlens[i];
3567       jj++;
3568     }
3569     /* read in other processors (except the last one) and ship out */
3570     for (i=1; i<size-1; i++) {
3571       nz   = procsnz[i];
3572       vals = buf;
3573       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3574       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
3575     }
3576     /* the last proc */
3577     if (size != 1){
3578       nz   = procsnz[i] - extra_rows;
3579       vals = buf;
3580       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3581       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
3582       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
3583     }
3584     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3585   } else {
3586     /* receive numeric values */
3587     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
3588 
3589     /* receive message of values*/
3590     vals   = buf;
3591     mycols = ibuf;
3592     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
3593     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
3594     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
3595 
3596     /* insert into matrix */
3597     jj      = rstart*bs;
3598     for (i=0; i<m; i++) {
3599       ierr    = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3600       mycols += locrowlens[i];
3601       vals   += locrowlens[i];
3602       jj++;
3603     }
3604   }
3605   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
3606   ierr = PetscFree(buf);CHKERRQ(ierr);
3607   ierr = PetscFree(ibuf);CHKERRQ(ierr);
3608   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
3609   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
3610   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
3611   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3612   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3613 
3614   *newmat = A;
3615   PetscFunctionReturn(0);
3616 }
3617 
3618 #undef __FUNCT__
3619 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
3620 /*@
3621    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
3622 
3623    Input Parameters:
3624 .  mat  - the matrix
3625 .  fact - factor
3626 
3627    Collective on Mat
3628 
3629    Level: advanced
3630 
3631   Notes:
3632    This can also be set by the command line option: -mat_use_hash_table <fact>
3633 
3634 .keywords: matrix, hashtable, factor, HT
3635 
3636 .seealso: MatSetOption()
3637 @*/
3638 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
3639 {
3640   PetscErrorCode ierr,(*f)(Mat,PetscReal);
3641 
3642   PetscFunctionBegin;
3643   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);CHKERRQ(ierr);
3644   if (f) {
3645     ierr = (*f)(mat,fact);CHKERRQ(ierr);
3646   }
3647   PetscFunctionReturn(0);
3648 }
3649 
3650 EXTERN_C_BEGIN
3651 #undef __FUNCT__
3652 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
3653 PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
3654 {
3655   Mat_MPIBAIJ *baij;
3656 
3657   PetscFunctionBegin;
3658   baij = (Mat_MPIBAIJ*)mat->data;
3659   baij->ht_fact = fact;
3660   PetscFunctionReturn(0);
3661 }
3662 EXTERN_C_END
3663 
3664 #undef __FUNCT__
3665 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
3666 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3667 {
3668   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
3669   PetscFunctionBegin;
3670   *Ad     = a->A;
3671   *Ao     = a->B;
3672   *colmap = a->garray;
3673   PetscFunctionReturn(0);
3674 }
3675 
3676 /*
3677     Special version for direct calls from Fortran (to eliminate two function call overheads
3678 */
3679 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3680 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3681 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3682 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3683 #endif
3684 
3685 #undef __FUNCT__
3686 #define __FUNCT__ "matmpibiajsetvaluesblocked"
3687 /*@C
3688   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked()
3689 
3690   Collective on Mat
3691 
3692   Input Parameters:
3693 + mat - the matrix
3694 . min - number of input rows
3695 . im - input rows
3696 . nin - number of input columns
3697 . in - input columns
3698 . v - numerical values input
3699 - addvin - INSERT_VALUES or ADD_VALUES
3700 
3701   Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse.
3702 
3703   Level: advanced
3704 
3705 .seealso:   MatSetValuesBlocked()
3706 @*/
3707 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
3708 {
3709   /* convert input arguments to C version */
3710   Mat             mat = *matin;
3711   PetscInt        m = *min, n = *nin;
3712   InsertMode      addv = *addvin;
3713 
3714   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
3715   const MatScalar *value;
3716   MatScalar       *barray=baij->barray;
3717   PetscTruth      roworiented = baij->roworiented;
3718   PetscErrorCode  ierr;
3719   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
3720   PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
3721   PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
3722 
3723   PetscFunctionBegin;
3724   /* tasks normally handled by MatSetValuesBlocked() */
3725   if (mat->insertmode == NOT_SET_VALUES) {
3726     mat->insertmode = addv;
3727   }
3728 #if defined(PETSC_USE_DEBUG)
3729   else if (mat->insertmode != addv) {
3730     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
3731   }
3732   if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
3733 #endif
3734   if (mat->assembled) {
3735     mat->was_assembled = PETSC_TRUE;
3736     mat->assembled     = PETSC_FALSE;
3737   }
3738   ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
3739 
3740 
3741   if(!barray) {
3742     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
3743     baij->barray = barray;
3744   }
3745 
3746   if (roworiented) {
3747     stepval = (n-1)*bs;
3748   } else {
3749     stepval = (m-1)*bs;
3750   }
3751   for (i=0; i<m; i++) {
3752     if (im[i] < 0) continue;
3753 #if defined(PETSC_USE_DEBUG)
3754     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
3755 #endif
3756     if (im[i] >= rstart && im[i] < rend) {
3757       row = im[i] - rstart;
3758       for (j=0; j<n; j++) {
3759         /* If NumCol = 1 then a copy is not required */
3760         if ((roworiented) && (n == 1)) {
3761           barray = (MatScalar*)v + i*bs2;
3762         } else if((!roworiented) && (m == 1)) {
3763           barray = (MatScalar*)v + j*bs2;
3764         } else { /* Here a copy is required */
3765           if (roworiented) {
3766             value = v + i*(stepval+bs)*bs + j*bs;
3767           } else {
3768             value = v + j*(stepval+bs)*bs + i*bs;
3769           }
3770           for (ii=0; ii<bs; ii++,value+=stepval) {
3771             for (jj=0; jj<bs; jj++) {
3772               *barray++  = *value++;
3773             }
3774           }
3775           barray -=bs2;
3776         }
3777 
3778         if (in[j] >= cstart && in[j] < cend){
3779           col  = in[j] - cstart;
3780           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
3781         }
3782         else if (in[j] < 0) continue;
3783 #if defined(PETSC_USE_DEBUG)
3784         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
3785 #endif
3786         else {
3787           if (mat->was_assembled) {
3788             if (!baij->colmap) {
3789               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
3790             }
3791 
3792 #if defined(PETSC_USE_DEBUG)
3793 #if defined (PETSC_USE_CTABLE)
3794             { PetscInt data;
3795               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
3796               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
3797             }
3798 #else
3799             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
3800 #endif
3801 #endif
3802 #if defined (PETSC_USE_CTABLE)
3803 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
3804             col  = (col - 1)/bs;
3805 #else
3806             col = (baij->colmap[in[j]] - 1)/bs;
3807 #endif
3808             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
3809               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
3810               col =  in[j];
3811             }
3812           }
3813           else col = in[j];
3814           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
3815         }
3816       }
3817     } else {
3818       if (!baij->donotstash) {
3819         if (roworiented) {
3820           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
3821         } else {
3822           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
3823         }
3824       }
3825     }
3826   }
3827 
3828   /* task normally handled by MatSetValuesBlocked() */
3829   ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
3830   PetscFunctionReturn(0);
3831 }
3832