xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 753ebd1d069d7a4e86ed47ccb83df941bd1fb5db)
1 #define PETSCMAT_DLL
2 
3 #include "../src/mat/impls/baij/mpi/mpibaij.h"   /*I  "petscmat.h"  I*/
4 
5 EXTERN PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
6 EXTERN PetscErrorCode DisAssemble_MPIBAIJ(Mat);
7 EXTERN PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat,PetscInt,IS[],PetscInt);
8 EXTERN PetscErrorCode MatGetSubMatrices_MPIBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat *[]);
9 EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []);
10 EXTERN PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
11 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
12 EXTERN PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
13 EXTERN PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
14 EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar);
15 
16 #undef __FUNCT__
17 #define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ"
18 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
19 {
20   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
21   PetscErrorCode ierr;
22   PetscInt       i,*idxb = 0;
23   PetscScalar    *va,*vb;
24   Vec            vtmp;
25 
26   PetscFunctionBegin;
27   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
28   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
29   if (idx) {
30     for (i=0; i<A->rmap->n; i++) {if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;}
31   }
32 
33   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
34   if (idx) {ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);}
35   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
36   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
37 
38   for (i=0; i<A->rmap->n; i++){
39     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {va[i] = vb[i]; if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);}
40   }
41 
42   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
43   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
44   if (idxb) {ierr = PetscFree(idxb);CHKERRQ(ierr);}
45   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
46   PetscFunctionReturn(0);
47 }
48 
49 EXTERN_C_BEGIN
50 #undef __FUNCT__
51 #define __FUNCT__ "MatStoreValues_MPIBAIJ"
52 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIBAIJ(Mat mat)
53 {
54   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
55   PetscErrorCode ierr;
56 
57   PetscFunctionBegin;
58   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
59   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
60   PetscFunctionReturn(0);
61 }
62 EXTERN_C_END
63 
64 EXTERN_C_BEGIN
65 #undef __FUNCT__
66 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
67 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIBAIJ(Mat mat)
68 {
69   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
70   PetscErrorCode ierr;
71 
72   PetscFunctionBegin;
73   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
74   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
75   PetscFunctionReturn(0);
76 }
77 EXTERN_C_END
78 
79 /*
80      Local utility routine that creates a mapping from the global column
81    number to the local number in the off-diagonal part of the local
82    storage of the matrix.  This is done in a non-scalable way since the
83    length of colmap equals the global matrix length.
84 */
85 #undef __FUNCT__
86 #define __FUNCT__ "CreateColmap_MPIBAIJ_Private"
87 PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat)
88 {
89   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
90   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
91   PetscErrorCode ierr;
92   PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;
93 
94   PetscFunctionBegin;
95 #if defined (PETSC_USE_CTABLE)
96   ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr);
97   for (i=0; i<nbs; i++){
98     ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
99   }
100 #else
101   ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr);
102   ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
103   ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
104   for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
105 #endif
106   PetscFunctionReturn(0);
107 }
108 
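/*
   For reference, a colmap lookup proceeds as sketched below (a minimal,
   illustrative fragment; gbcol stands for some global block-column index).
   Stored entries are offset by one so that zero can signal "absent"; after
   subtracting one, the value recovered is bs times the local block index:

     PetscInt lcol;
   #if defined (PETSC_USE_CTABLE)
     ierr = PetscTableFind(baij->colmap,gbcol+1,&lcol);CHKERRQ(ierr);
     lcol--;
   #else
     lcol = baij->colmap[gbcol] - 1;
   #endif
*/
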
109 #define CHUNKSIZE  10
110 
111 #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
112 { \
113  \
114     brow = row/bs;  \
115     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
116     rmax = aimax[brow]; nrow = ailen[brow]; \
117       bcol = col/bs; \
118       ridx = row % bs; cidx = col % bs; \
119       low = 0; high = nrow; \
120       while (high-low > 3) { \
121         t = (low+high)/2; \
122         if (rp[t] > bcol) high = t; \
123         else              low  = t; \
124       } \
125       for (_i=low; _i<high; _i++) { \
126         if (rp[_i] > bcol) break; \
127         if (rp[_i] == bcol) { \
128           bap  = ap +  bs2*_i + bs*cidx + ridx; \
129           if (addv == ADD_VALUES) *bap += value;  \
130           else                    *bap  = value;  \
131           goto a_noinsert; \
132         } \
133       } \
134       if (a->nonew == 1) goto a_noinsert; \
135       if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
136       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
137       N = nrow++ - 1;  \
138       /* shift up all the later entries in this row */ \
139       for (ii=N; ii>=_i; ii--) { \
140         rp[ii+1] = rp[ii]; \
141         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
142       } \
143       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
144       rp[_i]                      = bcol;  \
145       ap[bs2*_i + bs*cidx + ridx] = value;  \
146       a_noinsert:; \
147     ailen[brow] = nrow; \
148 }
149 
150 #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
151 { \
152     brow = row/bs;  \
153     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
154     rmax = bimax[brow]; nrow = bilen[brow]; \
155       bcol = col/bs; \
156       ridx = row % bs; cidx = col % bs; \
157       low = 0; high = nrow; \
158       while (high-low > 3) { \
159         t = (low+high)/2; \
160         if (rp[t] > bcol) high = t; \
161         else              low  = t; \
162       } \
163       for (_i=low; _i<high; _i++) { \
164         if (rp[_i] > bcol) break; \
165         if (rp[_i] == bcol) { \
166           bap  = ap +  bs2*_i + bs*cidx + ridx; \
167           if (addv == ADD_VALUES) *bap += value;  \
168           else                    *bap  = value;  \
169           goto b_noinsert; \
170         } \
171       } \
172       if (b->nonew == 1) goto b_noinsert; \
173       if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
174       MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
175       CHKMEMQ;\
176       N = nrow++ - 1;  \
177       /* shift up all the later entries in this row */ \
178       for (ii=N; ii>=_i; ii--) { \
179         rp[ii+1] = rp[ii]; \
180         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
181       } \
182       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
183       rp[_i]                      = bcol;  \
184       ap[bs2*_i + bs*cidx + ridx] = value;  \
185       b_noinsert:; \
186     bilen[brow] = nrow; \
187 }
188 
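/*
   Both macros above locate the block column inside a sorted block row with
   the same two-phase search: bisection narrows the window until it holds at
   most three entries, then a linear scan finishes the job.  Written out as
   a plain function, the search logic is (an illustrative sketch only):

     static PetscInt FindBlockCol(const PetscInt *rp,PetscInt nrow,PetscInt bcol)
     {
       PetscInt low = 0,high = nrow,t,i;
       while (high-low > 3) {
         t = (low+high)/2;
         if (rp[t] > bcol) high = t;
         else              low  = t;
       }
       for (i=low; i<high; i++) {
         if (rp[i] == bcol) return i;
         if (rp[i] > bcol)  break;
       }
       return -1;
     }

   A return of -1 corresponds to the fall-through insertion path in the
   macros (reallocation, shift of later entries, and insert of a new block).
*/
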
189 #undef __FUNCT__
190 #define __FUNCT__ "MatSetValues_MPIBAIJ"
191 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
192 {
193   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
194   MatScalar      value;
195   PetscTruth     roworiented = baij->roworiented;
196   PetscErrorCode ierr;
197   PetscInt       i,j,row,col;
198   PetscInt       rstart_orig=mat->rmap->rstart;
199   PetscInt       rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart;
200   PetscInt       cend_orig=mat->cmap->rend,bs=mat->rmap->bs;
201 
202   /* Some variables required by the MatSetValues_SeqBAIJ_{A,B}_Private() macros */
203   Mat            A = baij->A;
204   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)(A)->data;
205   PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
206   MatScalar      *aa=a->a;
207 
208   Mat            B = baij->B;
209   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(B)->data;
210   PetscInt       *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
211   MatScalar      *ba=b->a;
212 
213   PetscInt       *rp,ii,nrow,_i,rmax,N,brow,bcol;
214   PetscInt       low,high,t,ridx,cidx,bs2=a->bs2;
215   MatScalar      *ap,*bap;
216 
217   PetscFunctionBegin;
218   if (v) PetscValidScalarPointer(v,6);
219   for (i=0; i<m; i++) {
220     if (im[i] < 0) continue;
221 #if defined(PETSC_USE_DEBUG)
222     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
223 #endif
224     if (im[i] >= rstart_orig && im[i] < rend_orig) {
225       row = im[i] - rstart_orig;
226       for (j=0; j<n; j++) {
227         if (in[j] >= cstart_orig && in[j] < cend_orig){
228           col = in[j] - cstart_orig;
229           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
230           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
231           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
232         } else if (in[j] < 0) continue;
233 #if defined(PETSC_USE_DEBUG)
234         else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
235 #endif
236         else {
237           if (mat->was_assembled) {
238             if (!baij->colmap) {
239               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
240             }
241 #if defined (PETSC_USE_CTABLE)
242             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
243             col  = col - 1;
244 #else
245             col = baij->colmap[in[j]/bs] - 1;
246 #endif
247             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
248               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
249               col =  in[j];
250               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
251               B = baij->B;
252               b = (Mat_SeqBAIJ*)(B)->data;
253               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
254               ba=b->a;
255             } else col += in[j]%bs;
256           } else col = in[j];
257           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
258           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
259           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
260         }
261       }
262     } else {
263       if (!baij->donotstash) {
264         if (roworiented) {
265           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
266         } else {
267           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
268         }
269       }
270     }
271   }
272   PetscFunctionReturn(0);
273 }
274 
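/*
   A minimal user-level sketch that reaches the routine above through the
   public API (sizes and indices are illustrative; ierr is assumed declared):

     Mat         A;
     PetscInt    row = 0,col = 0;
     PetscScalar v   = 1.0;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,8,8);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
     ierr = MatMPIBAIJSetPreallocation(A,2,1,PETSC_NULL,1,PETSC_NULL);CHKERRQ(ierr);
     ierr = MatSetValues(A,1,&row,1,&col,&v,INSERT_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

   Entries whose rows are owned by another process are not applied
   immediately; they are queued in the stash until the assembly routines
   communicate them.
*/
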
275 #undef __FUNCT__
276 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
277 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
278 {
279   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
280   const PetscScalar *value;
281   MatScalar         *barray=baij->barray;
282   PetscTruth        roworiented = baij->roworiented;
283   PetscErrorCode    ierr;
284   PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
285   PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
286   PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
287 
288   PetscFunctionBegin;
289   if (!barray) {
290     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
291     baij->barray = barray;
292   }
293 
294   if (roworiented) {
295     stepval = (n-1)*bs;
296   } else {
297     stepval = (m-1)*bs;
298   }
299   for (i=0; i<m; i++) {
300     if (im[i] < 0) continue;
301 #if defined(PETSC_USE_DEBUG)
302     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
303 #endif
304     if (im[i] >= rstart && im[i] < rend) {
305       row = im[i] - rstart;
306       for (j=0; j<n; j++) {
307         /* If n == 1 (row-oriented) or m == 1 (column-oriented), the input block can be used in place; no copy is required */
308         if ((roworiented) && (n == 1)) {
309           barray = (MatScalar*)v + i*bs2;
310         } else if((!roworiented) && (m == 1)) {
311           barray = (MatScalar*)v + j*bs2;
312         } else { /* Here a copy is required */
313           if (roworiented) {
314             value = v + i*(stepval+bs)*bs + j*bs;
315           } else {
316             value = v + j*(stepval+bs)*bs + i*bs;
317           }
318           for (ii=0; ii<bs; ii++,value+=stepval) {
319             for (jj=0; jj<bs; jj++) {
320               *barray++  = *value++;
321             }
322           }
323           barray -=bs2;
324         }
325 
326         if (in[j] >= cstart && in[j] < cend){
327           col  = in[j] - cstart;
328           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
329         }
330         else if (in[j] < 0) continue;
331 #if defined(PETSC_USE_DEBUG)
332         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
333 #endif
334         else {
335           if (mat->was_assembled) {
336             if (!baij->colmap) {
337               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
338             }
339 
340 #if defined(PETSC_USE_DEBUG)
341 #if defined (PETSC_USE_CTABLE)
342             { PetscInt data;
343               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
344               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
345             }
346 #else
347             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
348 #endif
349 #endif
350 #if defined (PETSC_USE_CTABLE)
351             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
352             col  = (col - 1)/bs;
353 #else
354             col = (baij->colmap[in[j]] - 1)/bs;
355 #endif
356             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
357               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
358               col =  in[j];
359             }
360           }
361           else col = in[j];
362           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
363         }
364       }
365     } else {
366       if (!baij->donotstash) {
367         if (roworiented) {
368           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
369         } else {
370           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
371         }
372       }
373     }
374   }
375   PetscFunctionReturn(0);
376 }
377 
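/*
   The blocked analogue takes block (not point) indices and one bs-by-bs
   dense block of values per index pair.  A minimal sketch for bs = 2,
   reusing the illustrative matrix A from the sketch above:

     PetscInt    brow = 0,bcol = 0;
     PetscScalar block[4] = {1.0,2.0,3.0,4.0};
     ierr = MatSetValuesBlocked(A,1,&brow,1,&bcol,block,INSERT_VALUES);CHKERRQ(ierr);

   How the bs2 values are read from the block is governed by the roworiented
   flag (see MAT_ROW_ORIENTED in MatSetOption_MPIBAIJ() below).
*/
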
378 #define HASH_KEY 0.6180339887
379 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
380 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
381 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
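
/*
   HASH() is multiplicative (golden-ratio) hashing: the key is multiplied by
   HASH_KEY, only the fractional part is kept, and that fraction is scaled to
   the table size.  A worked example with size = 100 and key = 17:

     17*0.6180339887 = 10.5065...   ->  fractional part 0.5065...
     HASH(100,17,tmp) = (PetscInt)(100*0.5065...) = 50
*/
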
382 #undef __FUNCT__
383 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
384 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
385 {
386   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
387   PetscTruth     roworiented = baij->roworiented;
388   PetscErrorCode ierr;
389   PetscInt       i,j,row,col;
390   PetscInt       rstart_orig=mat->rmap->rstart;
391   PetscInt       rend_orig=mat->rmap->rend,Nbs=baij->Nbs;
392   PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
393   PetscReal      tmp;
394   MatScalar      **HD = baij->hd,value;
395 #if defined(PETSC_USE_DEBUG)
396   PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
397 #endif
398 
399   PetscFunctionBegin;
400   if (v) PetscValidScalarPointer(v,6);
401   for (i=0; i<m; i++) {
402 #if defined(PETSC_USE_DEBUG)
403     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
404     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
405 #endif
406     row = im[i];
407     if (row >= rstart_orig && row < rend_orig) {
408       for (j=0; j<n; j++) {
409         col = in[j];
410         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
411         /* Look up the entry in the hash table */
412         key = (row/bs)*Nbs+(col/bs)+1;
413         h1  = HASH(size,key,tmp);
414 
415 
416         idx = h1;
417 #if defined(PETSC_USE_DEBUG)
418         insert_ct++;
419         total_ct++;
420         if (HT[idx] != key) {
421           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
422           if (idx == size) {
423             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
424             if (idx == h1) {
425               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
426             }
427           }
428         }
429 #else
430         if (HT[idx] != key) {
431           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
432           if (idx == size) {
433             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
434             if (idx == h1) {
435               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
436             }
437           }
438         }
439 #endif
440         /* A hash table entry was found, so insert the value at the correct address */
441         if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
442         else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
443       }
444     } else {
445       if (!baij->donotstash) {
446         if (roworiented) {
447           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
448         } else {
449           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
450         }
451       }
452     }
453   }
454 #if defined(PETSC_USE_DEBUG)
455   baij->ht_total_ct = total_ct;
456   baij->ht_insert_ct = insert_ct;
457 #endif
458   PetscFunctionReturn(0);
459 }
460 
461 #undef __FUNCT__
462 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
463 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
464 {
465   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
466   PetscTruth        roworiented = baij->roworiented;
467   PetscErrorCode    ierr;
468   PetscInt          i,j,ii,jj,row,col;
469   PetscInt          rstart=baij->rstartbs;
470   PetscInt          rend=baij->rendbs,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
471   PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
472   PetscReal         tmp;
473   MatScalar         **HD = baij->hd,*baij_a;
474   const PetscScalar *v_t,*value;
475 #if defined(PETSC_USE_DEBUG)
476   PetscInt          total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
477 #endif
478 
479   PetscFunctionBegin;
480 
481   if (roworiented) {
482     stepval = (n-1)*bs;
483   } else {
484     stepval = (m-1)*bs;
485   }
486   for (i=0; i<m; i++) {
487 #if defined(PETSC_USE_DEBUG)
488     if (im[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
489     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
490 #endif
491     row   = im[i];
492     v_t   = v + i*nbs2;
493     if (row >= rstart && row < rend) {
494       for (j=0; j<n; j++) {
495         col = in[j];
496 
497         /* Look up into the Hash Table */
498         key = row*Nbs+col+1;
499         h1  = HASH(size,key,tmp);
500 
501         idx = h1;
502 #if defined(PETSC_USE_DEBUG)
503         total_ct++;
504         insert_ct++;
504         if (HT[idx] != key) {
506           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
507           if (idx == size) {
508             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
509             if (idx == h1) {
510               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
511             }
512           }
513         }
514 #else
515         if (HT[idx] != key) {
516           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
517           if (idx == size) {
518             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
519             if (idx == h1) {
520               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
521             }
522           }
523         }
524 #endif
525         baij_a = HD[idx];
526         if (roworiented) {
527           /*value = v + i*(stepval+bs)*bs + j*bs;*/
528           /* value = v + (i*(stepval+bs)+j)*bs; */
529           value = v_t;
530           v_t  += bs;
531           if (addv == ADD_VALUES) {
532             for (ii=0; ii<bs; ii++,value+=stepval) {
533               for (jj=ii; jj<bs2; jj+=bs) {
534                 baij_a[jj]  += *value++;
535               }
536             }
537           } else {
538             for (ii=0; ii<bs; ii++,value+=stepval) {
539               for (jj=ii; jj<bs2; jj+=bs) {
540                 baij_a[jj]  = *value++;
541               }
542             }
543           }
544         } else {
545           value = v + j*(stepval+bs)*bs + i*bs;
546           if (addv == ADD_VALUES) {
547             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
548               for (jj=0; jj<bs; jj++) {
549                 baij_a[jj]  += *value++;
550               }
551             }
552           } else {
553             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
554               for (jj=0; jj<bs; jj++) {
555                 baij_a[jj]  = *value++;
556               }
557             }
558           }
559         }
560       }
561     } else {
562       if (!baij->donotstash) {
563         if (roworiented) {
564           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
565         } else {
566           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
567         }
568       }
569     }
570   }
571 #if defined(PETSC_USE_DEBUG)
572   baij->ht_total_ct = total_ct;
573   baij->ht_insert_ct = insert_ct;
574 #endif
575   PetscFunctionReturn(0);
576 }
577 
578 #undef __FUNCT__
579 #define __FUNCT__ "MatGetValues_MPIBAIJ"
580 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
581 {
582   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
583   PetscErrorCode ierr;
584   PetscInt       bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
585   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
586 
587   PetscFunctionBegin;
588   for (i=0; i<m; i++) {
589     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
590     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
591     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
592       row = idxm[i] - bsrstart;
593       for (j=0; j<n; j++) {
594         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
595         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
596         if (idxn[j] >= bscstart && idxn[j] < bscend){
597           col = idxn[j] - bscstart;
598           ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
599         } else {
600           if (!baij->colmap) {
601             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
602           }
603 #if defined (PETSC_USE_CTABLE)
604           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
605           data --;
606 #else
607           data = baij->colmap[idxn[j]/bs]-1;
608 #endif
609           if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
610           else {
611             col  = data + idxn[j]%bs;
612             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
613           }
614         }
615       }
616     } else {
617       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
618     }
619   }
620   PetscFunctionReturn(0);
621 }
622 
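/*
   Retrieval mirrors insertion but, as the SETERRQ above enforces, only rows
   owned by the calling process may be queried.  A minimal sketch (A is an
   assembled illustrative matrix; ierr is assumed declared):

     PetscInt    rstart,rend,row,col = 0;
     PetscScalar val;
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     row  = rstart;
     ierr = MatGetValues(A,1,&row,1,&col,&val);CHKERRQ(ierr);
*/
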
623 #undef __FUNCT__
624 #define __FUNCT__ "MatNorm_MPIBAIJ"
625 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
626 {
627   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
628   Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
629   PetscErrorCode ierr;
630   PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
631   PetscReal      sum = 0.0;
632   MatScalar      *v;
633 
634   PetscFunctionBegin;
635   if (baij->size == 1) {
636     ierr =  MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
637   } else {
638     if (type == NORM_FROBENIUS) {
639       v = amat->a;
640       nz = amat->nz*bs2;
641       for (i=0; i<nz; i++) {
642 #if defined(PETSC_USE_COMPLEX)
643         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
644 #else
645         sum += (*v)*(*v); v++;
646 #endif
647       }
648       v = bmat->a;
649       nz = bmat->nz*bs2;
650       for (i=0; i<nz; i++) {
651 #if defined(PETSC_USE_COMPLEX)
652         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
653 #else
654         sum += (*v)*(*v); v++;
655 #endif
656       }
657       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
658       *nrm = sqrt(*nrm);
659     } else if (type == NORM_1) { /* max column sum */
660       PetscReal *tmp,*tmp2;
661       PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
662       ierr = PetscMalloc((2*mat->cmap->N+1)*sizeof(PetscReal),&tmp);CHKERRQ(ierr);
663       tmp2 = tmp + mat->cmap->N;
664       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
665       v = amat->a; jj = amat->j;
666       for (i=0; i<amat->nz; i++) {
667         for (j=0; j<bs; j++){
668           col = bs*(cstart + *jj) + j; /* column index */
669           for (row=0; row<bs; row++){
670             tmp[col] += PetscAbsScalar(*v);  v++;
671           }
672         }
673         jj++;
674       }
675       v = bmat->a; jj = bmat->j;
676       for (i=0; i<bmat->nz; i++) {
677         for (j=0; j<bs; j++){
678           col = bs*garray[*jj] + j;
679           for (row=0; row<bs; row++){
680             tmp[col] += PetscAbsScalar(*v); v++;
681           }
682         }
683         jj++;
684       }
685       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
686       *nrm = 0.0;
687       for (j=0; j<mat->cmap->N; j++) {
688         if (tmp2[j] > *nrm) *nrm = tmp2[j];
689       }
690       ierr = PetscFree(tmp);CHKERRQ(ierr);
691     } else if (type == NORM_INFINITY) { /* max row sum */
692       PetscReal *sums;
693       ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr);
694       sum = 0.0;
695       for (j=0; j<amat->mbs; j++) {
696         for (row=0; row<bs; row++) sums[row] = 0.0;
697         v = amat->a + bs2*amat->i[j];
698         nz = amat->i[j+1]-amat->i[j];
699         for (i=0; i<nz; i++) {
700           for (col=0; col<bs; col++){
701             for (row=0; row<bs; row++){
702               sums[row] += PetscAbsScalar(*v); v++;
703             }
704           }
705         }
706         v = bmat->a + bs2*bmat->i[j];
707         nz = bmat->i[j+1]-bmat->i[j];
708         for (i=0; i<nz; i++) {
709           for (col=0; col<bs; col++){
710             for (row=0; row<bs; row++){
711               sums[row] += PetscAbsScalar(*v); v++;
712             }
713           }
714         }
715         for (row=0; row<bs; row++){
716           if (sums[row] > sum) sum = sums[row];
717         }
718       }
719       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
720       ierr = PetscFree(sums);CHKERRQ(ierr);
721     } else {
722       SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
723     }
724   }
725   PetscFunctionReturn(0);
726 }
727 
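/*
   Each supported norm is computed from per-process partial results combined
   with a single MPI_Allreduce: MPI_SUM for the Frobenius and 1-norms,
   MPI_MAX for the infinity norm.  A minimal usage sketch:

     PetscReal nrm;
     ierr = MatNorm(A,NORM_FROBENIUS,&nrm);CHKERRQ(ierr);
*/
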
728 /*
729   Creates and fills the hash table.
730   This table is created only once.
731   If new entries need to be added to the matrix
732   then the hash table has to be destroyed and
733   recreated.
734 */
735 #undef __FUNCT__
736 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
737 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
738 {
739   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
740   Mat            A = baij->A,B=baij->B;
741   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
742   PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
743   PetscErrorCode ierr;
744   PetscInt       size,bs2=baij->bs2,rstart=baij->rstartbs;
745   PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
746   PetscInt       *HT,key;
747   MatScalar      **HD;
748   PetscReal      tmp;
749 #if defined(PETSC_USE_INFO)
750   PetscInt       ct=0,max=0;
751 #endif
752 
753   PetscFunctionBegin;
754   baij->ht_size=(PetscInt)(factor*nz);
755   size = baij->ht_size;
756 
757   if (baij->ht) {
758     PetscFunctionReturn(0);
759   }
760 
761   /* Allocate Memory for Hash Table */
762   ierr     = PetscMalloc((size)*(sizeof(PetscInt)+sizeof(MatScalar*))+1,&baij->hd);CHKERRQ(ierr);
763   baij->ht = (PetscInt*)(baij->hd + size);
764   HD       = baij->hd;
765   HT       = baij->ht;
766 
767 
768   ierr = PetscMemzero(HD,size*(sizeof(PetscInt)+sizeof(MatScalar*)));CHKERRQ(ierr);
769 
770 
771   /* Loop Over A */
772   for (i=0; i<a->mbs; i++) {
773     for (j=ai[i]; j<ai[i+1]; j++) {
774       row = i+rstart;
775       col = aj[j]+cstart;
776 
777       key = row*Nbs + col + 1;
778       h1  = HASH(size,key,tmp);
779       for (k=0; k<size; k++){
780         if (!HT[(h1+k)%size]) {
781           HT[(h1+k)%size] = key;
782           HD[(h1+k)%size] = a->a + j*bs2;
783           break;
784 #if defined(PETSC_USE_INFO)
785         } else {
786           ct++;
787 #endif
788         }
789       }
790 #if defined(PETSC_USE_INFO)
791       if (k> max) max = k;
792 #endif
793     }
794   }
795   /* Loop Over B */
796   for (i=0; i<b->mbs; i++) {
797     for (j=bi[i]; j<bi[i+1]; j++) {
798       row = i+rstart;
799       col = garray[bj[j]];
800       key = row*Nbs + col + 1;
801       h1  = HASH(size,key,tmp);
802       for (k=0; k<size; k++){
803         if (!HT[(h1+k)%size]) {
804           HT[(h1+k)%size] = key;
805           HD[(h1+k)%size] = b->a + j*bs2;
806           break;
807 #if defined(PETSC_USE_INFO)
808         } else {
809           ct++;
810 #endif
811         }
812       }
813 #if defined(PETSC_USE_INFO)
814       if (k> max) max = k;
815 #endif
816     }
817   }
818 
819   /* Print Summary */
820 #if defined(PETSC_USE_INFO)
821   for (i=0,j=0; i<size; i++) {
822     if (HT[i]) {j++;}
823   }
824   ierr = PetscInfo2(mat,"Average Search = %5.2f, max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
825 #endif
826   PetscFunctionReturn(0);
827 }
828 
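/*
   The table above is built only on request.  A minimal sketch of switching
   it on (before the first assembly) for an illustrative matrix A:

     ierr = MatSetOption(A,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);

   Once the table exists, MatAssemblyEnd_MPIBAIJ() below installs
   MatSetValues_MPIBAIJ_HT() and MatSetValuesBlocked_MPIBAIJ_HT() as the
   matrix's set-values methods.
*/
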
829 #undef __FUNCT__
830 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
831 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
832 {
833   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
834   PetscErrorCode ierr;
835   PetscInt       nstash,reallocs;
836   InsertMode     addv;
837 
838   PetscFunctionBegin;
839   if (baij->donotstash) {
840     PetscFunctionReturn(0);
841   }
842 
843   /* make sure all processors are in either INSERT_VALUES or ADD_VALUES mode */
844   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
845   if (addv == (ADD_VALUES|INSERT_VALUES)) {
846     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
847   }
848   mat->insertmode = addv; /* in case this processor had no cache */
849 
850   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
851   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
852   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
853   ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
854   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
855   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
856   PetscFunctionReturn(0);
857 }
858 
859 #undef __FUNCT__
860 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
861 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
862 {
863   Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
864   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)baij->A->data;
865   PetscErrorCode ierr;
866   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
867   PetscInt       *row,*col;
868   PetscTruth     r1,r2,r3,other_disassembled;
869   MatScalar      *val;
870   InsertMode     addv = mat->insertmode;
871   PetscMPIInt    n;
872 
873   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
874   PetscFunctionBegin;
875   if (!baij->donotstash) {
876     while (1) {
877       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
878       if (!flg) break;
879 
880       for (i=0; i<n;) {
881         /* Now identify the consecutive vals belonging to the same row */
882         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
883         if (j < n) ncols = j-i;
884         else       ncols = n-i;
885         /* Now assemble all these values with a single function call */
886         ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
887         i = j;
888       }
889     }
890     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
891     /* Now process the block-stash. Since the values are stashed column-oriented,
892        temporarily clear the roworiented flags, and restore the original flags
893        after MatSetValues() */
894     r1 = baij->roworiented;
895     r2 = a->roworiented;
896     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
897     baij->roworiented = PETSC_FALSE;
898     a->roworiented    = PETSC_FALSE;
898     ((Mat_SeqBAIJ*)baij->B->data)->roworiented      = PETSC_FALSE; /* b->roworiented */
900     while (1) {
901       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
902       if (!flg) break;
903 
904       for (i=0; i<n;) {
905         /* Now identify the consecutive vals belonging to the same row */
906         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
907         if (j < n) ncols = j-i;
908         else       ncols = n-i;
909         ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
910         i = j;
911       }
912     }
913     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
914     baij->roworiented = r1;
915     a->roworiented    = r2;
916     ((Mat_SeqBAIJ*)baij->B->data)->roworiented    = r3; /* b->roworiented */
917   }
918 
919   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
920   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
921 
922   /* determine if any processor has disassembled, if so we must
923      also disassemble ourselves, in order that we may reassemble. */
924   /*
925      if the nonzero structure of submatrix B cannot change then we know that
926      no processor disassembled, and thus we can skip this step
927   */
928   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
929     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
930     if (mat->was_assembled && !other_disassembled) {
931       ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
932     }
933   }
934 
935   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
936     ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
937   }
938   ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
939   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
940   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
941 
942 #if defined(PETSC_USE_INFO)
943   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
944     ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);
945     baij->ht_total_ct  = 0;
946     baij->ht_insert_ct = 0;
947   }
948 #endif
949   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
950     ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
951     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
952     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
953   }
954 
955   ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
956   baij->rowvalues = 0;
957   PetscFunctionReturn(0);
958 }
959 
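/*
   The two-phase assembly is collective: every process must call both
   routines, and all processes must use the same insert mode (enforced by
   the MPI_Allreduce in MatAssemblyBegin_MPIBAIJ()).  A minimal sketch of
   accumulating overlapping contributions, e.g. from an element loop:

     ierr = MatSetValues(A,1,&row,1,&col,&v,ADD_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/
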
960 #undef __FUNCT__
961 #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
962 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
963 {
964   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
965   PetscErrorCode    ierr;
966   PetscMPIInt       size = baij->size,rank = baij->rank;
967   PetscInt          bs = mat->rmap->bs;
968   PetscTruth        iascii,isdraw;
969   PetscViewer       sviewer;
970   PetscViewerFormat format;
971 
972   PetscFunctionBegin;
973   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
974   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
975   if (iascii) {
976     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
977     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
978       MatInfo info;
979       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
980       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
981       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
982               rank,mat->rmap->N,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs,
983               mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
984       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
985       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
986       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
987       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
988       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
989       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
990       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
991       PetscFunctionReturn(0);
992     } else if (format == PETSC_VIEWER_ASCII_INFO) {
993       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
994       PetscFunctionReturn(0);
995     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
996       PetscFunctionReturn(0);
997     }
998   }
999 
1000   if (isdraw) {
1001     PetscDraw  draw;
1002     PetscTruth isnull;
1003     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1004     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1005   }
1006 
1007   if (size == 1) {
1008     ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1009     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
1010   } else {
1011     /* assemble the entire matrix onto first processor. */
1012     Mat         A;
1013     Mat_SeqBAIJ *Aloc;
1014     PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
1015     MatScalar   *a;
1016 
1017     /* Here we are creating a temporary matrix, so we will assume MPIBAIJ is acceptable */
1018     /* Perhaps this should be the type of mat? */
1019     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1020     if (!rank) {
1021       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1022     } else {
1023       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1024     }
1025     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
1026     ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1027     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1028 
1029     /* copy over the A part */
1030     Aloc = (Mat_SeqBAIJ*)baij->A->data;
1031     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1032     ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
1033 
1034     for (i=0; i<mbs; i++) {
1035       rvals[0] = bs*(baij->rstartbs + i);
1036       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1037       for (j=ai[i]; j<ai[i+1]; j++) {
1038         col = (baij->cstartbs+aj[j])*bs;
1039         for (k=0; k<bs; k++) {
1040           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1041           col++; a += bs;
1042         }
1043       }
1044     }
1045     /* copy over the B part */
1046     Aloc = (Mat_SeqBAIJ*)baij->B->data;
1047     ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1048     for (i=0; i<mbs; i++) {
1049       rvals[0] = bs*(baij->rstartbs + i);
1050       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1051       for (j=ai[i]; j<ai[i+1]; j++) {
1052         col = baij->garray[aj[j]]*bs;
1053         for (k=0; k<bs; k++) {
1054           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1055           col++; a += bs;
1056         }
1057       }
1058     }
1059     ierr = PetscFree(rvals);CHKERRQ(ierr);
1060     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1061     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1062     /*
1063        Everyone has to call to draw the matrix since the graphics waits are
1064        synchronized across all processors that share the PetscDraw object
1065     */
1066     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1067     if (!rank) {
1068       ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1069       ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1070     }
1071     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1072     ierr = MatDestroy(A);CHKERRQ(ierr);
1073   }
1074   PetscFunctionReturn(0);
1075 }
1076 
1077 #undef __FUNCT__
1078 #define __FUNCT__ "MatView_MPIBAIJ"
1079 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
1080 {
1081   PetscErrorCode ierr;
1082   PetscTruth     iascii,isdraw,issocket,isbinary;
1083 
1084   PetscFunctionBegin;
1085   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1086   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1087   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1088   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1089   if (iascii || isdraw || issocket || isbinary) {
1090     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1091   } else {
1092     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
1093   }
1094   PetscFunctionReturn(0);
1095 }
1096 
1097 #undef __FUNCT__
1098 #define __FUNCT__ "MatDestroy_MPIBAIJ"
1099 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
1100 {
1101   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1102   PetscErrorCode ierr;
1103 
1104   PetscFunctionBegin;
1105 #if defined(PETSC_USE_LOG)
1106   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
1107 #endif
1108   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1109   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
1110   ierr = MatDestroy(baij->A);CHKERRQ(ierr);
1111   ierr = MatDestroy(baij->B);CHKERRQ(ierr);
1112 #if defined (PETSC_USE_CTABLE)
1113   if (baij->colmap) {ierr = PetscTableDestroy(baij->colmap);CHKERRQ(ierr);}
1114 #else
1115   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
1116 #endif
1117   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
1118   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
1119   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
1120   ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
1121   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
1122   ierr = PetscFree(baij->hd);CHKERRQ(ierr);
1123   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
1124   ierr = PetscFree(baij);CHKERRQ(ierr);
1125 
1126   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1127   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1128   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1129   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
1130   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1131   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1132   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
1133   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr);
1134   PetscFunctionReturn(0);
1135 }
1136 
1137 #undef __FUNCT__
1138 #define __FUNCT__ "MatMult_MPIBAIJ"
1139 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1140 {
1141   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1142   PetscErrorCode ierr;
1143   PetscInt       nt;
1144 
1145   PetscFunctionBegin;
1146   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1147   if (nt != A->cmap->n) {
1148     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1149   }
1150   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1151   if (nt != A->rmap->n) {
1152     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
1153   }
1154   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1155   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1156   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1157   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1158   PetscFunctionReturn(0);
1159 }
1160 
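/*
   Note the communication/computation overlap above: the scatter of the
   off-process entries of xx is started, the diagonal-block product runs
   while messages are in flight, and the off-diagonal product is added once
   the scatter completes.  A minimal user-level sketch (A assembled):

     Vec x,y;
     ierr = MatGetVecs(A,&x,&y);CHKERRQ(ierr);
     ierr = MatMult(A,x,y);CHKERRQ(ierr);
*/
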
1161 #undef __FUNCT__
1162 #define __FUNCT__ "MatMultAdd_MPIBAIJ"
1163 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1164 {
1165   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1166   PetscErrorCode ierr;
1167 
1168   PetscFunctionBegin;
1169   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1170   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1171   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1172   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1173   PetscFunctionReturn(0);
1174 }
1175 
1176 #undef __FUNCT__
1177 #define __FUNCT__ "MatMultTranspose_MPIBAIJ"
1178 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1179 {
1180   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1181   PetscErrorCode ierr;
1182   PetscTruth     merged;
1183 
1184   PetscFunctionBegin;
1185   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1186   /* do nondiagonal part */
1187   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1188   if (!merged) {
1189     /* send it on its way */
1190     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1191     /* do local part */
1192     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1193     /* receive remote parts: note this assumes the values are not actually */
1194     /* inserted in yy until the next line */
1195     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1196   } else {
1197     /* do local part */
1198     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1199     /* send it on its way */
1200     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1201     /* values actually were received in the Begin() but we need to call this nop */
1202     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1203   }
1204   PetscFunctionReturn(0);
1205 }
1206 
1207 #undef __FUNCT__
1208 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
1209 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1210 {
1211   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1212   PetscErrorCode ierr;
1213 
1214   PetscFunctionBegin;
1215   /* do nondiagonal part */
1216   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1217   /* send it on its way */
1218   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1219   /* do local part */
1220   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1221   /* receive remote parts: note this assumes the values are not actually */
1222   /* inserted in zz until the next line, which is true for my implementation */
1223   /* but may not always be true. */
1224   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1225   PetscFunctionReturn(0);
1226 }
1227 
1228 /*
1229   This only works correctly for square matrices where the subblock A->A is the
1230    diagonal block
1231 */
1232 #undef __FUNCT__
1233 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
1234 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1235 {
1236   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1237   PetscErrorCode ierr;
1238 
1239   PetscFunctionBegin;
1240   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1241   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1242   PetscFunctionReturn(0);
1243 }
1244 
1245 #undef __FUNCT__
1246 #define __FUNCT__ "MatScale_MPIBAIJ"
1247 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
1248 {
1249   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1250   PetscErrorCode ierr;
1251 
1252   PetscFunctionBegin;
1253   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1254   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1255   PetscFunctionReturn(0);
1256 }
1257 
1258 #undef __FUNCT__
1259 #define __FUNCT__ "MatGetRow_MPIBAIJ"
1260 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1261 {
1262   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
1263   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1264   PetscErrorCode ierr;
1265   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1266   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
1267   PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;
1268 
1269   PetscFunctionBegin;
1270   if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1271   mat->getrowactive = PETSC_TRUE;
1272 
1273   if (!mat->rowvalues && (idx || v)) {
1274     /*
1275         allocate enough space to hold information from the longest row.
1276     */
1277     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1278     PetscInt     max = 1,mbs = mat->mbs,tmp;
1279     for (i=0; i<mbs; i++) {
1280       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1281       if (max < tmp) { max = tmp; }
1282     }
1283     ierr = PetscMalloc(max*bs2*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1284     mat->rowindices = (PetscInt*)(mat->rowvalues + max*bs2);
1285   }
1286 
1287   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows");
1288   lrow = row - brstart;
1289 
1290   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1291   if (!v)   {pvA = 0; pvB = 0;}
1292   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1293   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1294   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1295   nztot = nzA + nzB;
1296 
1297   cmap  = mat->garray;
1298   if (v  || idx) {
1299     if (nztot) {
1300       /* Sort by increasing column numbers, assuming A and B already sorted */
1301       PetscInt imark = -1;
1302       if (v) {
1303         *v = v_p = mat->rowvalues;
1304         for (i=0; i<nzB; i++) {
1305           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1306           else break;
1307         }
1308         imark = i;
1309         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1310         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1311       }
1312       if (idx) {
1313         *idx = idx_p = mat->rowindices;
1314         if (imark > -1) {
1315           for (i=0; i<imark; i++) {
1316             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1317           }
1318         } else {
1319           for (i=0; i<nzB; i++) {
1320             if (cmap[cworkB[i]/bs] < cstart)
1321               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1322             else break;
1323           }
1324           imark = i;
1325         }
1326         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1327         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1328       }
1329     } else {
1330       if (idx) *idx = 0;
1331       if (v)   *v   = 0;
1332     }
1333   }
1334   *nz = nztot;
1335   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1336   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1337   PetscFunctionReturn(0);
1338 }
1339 
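/*
   Only one row may be active at a time (see the getrowactive check above),
   and each MatGetRow() must be paired with MatRestoreRow().  A minimal
   sketch of traversing the locally owned rows of an illustrative matrix A:

     PetscInt          row,rstart,rend,ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;
     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     }
*/
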
1340 #undef __FUNCT__
1341 #define __FUNCT__ "MatRestoreRow_MPIBAIJ"
1342 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1343 {
1344   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1345 
1346   PetscFunctionBegin;
1347   if (!baij->getrowactive) {
1348     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1349   }
1350   baij->getrowactive = PETSC_FALSE;
1351   PetscFunctionReturn(0);
1352 }
1353 
1354 #undef __FUNCT__
1355 #define __FUNCT__ "MatZeroEntries_MPIBAIJ"
1356 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1357 {
1358   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1359   PetscErrorCode ierr;
1360 
1361   PetscFunctionBegin;
1362   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1363   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1364   PetscFunctionReturn(0);
1365 }
1366 
1367 #undef __FUNCT__
1368 #define __FUNCT__ "MatGetInfo_MPIBAIJ"
1369 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1370 {
1371   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
1372   Mat            A = a->A,B = a->B;
1373   PetscErrorCode ierr;
1374   PetscReal      isend[5],irecv[5];
1375 
1376   PetscFunctionBegin;
1377   info->block_size     = (PetscReal)matin->rmap->bs;
1378   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1379   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1380   isend[3] = info->memory;  isend[4] = info->mallocs;
1381   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1382   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1383   isend[3] += info->memory;  isend[4] += info->mallocs;
1384   if (flag == MAT_LOCAL) {
1385     info->nz_used      = isend[0];
1386     info->nz_allocated = isend[1];
1387     info->nz_unneeded  = isend[2];
1388     info->memory       = isend[3];
1389     info->mallocs      = isend[4];
1390   } else if (flag == MAT_GLOBAL_MAX) {
1391     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1392     info->nz_used      = irecv[0];
1393     info->nz_allocated = irecv[1];
1394     info->nz_unneeded  = irecv[2];
1395     info->memory       = irecv[3];
1396     info->mallocs      = irecv[4];
1397   } else if (flag == MAT_GLOBAL_SUM) {
1398     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1399     info->nz_used      = irecv[0];
1400     info->nz_allocated = irecv[1];
1401     info->nz_unneeded  = irecv[2];
1402     info->memory       = irecv[3];
1403     info->mallocs      = irecv[4];
1404   } else {
1405     SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1406   }
1407   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1408   info->fill_ratio_needed = 0;
1409   info->factor_mallocs    = 0;
1410   PetscFunctionReturn(0);
1411 }
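
/*
   Usage sketch (editorial example, not part of the library): how a caller might
   query the reduced statistics computed above; A is assumed to be an assembled
   MPIBAIJ matrix and ierr a PetscErrorCode.

     MatInfo info;
     ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(PETSC_COMM_WORLD,"nz used %g  mallocs %g\n",info.nz_used,info.mallocs);CHKERRQ(ierr);
*/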
1412 
1413 #undef __FUNCT__
1414 #define __FUNCT__ "MatSetOption_MPIBAIJ"
1415 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscTruth flg)
1416 {
1417   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1418   PetscErrorCode ierr;
1419 
1420   PetscFunctionBegin;
1421   switch (op) {
1422   case MAT_NEW_NONZERO_LOCATIONS:
1423   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1424   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1425   case MAT_KEEP_NONZERO_PATTERN:
1426   case MAT_NEW_NONZERO_LOCATION_ERR:
1427     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1428     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1429     break;
1430   case MAT_ROW_ORIENTED:
1431     a->roworiented = flg;
1432     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1433     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1434     break;
1435   case MAT_NEW_DIAGONALS:
1436     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1437     break;
1438   case MAT_IGNORE_OFF_PROC_ENTRIES:
1439     a->donotstash = flg;
1440     break;
1441   case MAT_USE_HASH_TABLE:
1442     a->ht_flag = flg;
1443     break;
1444   case MAT_SYMMETRIC:
1445   case MAT_STRUCTURALLY_SYMMETRIC:
1446   case MAT_HERMITIAN:
1447   case MAT_SYMMETRY_ETERNAL:
1448     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1449     break;
1450   default:
1451     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1452   }
1453   PetscFunctionReturn(0);
1454 }
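
/*
   Usage sketch (editorial example, not part of the library): enabling the
   hash-table insertion path and discarding off-process entries, both handled
   by cases above; A is assumed to be an MPIBAIJ matrix.

     ierr = MatSetOption(A,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
     ierr = MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
*/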
1455 
1456 #undef __FUNCT__
1457 #define __FUNCT__ "MatTranspose_MPIBAIJ"
1458 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
1459 {
1460   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
1461   Mat_SeqBAIJ    *Aloc;
1462   Mat            B;
1463   PetscErrorCode ierr;
1464   PetscInt       M=A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
1465   PetscInt       bs=A->rmap->bs,mbs=baij->mbs;
1466   MatScalar      *a;
1467 
1468   PetscFunctionBegin;
1469   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"In-place transpose requires a square matrix");
1470   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1471     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
1472     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1473     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1474     ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1475   } else {
1476     B = *matout;
1477   }
1478 
1479   /* copy over the A part */
1480   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1481   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1482   ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
1483 
1484   for (i=0; i<mbs; i++) {
1485     rvals[0] = bs*(baij->rstartbs + i);
1486     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1487     for (j=ai[i]; j<ai[i+1]; j++) {
1488       col = (baij->cstartbs+aj[j])*bs;
1489       for (k=0; k<bs; k++) {
1490         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1491         col++; a += bs;
1492       }
1493     }
1494   }
1495   /* copy over the B part */
1496   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1497   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1498   for (i=0; i<mbs; i++) {
1499     rvals[0] = bs*(baij->rstartbs + i);
1500     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1501     for (j=ai[i]; j<ai[i+1]; j++) {
1502       col = baij->garray[aj[j]]*bs;
1503       for (k=0; k<bs; k++) {
1504         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1505         col++; a += bs;
1506       }
1507     }
1508   }
1509   ierr = PetscFree(rvals);CHKERRQ(ierr);
1510   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1511   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1512 
1513   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1514     *matout = B;
1515   } else {
1516     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1517   }
1518   PetscFunctionReturn(0);
1519 }
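
/*
   Usage sketch (editorial example, not part of the library): the two calling
   modes handled above, for an assembled MPIBAIJ matrix A.

     Mat At;
     ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);   creates At = A^T
     ierr = MatTranspose(A,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);      in-place, square A only
*/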
1520 
1521 #undef __FUNCT__
1522 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
1523 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1524 {
1525   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1526   Mat            a = baij->A,b = baij->B;
1527   PetscErrorCode ierr;
1528   PetscInt       s1,s2,s3;
1529 
1530   PetscFunctionBegin;
1531   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1532   if (rr) {
1533     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1534     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1535     /* Overlap communication with computation. */
1536     ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1537   }
1538   if (ll) {
1539     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1540     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1541     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1542   }
1543   /* scale the diagonal block */
1544   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1545 
1546   if (rr) {
1547     /* Do a scatter end and then right scale the off-diagonal block */
1548     ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1549     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1550   }
1551 
1552   PetscFunctionReturn(0);
1553 }
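
/*
   Usage sketch (editorial example, not part of the library): the routine above
   computes mat <- diag(ll) * mat * diag(rr); ll must conform to the local row
   layout and rr to the local column layout.

     ierr = MatDiagonalScale(A,ll,rr);CHKERRQ(ierr);          scale rows and columns
     ierr = MatDiagonalScale(A,PETSC_NULL,rr);CHKERRQ(ierr);  scale columns only
*/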
1554 
1555 #undef __FUNCT__
1556 #define __FUNCT__ "MatZeroRows_MPIBAIJ"
1557 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
1558 {
1559   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1560   PetscErrorCode ierr;
1561   PetscMPIInt    imdex,size = l->size,n,rank = l->rank;
1562   PetscInt       i,*owners = A->rmap->range;
1563   PetscInt       *nprocs,j,idx,nsends,row;
1564   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
1565   PetscInt       *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source,lastidx = -1;
1566   PetscInt       *lens,*lrows,*values,rstart_bs=A->rmap->rstart;
1567   MPI_Comm       comm = ((PetscObject)A)->comm;
1568   MPI_Request    *send_waits,*recv_waits;
1569   MPI_Status     recv_status,*send_status;
1570 #if defined(PETSC_DEBUG)
1571   PetscTruth     found = PETSC_FALSE;
1572 #endif
1573 
1574   PetscFunctionBegin;
1575   /*  first count number of contributors to each processor */
1576   ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
1577   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
1578   ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note */
1579   j = 0;
1580   for (i=0; i<N; i++) {
1581     if (lastidx > (idx = rows[i])) j = 0;
1582     lastidx = idx;
1583     for (; j<size; j++) {
1584       if (idx >= owners[j] && idx < owners[j+1]) {
1585         nprocs[2*j]++;
1586         nprocs[2*j+1] = 1;
1587         owner[i] = j;
1588 #if defined(PETSC_DEBUG)
1589         found = PETSC_TRUE;
1590 #endif
1591         break;
1592       }
1593     }
1594 #if defined(PETSC_DEBUG)
1595     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
1596     found = PETSC_FALSE;
1597 #endif
1598   }
1599   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
1600 
1601   /* inform other processors of number of messages and max length */
1602   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
1603 
1604   /* post receives:   */
1605   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
1606   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
1607   for (i=0; i<nrecvs; i++) {
1608     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
1609   }
1610 
1611   /* do sends:
1612      1) starts[i] gives the starting index in svalues of the rows destined
1613      for the ith processor
1614   */
1615   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
1616   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
1617   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
1618   starts[0]  = 0;
1619   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1620   for (i=0; i<N; i++) {
1621     svalues[starts[owner[i]]++] = rows[i];
1622   }
1623 
1624   starts[0] = 0;
1625   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1626   count = 0;
1627   for (i=0; i<size; i++) {
1628     if (nprocs[2*i+1]) {
1629       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
1630     }
1631   }
1632   ierr = PetscFree(starts);CHKERRQ(ierr);
1633 
1634   base = owners[rank];
1635 
1636   /*  wait on receives */
1637   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
1638   source = lens + nrecvs;
1639   count  = nrecvs; slen = 0;
1640   while (count) {
1641     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
1642     /* unpack receives into our local space */
1643     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
1644     source[imdex]  = recv_status.MPI_SOURCE;
1645     lens[imdex]    = n;
1646     slen          += n;
1647     count--;
1648   }
1649   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1650 
1651   /* move the data into the send scatter */
1652   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
1653   count = 0;
1654   for (i=0; i<nrecvs; i++) {
1655     values = rvalues + i*nmax;
1656     for (j=0; j<lens[i]; j++) {
1657       lrows[count++] = values[j] - base;
1658     }
1659   }
1660   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1661   ierr = PetscFree(lens);CHKERRQ(ierr);
1662   ierr = PetscFree(owner);CHKERRQ(ierr);
1663   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1664 
1665   /* actually zap the local rows */
1666   /*
1667         Zero the required rows. If the "diagonal block" of the matrix
1668      is square and the user wishes to set the diagonal, we use separate
1669      code so that MatSetValues() is not called for each diagonal entry,
1670      which would allocate new memory on each call, causing many mallocs
1671      and slowing things down.
1672   */
1673   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1674   ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0);CHKERRQ(ierr);
1675   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
1676     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag);CHKERRQ(ierr);
1677   } else if (diag != 0.0) {
1678     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr);
1679     if (((Mat_SeqBAIJ*)l->A->data)->nonew) {
1680       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1681 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1682     }
1683     for (i=0; i<slen; i++) {
1684       row  = lrows[i] + rstart_bs;
1685       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
1686     }
1687     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1688     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1689   } else {
1690     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr);
1691   }
1692 
1693   ierr = PetscFree(lrows);CHKERRQ(ierr);
1694 
1695   /* wait on sends */
1696   if (nsends) {
1697     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1698     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1699     ierr = PetscFree(send_status);CHKERRQ(ierr);
1700   }
1701   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1702   ierr = PetscFree(svalues);CHKERRQ(ierr);
1703 
1704   PetscFunctionReturn(0);
1705 }
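
/*
   Usage sketch (editorial example, not part of the library): zeroing rows for
   Dirichlet boundary conditions with 1.0 placed on the diagonal; nrows and
   rows[] are hypothetical caller data, and the rows need not be locally owned
   since the routine above forwards each row to its owning process.

     ierr = MatZeroRows(A,nrows,rows,1.0);CHKERRQ(ierr);
*/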
1706 
1707 #undef __FUNCT__
1708 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
1709 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1710 {
1711   Mat_MPIBAIJ    *a   = (Mat_MPIBAIJ*)A->data;
1712   PetscErrorCode ierr;
1713 
1714   PetscFunctionBegin;
1715   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1716   PetscFunctionReturn(0);
1717 }
1718 
1719 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
1720 
1721 #undef __FUNCT__
1722 #define __FUNCT__ "MatEqual_MPIBAIJ"
1723 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag)
1724 {
1725   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1726   Mat            a,b,c,d;
1727   PetscTruth     flg;
1728   PetscErrorCode ierr;
1729 
1730   PetscFunctionBegin;
1731   a = matA->A; b = matA->B;
1732   c = matB->A; d = matB->B;
1733 
1734   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1735   if (flg) {
1736     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1737   }
1738   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1739   PetscFunctionReturn(0);
1740 }
1741 
1742 #undef __FUNCT__
1743 #define __FUNCT__ "MatCopy_MPIBAIJ"
1744 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
1745 {
1746   PetscErrorCode ierr;
1747   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ *)A->data;
1748   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ *)B->data;
1749 
1750   PetscFunctionBegin;
1751   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1752   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1753     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1754   } else {
1755     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1756     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1757   }
1758   PetscFunctionReturn(0);
1759 }
1760 
1761 #undef __FUNCT__
1762 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ"
1763 PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A)
1764 {
1765   PetscErrorCode ierr;
1766 
1767   PetscFunctionBegin;
1768   ierr =  MatMPIBAIJSetPreallocation(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1769   PetscFunctionReturn(0);
1770 }
1771 
1772 #include "petscblaslapack.h"
1773 #undef __FUNCT__
1774 #define __FUNCT__ "MatAXPY_MPIBAIJ"
1775 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1776 {
1777   PetscErrorCode ierr;
1778   Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data;
1779   PetscBLASInt   bnz,one=1;
1780   Mat_SeqBAIJ    *x,*y;
1781 
1782   PetscFunctionBegin;
1783   if (str == SAME_NONZERO_PATTERN) {
1784     PetscScalar alpha = a;
1785     x = (Mat_SeqBAIJ *)xx->A->data;
1786     y = (Mat_SeqBAIJ *)yy->A->data;
1787     bnz = PetscBLASIntCast(x->nz);
1788     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1789     x = (Mat_SeqBAIJ *)xx->B->data;
1790     y = (Mat_SeqBAIJ *)yy->B->data;
1791     bnz = PetscBLASIntCast(x->nz);
1792     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1793   } else {
1794     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1795   }
1796   PetscFunctionReturn(0);
1797 }
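
/*
   Usage sketch (editorial example, not part of the library): when Y and X have
   identical preallocation the fast path above combines the stored values with
   a single BLAS axpy per part.

     ierr = MatAXPY(Y,2.0,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);       Y <- Y + 2*X
     ierr = MatAXPY(Y,2.0,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);  general, slower path
*/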
1798 
1799 #undef __FUNCT__
1800 #define __FUNCT__ "MatSetBlockSize_MPIBAIJ"
1801 PetscErrorCode MatSetBlockSize_MPIBAIJ(Mat A,PetscInt bs)
1802 {
1803   Mat_MPIBAIJ    *a   = (Mat_MPIBAIJ*)A->data;
1804   PetscInt rbs,cbs;
1805   PetscErrorCode ierr;
1806 
1807   PetscFunctionBegin;
1808   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1809   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1810   ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr);
1811   ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr);
1812   if (rbs != bs) SETERRQ2(PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,rbs);
1813   if (cbs != bs) SETERRQ2(PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,cbs);
1814   PetscFunctionReturn(0);
1815 }
1816 
1817 #undef __FUNCT__
1818 #define __FUNCT__ "MatRealPart_MPIBAIJ"
1819 PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
1820 {
1821   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ*)A->data;
1822   PetscErrorCode ierr;
1823 
1824   PetscFunctionBegin;
1825   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1826   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1827   PetscFunctionReturn(0);
1828 }
1829 
1830 #undef __FUNCT__
1831 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ"
1832 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
1833 {
1834   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ*)A->data;
1835   PetscErrorCode ierr;
1836 
1837   PetscFunctionBegin;
1838   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1839   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1840   PetscFunctionReturn(0);
1841 }
1842 
1843 #undef __FUNCT__
1844 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ"
1845 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
1846 {
1847   PetscErrorCode ierr;
1848   IS             iscol_local;
1849   PetscInt       csize;
1850 
1851   PetscFunctionBegin;
1852   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
1853   if (call == MAT_REUSE_MATRIX) {
1854     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
1855     if (!iscol_local) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
1856   } else {
1857     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
1858   }
1859   ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
1860   if (call == MAT_INITIAL_MATRIX) {
1861     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
1862     ierr = ISDestroy(iscol_local);CHKERRQ(ierr);
1863   }
1864   PetscFunctionReturn(0);
1865 }
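
/*
   Usage sketch (editorial example, not part of the library): the "ISAllGather"
   object composed above is what lets a MAT_REUSE_MATRIX call skip regathering
   the column index set; isrow and iscol are hypothetical index sets.

     Mat S = PETSC_NULL;
     ierr = MatGetSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&S);CHKERRQ(ierr);
     ... change numerical values of A, keeping the same nonzero pattern ...
     ierr = MatGetSubMatrix(A,isrow,iscol,MAT_REUSE_MATRIX,&S);CHKERRQ(ierr);
*/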
1866 
1867 #undef __FUNCT__
1868 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ_Private"
1869 /*
1870     Not great since it makes two copies of the submatrix: first a sequential
1871   SeqBAIJ on each process, then the end result formed by concatenating the
1872   local matrices. Writing it directly would be much like MatGetSubMatrices_MPIBAIJ().
1873 */
1874 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
1875 {
1876   PetscErrorCode ierr;
1877   PetscMPIInt    rank,size;
1878   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs;
1879   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
1880   Mat            *local,M,Mreuse;
1881   MatScalar      *vwork,*aa;
1882   MPI_Comm       comm = ((PetscObject)mat)->comm;
1883   Mat_SeqBAIJ    *aij;
1884 
1885 
1886   PetscFunctionBegin;
1887   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1888   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1889 
1890   if (call ==  MAT_REUSE_MATRIX) {
1891     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
1892     if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
1893     local = &Mreuse;
1894     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
1895   } else {
1896     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
1897     Mreuse = *local;
1898     ierr   = PetscFree(local);CHKERRQ(ierr);
1899   }
1900 
1901   /*
1902       m - number of local rows
1903       n - number of columns (same on all processors)
1904       rstart - first row in new global matrix generated
1905   */
1906   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
1907   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
1908   m    = m/bs;
1909   n    = n/bs;
1910 
1911   if (call == MAT_INITIAL_MATRIX) {
1912     aij = (Mat_SeqBAIJ*)(Mreuse)->data;
1913     ii  = aij->i;
1914     jj  = aij->j;
1915 
1916     /*
1917         Determine the number of non-zeros in the diagonal and off-diagonal
1918         portions of the matrix in order to do correct preallocation
1919     */
1920 
1921     /* first get start and end of "diagonal" columns */
1922     if (csize == PETSC_DECIDE) {
1923       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
1924       if (mglobal == n*bs) { /* square matrix */
1925         nlocal = m;
1926       } else {
1927         nlocal = n/size + ((n % size) > rank);
1928       }
1929     } else {
1930       nlocal = csize/bs;
1931     }
1932     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
1933     rstart = rend - nlocal;
1934     if (rank == size - 1 && rend != n) {
1935       SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
1936     }
1937 
1938     /* next, compute all the lengths */
1939     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
1940     olens = dlens + m;
1941     for (i=0; i<m; i++) {
1942       jend = ii[i+1] - ii[i];
1943       olen = 0;
1944       dlen = 0;
1945       for (j=0; j<jend; j++) {
1946         if (*jj < rstart || *jj >= rend) olen++;
1947         else dlen++;
1948         jj++;
1949       }
1950       olens[i] = olen;
1951       dlens[i] = dlen;
1952     }
1953     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
1954     ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr);
1955     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
1956     ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
1957     ierr = PetscFree(dlens);CHKERRQ(ierr);
1958   } else {
1959     PetscInt ml,nl;
1960 
1961     M = *newmat;
1962     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
1963     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
1964     ierr = MatZeroEntries(M);CHKERRQ(ierr);
1965     /*
1966          The next two lines are needed so we may call MatSetValuesBlocked_MPIBAIJ() below directly,
1967        rather than the slower MatSetValues().
1968     */
1969     M->was_assembled = PETSC_TRUE;
1970     M->assembled     = PETSC_FALSE;
1971   }
1972   ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
1973   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
1974   aij = (Mat_SeqBAIJ*)(Mreuse)->data;
1975   ii  = aij->i;
1976   jj  = aij->j;
1977   aa  = aij->a;
1978   for (i=0; i<m; i++) {
1979     row   = rstart/bs + i;
1980     nz    = ii[i+1] - ii[i];
1981     cwork = jj;     jj += nz;
1982     vwork = aa;     aa += nz;
1983     ierr = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
1984   }
1985 
1986   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1987   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1988   *newmat = M;
1989 
1990   /* save submatrix used in processor for next request */
1991   if (call ==  MAT_INITIAL_MATRIX) {
1992     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
1993     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
1994   }
1995 
1996   PetscFunctionReturn(0);
1997 }
1998 
1999 #undef __FUNCT__
2000 #define __FUNCT__ "MatPermute_MPIBAIJ"
2001 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B)
2002 {
2003   MPI_Comm       comm,pcomm;
2004   PetscInt       first,local_size,nrows;
2005   const PetscInt *rows;
2006   PetscMPIInt    size;
2007   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
2008   PetscErrorCode ierr;
2009 
2010   PetscFunctionBegin;
2011   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2012   /* make a collective version of 'rowp' */
2013   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
2014   if (pcomm==comm) {
2015     crowp = rowp;
2016   } else {
2017     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
2018     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
2019     ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr);
2020     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
2021   }
2022   /* collect the global row permutation and invert it */
2023   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
2024   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
2025   if (pcomm!=comm) {
2026     ierr = ISDestroy(crowp);CHKERRQ(ierr);
2027   }
2028   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
2029   /* get the local target indices */
2030   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
2031   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
2032   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
2033   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr);
2034   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
2035   ierr = ISDestroy(irowp);CHKERRQ(ierr);
2036   /* the column permutation is simpler since no gathering across processes is needed;
2037      make a local version of 'colp' and invert it */
2038   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
2039   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
2040   if (size==1) {
2041     lcolp = colp;
2042   } else {
2043     ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
2044     ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
2045     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr);
2046   }
2047   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
2048   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
2049   ierr = ISSetPermutation(icolp);CHKERRQ(ierr);
2050   if (size>1) {
2051     ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
2052     ierr = ISDestroy(lcolp);CHKERRQ(ierr);
2053   }
2054   /* now we just get the submatrix */
2055   ierr = MatGetSubMatrix_MPIBAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
2056   /* clean up */
2057   ierr = ISDestroy(lrowp);CHKERRQ(ierr);
2058   ierr = ISDestroy(icolp);CHKERRQ(ierr);
2059   PetscFunctionReturn(0);
2060 }
2061 
2062 #undef __FUNCT__
2063 #define __FUNCT__ "MatGetGhosts_MPIBAIJ"
2064 PetscErrorCode PETSCMAT_DLLEXPORT MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
2065 {
2066   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*) mat->data;
2067   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
2068 
2069   PetscFunctionBegin;
2070   if (nghosts) { *nghosts = B->nbs;}
2071   if (ghosts) {*ghosts = baij->garray;}
2072   PetscFunctionReturn(0);
2073 }
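
/*
   Usage sketch (editorial example, not part of the library): note that for BAIJ
   the ghosts returned above are block indices, one per off-process block column
   of the local off-diagonal part.

     PetscInt       nghosts;
     const PetscInt *ghosts;
     ierr = MatGetGhosts(A,&nghosts,&ghosts);CHKERRQ(ierr);
*/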
2074 
2075 EXTERN PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat);
2076 
2077 #undef __FUNCT__
2078 #define __FUNCT__ "MatFDColoringCreate_MPIBAIJ"
2079 /*
2080     This routine is almost identical to MatFDColoringCreate_MPIAIJ()!
2081 */
2082 PetscErrorCode MatFDColoringCreate_MPIBAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c)
2083 {
2084   Mat_MPIBAIJ            *baij = (Mat_MPIBAIJ*)mat->data;
2085   PetscErrorCode        ierr;
2086   PetscMPIInt           size,*ncolsonproc,*disp,nn;
2087   PetscInt              bs,i,n,nrows,j,k,m,*rows = 0,*A_ci,*A_cj,ncols,col;
2088   const PetscInt        *is;
2089   PetscInt              nis = iscoloring->n,nctot,*cols,*B_ci,*B_cj;
2090   PetscInt              *rowhit,M,cstart,cend,colb;
2091   PetscInt              *columnsforrow,l;
2092   IS                    *isa;
2093   PetscTruth             done,flg;
2094   ISLocalToGlobalMapping map = mat->bmapping;
2095   PetscInt               *ltog = (map ? map->indices : (PetscInt*) PETSC_NULL),ctype=c->ctype;
2096 
2097   PetscFunctionBegin;
2098   if (!mat->assembled) {
2099     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled first; MatAssemblyBegin/End();");
2100   }
2101   if (ctype == IS_COLORING_GHOSTED && !map) SETERRQ(PETSC_ERR_ARG_INCOMP,"When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMappingBlock");
2102 
2103   ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr);
2104 
2105   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2106   M                = mat->rmap->n/bs;
2107   cstart           = mat->cmap->rstart/bs;
2108   cend             = mat->cmap->rend/bs;
2109   c->M             = mat->rmap->N/bs;  /* set the global rows and columns and local rows */
2110   c->N             = mat->cmap->N/bs;
2111   c->m             = mat->rmap->n/bs;
2112   c->rstart        = mat->rmap->rstart/bs;
2113 
2114   c->ncolors       = nis;
2115   ierr             = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr);
2116   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr);
2117   ierr             = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr);
2118   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr);
2119   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr);
2120   ierr = PetscLogObjectMemory(c,5*nis*sizeof(PetscInt));CHKERRQ(ierr);
2121 
2122   /* Allow access to data structures of local part of matrix */
2123   if (!baij->colmap) {
2124     ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
2125   }
2126   ierr = MatGetColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr);
2127   ierr = MatGetColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr);
2128 
2129   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr);
2130   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr);
2131 
2132   for (i=0; i<nis; i++) {
2133     ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr);
2134     ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr);
2135     c->ncolumns[i] = n;
2136     if (n) {
2137       ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr);
2138       ierr = PetscLogObjectMemory(c,n*sizeof(PetscInt));CHKERRQ(ierr);
2139       ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr);
2140     } else {
2141       c->columns[i]  = 0;
2142     }
2143 
2144     if (ctype == IS_COLORING_GLOBAL){
2145       /* Determine the total (parallel) number of columns of this color */
2146       ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
2147       ierr = PetscMalloc(2*size*sizeof(PetscMPIInt),&ncolsonproc);CHKERRQ(ierr);
2148       disp = ncolsonproc + size;
2149 
2150       nn   = PetscMPIIntCast(n);
2151       ierr = MPI_Allgather(&nn,1,MPI_INT,ncolsonproc,1,MPI_INT,((PetscObject)mat)->comm);CHKERRQ(ierr);
2152       nctot = 0; for (j=0; j<size; j++) {nctot += ncolsonproc[j];}
2153       if (!nctot) {
2154         ierr = PetscInfo(mat,"Coloring of matrix has some unneeded colors with no corresponding rows\n");CHKERRQ(ierr);
2155       }
2156 
2157       disp[0] = 0;
2158       for (j=1; j<size; j++) {
2159         disp[j] = disp[j-1] + ncolsonproc[j-1];
2160       }
2161 
2162       /* Get complete list of columns for color on each processor */
2163       ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2164       ierr = MPI_Allgatherv((void*)is,n,MPIU_INT,cols,ncolsonproc,disp,MPIU_INT,((PetscObject)mat)->comm);CHKERRQ(ierr);
2165       ierr = PetscFree(ncolsonproc);CHKERRQ(ierr);
2166     } else if (ctype == IS_COLORING_GHOSTED){
2167       /* Determine local number of columns of this color on this process, including ghost points */
2168       nctot = n;
2169       ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2170       ierr = PetscMemcpy(cols,is,n*sizeof(PetscInt));CHKERRQ(ierr);
2171     } else {
2172       SETERRQ(PETSC_ERR_SUP,"Not provided for this MatFDColoring type");
2173     }
2174 
2175     /*
2176        Mark all rows affected by these columns
2177     */
2178     /* Temporary option to allow for debugging/testing */
2179     flg  = PETSC_FALSE;
2180     ierr = PetscOptionsGetTruth(PETSC_NULL,"-matfdcoloring_slow",&flg,PETSC_NULL);CHKERRQ(ierr);
2181     if (!flg) {/*-----------------------------------------------------------------------------*/
2182       /* crude, fast version */
2183       ierr = PetscMemzero(rowhit,M*sizeof(PetscInt));CHKERRQ(ierr);
2184       /* loop over columns*/
2185       for (j=0; j<nctot; j++) {
2186         if (ctype == IS_COLORING_GHOSTED) {
2187           col = ltog[cols[j]];
2188         } else {
2189           col  = cols[j];
2190         }
2191         if (col >= cstart && col < cend) {
2192           /* column is in diagonal block of matrix */
2193           rows = A_cj + A_ci[col-cstart];
2194           m    = A_ci[col-cstart+1] - A_ci[col-cstart];
2195         } else {
2196 #if defined (PETSC_USE_CTABLE)
2197           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2198           colb --;
2199 #else
2200           colb = baij->colmap[col] - 1;
2201 #endif
2202           if (colb == -1) {
2203             m = 0;
2204           } else {
2205             colb = colb/bs;
2206             rows = B_cj + B_ci[colb];
2207             m    = B_ci[colb+1] - B_ci[colb];
2208           }
2209         }
2210         /* loop over columns marking them in rowhit */
2211         for (k=0; k<m; k++) {
2212           rowhit[*rows++] = col + 1;
2213         }
2214       }
2215 
2216       /* count the number of hits */
2217       nrows = 0;
2218       for (j=0; j<M; j++) {
2219         if (rowhit[j]) nrows++;
2220       }
2221       c->nrows[i]         = nrows;
2222       ierr                = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
2223       ierr                = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
2224       ierr = PetscLogObjectMemory(c,2*(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr);
2225       nrows = 0;
2226       for (j=0; j<M; j++) {
2227         if (rowhit[j]) {
2228           c->rows[i][nrows]           = j;
2229           c->columnsforrow[i][nrows] = rowhit[j] - 1;
2230           nrows++;
2231         }
2232       }
2233     } else {/*-------------------------------------------------------------------------------*/
2234       /* slow version, using rowhit as a linked list */
2235       PetscInt currentcol,fm,mfm;
2236       rowhit[M] = M;
2237       nrows     = 0;
2238       /* loop over columns*/
2239       for (j=0; j<nctot; j++) {
2240         if (ctype == IS_COLORING_GHOSTED) {
2241           col = ltog[cols[j]];
2242         } else {
2243           col  = cols[j];
2244         }
2245         if (col >= cstart && col < cend) {
2246           /* column is in diagonal block of matrix */
2247           rows = A_cj + A_ci[col-cstart];
2248           m    = A_ci[col-cstart+1] - A_ci[col-cstart];
2249         } else {
2250 #if defined (PETSC_USE_CTABLE)
2251           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2252           colb --;
2253 #else
2254           colb = baij->colmap[col] - 1;
2255 #endif
2256           if (colb == -1) {
2257             m = 0;
2258           } else {
2259             colb = colb/bs;
2260             rows = B_cj + B_ci[colb];
2261             m    = B_ci[colb+1] - B_ci[colb];
2262           }
2263         }
2264 
2265         /* loop over columns marking them in rowhit */
2266         fm    = M; /* fm points to first entry in linked list */
2267         for (k=0; k<m; k++) {
2268           currentcol = *rows++;
2269           /* is it already in the list? */
2270           do {
2271             mfm  = fm;
2272             fm   = rowhit[fm];
2273           } while (fm < currentcol);
2274           /* not in list so add it */
2275           if (fm != currentcol) {
2276             nrows++;
2277             columnsforrow[currentcol] = col;
2278             /* next three lines insert new entry into linked list */
2279             rowhit[mfm]               = currentcol;
2280             rowhit[currentcol]        = fm;
2281             fm                        = currentcol;
2282             /* fm points to present position in list since we know the columns are sorted */
2283           } else {
2284             SETERRQ(PETSC_ERR_PLIB,"Invalid coloring of matrix detected");
2285           }
2286         }
2287       }
2288       c->nrows[i]         = nrows;
2289       ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
2290       ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
2291       ierr = PetscLogObjectMemory(c,(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr);
2292       /* now store the linked list of rows into c->rows[i] */
2293       nrows = 0;
2294       fm    = rowhit[M];
2295       do {
2296         c->rows[i][nrows]            = fm;
2297         c->columnsforrow[i][nrows++] = columnsforrow[fm];
2298         fm                           = rowhit[fm];
2299       } while (fm < M);
2300     } /* ---------------------------------------------------------------------------------------*/
2301     ierr = PetscFree(cols);CHKERRQ(ierr);
2302   }
2303 
2304   /* Optimize by adding the vscale and scaleforrow[][] fields */
2305   /*
2306        vscale will contain the on-processor ("diagonal") scalings followed by the off-processor ones
2307   */
2308   if (ctype == IS_COLORING_GLOBAL) {
2309     PetscInt *garray;
2310     ierr = PetscMalloc(baij->B->cmap->n*sizeof(PetscInt),&garray);CHKERRQ(ierr);
2311     for (i=0; i<baij->B->cmap->n/bs; i++) {
2312       for (j=0; j<bs; j++) {
2313         garray[i*bs+j] = bs*baij->garray[i]+j;
2314       }
2315     }
2316     ierr = VecCreateGhost(((PetscObject)mat)->comm,baij->A->rmap->n,PETSC_DETERMINE,baij->B->cmap->n,garray,&c->vscale);CHKERRQ(ierr);
2317     ierr = PetscFree(garray);CHKERRQ(ierr);
2318     CHKMEMQ;
2319     ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
2320     for (k=0; k<c->ncolors; k++) {
2321       ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
2322       for (l=0; l<c->nrows[k]; l++) {
2323         col = c->columnsforrow[k][l];
2324         if (col >= cstart && col < cend) {
2325           /* column is in diagonal block of matrix */
2326           colb = col - cstart;
2327         } else {
2328           /* column  is in "off-processor" part */
2329 #if defined (PETSC_USE_CTABLE)
2330           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2331           colb --;
2332 #else
2333           colb = baij->colmap[col] - 1;
2334 #endif
2335           colb = colb/bs;
2336           colb += cend - cstart;
2337         }
2338         c->vscaleforrow[k][l] = colb;
2339       }
2340     }
2341   } else if (ctype == IS_COLORING_GHOSTED) {
2342     /* Get gtol mapping */
2343     PetscInt N = mat->cmap->N, *gtol;
2344     ierr = PetscMalloc((N+1)*sizeof(PetscInt),&gtol);CHKERRQ(ierr);
2345     for (i=0; i<N; i++) gtol[i] = -1;
2346     for (i=0; i<map->n; i++) gtol[ltog[i]] = i;
2347 
2348     c->vscale = 0; /* will be created in MatFDColoringApply() */
2349     ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
2350     for (k=0; k<c->ncolors; k++) {
2351       ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
2352       for (l=0; l<c->nrows[k]; l++) {
2353         col = c->columnsforrow[k][l];      /* global column index */
2354         c->vscaleforrow[k][l] = gtol[col]; /* local column index */
2355       }
2356     }
2357     ierr = PetscFree(gtol);CHKERRQ(ierr);
2358   }
2359   ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr);
2360 
2361   ierr = PetscFree(rowhit);CHKERRQ(ierr);
2362   ierr = PetscFree(columnsforrow);CHKERRQ(ierr);
2363   ierr = MatRestoreColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr);
2364   ierr = MatRestoreColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr);
2365   CHKMEMQ;
2366   PetscFunctionReturn(0);
2367 }
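
/*
   Editorial note on the "slow version" above: rowhit[] doubles as a sorted
   singly linked list of hit rows with sentinel value M; rowhit[r] holds the
   next hit row after r and rowhit[M] holds the first. For M = 5 and hit rows
   {1,3} the state is rowhit[5] = 1, rowhit[1] = 3, rowhit[3] = 5; traversal
   starts at fm = rowhit[M] and stops once fm reaches M.
*/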
2368 
2369 #undef __FUNCT__
2370 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIBAIJ"
2371 PetscErrorCode MatGetSeqNonzerostructure_MPIBAIJ(Mat A,Mat *newmat)
2372 {
2373   Mat            B;
2374   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ *)A->data;
2375   Mat_SeqBAIJ    *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data;
2376   Mat_SeqAIJ     *b;
2377   PetscErrorCode ierr;
2378   PetscMPIInt    size,rank,*recvcounts = 0,*displs = 0;
2379   PetscInt       sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs;
2380   PetscInt       m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf;
2381 
2382   PetscFunctionBegin;
2383   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
2384   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
2385 
2386   /* ----------------------------------------------------------------
2387      Tell every processor the number of nonzeros per row
2388   */
2389   ierr = PetscMalloc((A->rmap->N/bs)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
2390   for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) {
2391     lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs];
2392   }
2393   sendcount = A->rmap->rend/bs - A->rmap->rstart/bs;
2394   ierr = PetscMalloc(2*size*sizeof(PetscMPIInt),&recvcounts);CHKERRQ(ierr);
2395   displs     = recvcounts + size;
2396   for (i=0; i<size; i++) {
2397     recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs;
2398     displs[i]     = A->rmap->range[i]/bs;
2399   }
2400 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2401   ierr  = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2402 #else
2403   ierr  = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2404 #endif
2405   /* ---------------------------------------------------------------
2406      Create the sequential AIJ matrix holding the global nonzero structure, one entry per bs-by-bs block
2407   */
2408   ierr  = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
2409   ierr  = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2410   ierr  = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2411   ierr  = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr);
2412   b = (Mat_SeqAIJ *)B->data;
2413 
2414   /*--------------------------------------------------------------------
2415     Copy my part of matrix column indices over
2416   */
2417   sendcount  = ad->nz + bd->nz;
2418   jsendbuf   = b->j + b->i[rstarts[rank]/bs];
2419   a_jsendbuf = ad->j;
2420   b_jsendbuf = bd->j;
2421   n          = A->rmap->rend/bs - A->rmap->rstart/bs;
2422   cnt        = 0;
2423   for (i=0; i<n; i++) {
2424 
2425     /* put in lower diagonal portion */
2426     m = bd->i[i+1] - bd->i[i];
2427     while (m > 0) {
2428       /* is it above diagonal (in bd (compressed) numbering) */
2429       if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break;
2430       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2431       m--;
2432     }
2433 
2434     /* put in diagonal portion */
2435     for (j=ad->i[i]; j<ad->i[i+1]; j++) {
2436       jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++;
2437     }
2438 
2439     /* put in upper diagonal portion */
2440     while (m-- > 0) {
2441       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2442     }
2443   }
2444   if (cnt != sendcount) SETERRQ2(PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt);
2445 
2446   /*--------------------------------------------------------------------
2447     Gather all column indices to all processors
2448   */
2449   for (i=0; i<size; i++) {
2450     recvcounts[i] = 0;
2451     for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) {
2452       recvcounts[i] += lens[j];
2453     }
2454   }
2455   displs[0]  = 0;
2456   for (i=1; i<size; i++) {
2457     displs[i] = displs[i-1] + recvcounts[i-1];
2458   }
2459 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2460   ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2461 #else
2462   ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2463 #endif
2464   /*--------------------------------------------------------------------
2465     Assemble the matrix into usable form (note the numerical values are not yet set)
2466   */
2467   /* set the b->ilen (length of each row) values */
2468   ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr);
2469   /* set the b->i indices */
2470   b->i[0] = 0;
2471   for (i=1; i<=A->rmap->N/bs; i++) {
2472     b->i[i] = b->i[i-1] + lens[i-1];
2473   }
2474   ierr = PetscFree(lens);CHKERRQ(ierr);
2475   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2476   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2477   ierr = PetscFree(recvcounts);CHKERRQ(ierr);
2478 
2479   if (A->symmetric){
2480     ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2481   } else if (A->hermitian) {
2482     ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr);
2483   } else if (A->structurally_symmetric) {
2484     ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2485   }
2486   *newmat = B;
2487   PetscFunctionReturn(0);
2488 }
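
/*
   Editorial note: the MPI_Allgatherv calls above use block-row counts and
   displacements derived from A->rmap->range; for example, with 3 processes
   owning {2,3,1} block rows the first gather uses recvcounts = {2,3,1} and
   displs = {0,2,5}.
*/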
2489 
2490 #undef __FUNCT__
2491 #define __FUNCT__ "MatSOR_MPIBAIJ"
2492 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2493 {
2494   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
2495   PetscErrorCode ierr;
2496   Vec            bb1 = 0;
2497 
2498   PetscFunctionBegin;
2499   if (its > 1 || !(flag & SOR_ZERO_INITIAL_GUESS)) {
2500     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2501   }
2502 
2503   if (flag == SOR_APPLY_UPPER) {
2504     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2505     PetscFunctionReturn(0);
2506   }
2507 
2508   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2509     if (flag & SOR_ZERO_INITIAL_GUESS) {
2510       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2511       its--;
2512     }
2513 
2514     while (its--) {
2515       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2516       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2517 
2518       /* update rhs: bb1 = bb - B*x */
2519       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2520       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2521 
2522       /* local sweep */
2523       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2524     }
2525   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
2526     if (flag & SOR_ZERO_INITIAL_GUESS) {
2527       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2528       its--;
2529     }
2530     while (its--) {
2531       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2532       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2533 
2534       /* update rhs: bb1 = bb - B*x */
2535       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2536       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2537 
2538       /* local sweep */
2539       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2540     }
2541   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
2542     if (flag & SOR_ZERO_INITIAL_GUESS) {
2543       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2544       its--;
2545     }
2546     while (its--) {
2547       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2548       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2549 
2550       /* update rhs: bb1 = bb - B*x */
2551       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2552       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2553 
2554       /* local sweep */
2555       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2556     }
2557   } else {
2558     SETERRQ(PETSC_ERR_SUP,"Requested parallel SOR variant is not supported");
2559   }
2560 
2561   if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);}
2562   PetscFunctionReturn(0);
2563 }
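
/*
   Editorial note: each outer iteration above is a block Jacobi step with
   (S)SOR applied within each process; with A the local diagonal block and B
   the off-process coupling, the sweep solves

       x <- SOR_local(A, bb1),   bb1 = bb - B*x_ghost,

   where x_ghost is gathered through mat->Mvctx before every sweep.
*/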
2564 
2565 extern PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_BAIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2566 
2567 
2568 /* -------------------------------------------------------------------*/
2569 static struct _MatOps MatOps_Values = {
2570        MatSetValues_MPIBAIJ,
2571        MatGetRow_MPIBAIJ,
2572        MatRestoreRow_MPIBAIJ,
2573        MatMult_MPIBAIJ,
2574 /* 4*/ MatMultAdd_MPIBAIJ,
2575        MatMultTranspose_MPIBAIJ,
2576        MatMultTransposeAdd_MPIBAIJ,
2577        0,
2578        0,
2579        0,
2580 /*10*/ 0,
2581        0,
2582        0,
2583        MatSOR_MPIBAIJ,
2584        MatTranspose_MPIBAIJ,
2585 /*15*/ MatGetInfo_MPIBAIJ,
2586        MatEqual_MPIBAIJ,
2587        MatGetDiagonal_MPIBAIJ,
2588        MatDiagonalScale_MPIBAIJ,
2589        MatNorm_MPIBAIJ,
2590 /*20*/ MatAssemblyBegin_MPIBAIJ,
2591        MatAssemblyEnd_MPIBAIJ,
2592        MatSetOption_MPIBAIJ,
2593        MatZeroEntries_MPIBAIJ,
2594 /*24*/ MatZeroRows_MPIBAIJ,
2595        0,
2596        0,
2597        0,
2598        0,
2599 /*29*/ MatSetUpPreallocation_MPIBAIJ,
2600        0,
2601        0,
2602        0,
2603        0,
2604 /*34*/ MatDuplicate_MPIBAIJ,
2605        0,
2606        0,
2607        0,
2608        0,
2609 /*39*/ MatAXPY_MPIBAIJ,
2610        MatGetSubMatrices_MPIBAIJ,
2611        MatIncreaseOverlap_MPIBAIJ,
2612        MatGetValues_MPIBAIJ,
2613        MatCopy_MPIBAIJ,
2614 /*44*/ 0,
2615        MatScale_MPIBAIJ,
2616        0,
2617        0,
2618        0,
2619 /*49*/ MatSetBlockSize_MPIBAIJ,
2620        0,
2621        0,
2622        0,
2623        0,
2624 /*54*/ MatFDColoringCreate_MPIBAIJ,
2625        0,
2626        MatSetUnfactored_MPIBAIJ,
2627        MatPermute_MPIBAIJ,
2628        MatSetValuesBlocked_MPIBAIJ,
2629 /*59*/ MatGetSubMatrix_MPIBAIJ,
2630        MatDestroy_MPIBAIJ,
2631        MatView_MPIBAIJ,
2632        0,
2633        0,
2634 /*64*/ 0,
2635        0,
2636        0,
2637        0,
2638        0,
2639 /*69*/ MatGetRowMaxAbs_MPIBAIJ,
2640        0,
2641        0,
2642        0,
2643        0,
2644 /*74*/ 0,
2645        MatFDColoringApply_BAIJ,
2646        0,
2647        0,
2648        0,
2649 /*79*/ 0,
2650        0,
2651        0,
2652        0,
2653        MatLoad_MPIBAIJ,
2654 /*84*/ 0,
2655        0,
2656        0,
2657        0,
2658        0,
2659 /*89*/ 0,
2660        0,
2661        0,
2662        0,
2663        0,
2664 /*94*/ 0,
2665        0,
2666        0,
2667        0,
2668        0,
2669 /*99*/ 0,
2670        0,
2671        0,
2672        0,
2673        0,
2674 /*104*/0,
2675        MatRealPart_MPIBAIJ,
2676        MatImaginaryPart_MPIBAIJ,
2677        0,
2678        0,
2679 /*109*/0,
2680        0,
2681        0,
2682        0,
2683        0,
2684 /*114*/MatGetSeqNonzerostructure_MPIBAIJ,
2685        0,
2686        MatGetGhosts_MPIBAIJ
2687 };
2688 
2689 EXTERN_C_BEGIN
2690 #undef __FUNCT__
2691 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
2692 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
2693 {
2694   PetscFunctionBegin;
2695   *a      = ((Mat_MPIBAIJ *)A->data)->A;
2696   *iscopy = PETSC_FALSE;
2697   PetscFunctionReturn(0);
2698 }
2699 EXTERN_C_END
2700 
2701 EXTERN_C_BEGIN
2702 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*);
2703 EXTERN_C_END
2704 
2705 EXTERN_C_BEGIN
2706 #undef __FUNCT__
2707 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ"
2708 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2709 {
2710   PetscInt       m,rstart,cstart,cend;
2711   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
2712   const PetscInt *JJ=0;
2713   PetscScalar    *values=0;
2714   PetscErrorCode ierr;
2715 
2716   PetscFunctionBegin;
2717 
2718   if (bs < 1) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2719   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2720   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2721   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2722   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2723   m      = B->rmap->n/bs;
2724   rstart = B->rmap->rstart/bs;
2725   cstart = B->cmap->rstart/bs;
2726   cend   = B->cmap->rend/bs;
2727 
2728   if (ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
2729   ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
2730   o_nnz = d_nnz + m;
2731   for (i=0; i<m; i++) {
2732     nz = ii[i+1] - ii[i];
2733     if (nz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
2734     nz_max = PetscMax(nz_max,nz);
2735     JJ  = jj + ii[i];
2736     for (j=0; j<nz; j++) {
2737       if (*JJ >= cstart) break;
2738       JJ++;
2739     }
2740     d = 0;
2741     for (; j<nz; j++) {
2742       if (*JJ++ >= cend) break;
2743       d++;
2744     }
2745     d_nnz[i] = d;
2746     o_nnz[i] = nz - d;
2747   }
2748   ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2749   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
2750 
2751   values = (PetscScalar*)V;
2752   if (!values) {
2753     ierr = PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2754     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2755   }
2756   for (i=0; i<m; i++) {
2757     PetscInt          row    = i + rstart;
2758     PetscInt          ncols  = ii[i+1] - ii[i];
2759     const PetscInt    *icols = jj + ii[i];
2760     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2761     ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2762   }
2763 
2764   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2765   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2766   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2767 
2768   PetscFunctionReturn(0);
2769 }
2770 EXTERN_C_END
2771 
2772 #undef __FUNCT__
2773 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR"
2774 /*@C
2775    MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format
2776    (block compressed row), using block-row storage described in CSR form.
2777 
2778    Collective on MPI_Comm
2779 
2780    Input Parameters:
2781 +  B - the matrix
2782 .  bs - the block size
2783 .  i - the indices into j for the start of each local block row (starts with zero)
2784 .  j - the block column indices for each local block row (starts with zero); these must be sorted for each row
2785 -  v - optional values in the matrix, stored as bs*bs values per block
2785 
2786    Level: developer
2787 
2788 .keywords: matrix, baij, block compressed row, sparse, parallel
2789 
2790 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIBAIJ(), MPIBAIJ
2791 @*/
2792 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2793 {
2794   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]);
2795 
2796   PetscFunctionBegin;
2797   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
2798   if (f) {
2799     ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr);
2800   }
2801   PetscFunctionReturn(0);
2802 }
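
/*
   Usage sketch (editorial example, not part of the library, values hypothetical):
   two local block rows with bs = 2 on a single process; block row 0 has block
   columns {0,1} and block row 1 has {1}. Passing PETSC_NULL for v preallocates
   and fills the pattern with zeros; otherwise v supplies bs*bs*nz = 12 values.

     PetscInt ii[] = {0,2,3};
     PetscInt jj[] = {0,1,1};
     ierr = MatMPIBAIJSetPreallocationCSR(B,2,ii,jj,PETSC_NULL);CHKERRQ(ierr);
*/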
2803 
2804 EXTERN_C_BEGIN
2805 #undef __FUNCT__
2806 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
2807 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
2808 {
2809   Mat_MPIBAIJ    *b;
2810   PetscErrorCode ierr;
2811   PetscInt       i, newbs = PetscAbs(bs);
2812 
2813   PetscFunctionBegin;
2814   if (bs < 0) {
2815     ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPIBAIJ matrix","Mat");CHKERRQ(ierr);
2816       ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr);
2817     ierr = PetscOptionsEnd();CHKERRQ(ierr);
2818     bs   = PetscAbs(bs);
2819   }
2820   if ((d_nnz || o_nnz) && newbs != bs) {
2821     SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting d_nnz or o_nnz");
2822   }
2823   bs = newbs;
2824 
2825 
2826   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
2827   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2828   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2829   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2830   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2831 
2832   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2833   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2834   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2835   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2836 
2837   if (d_nnz) {
2838     for (i=0; i<B->rmap->n/bs; i++) {
2839       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
2840     }
2841   }
2842   if (o_nnz) {
2843     for (i=0; i<B->rmap->n/bs; i++) {
2844       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
2845     }
2846   }
2847 
2848   b = (Mat_MPIBAIJ*)B->data;
2849   b->bs2 = bs*bs;
2850   b->mbs = B->rmap->n/bs;
2851   b->nbs = B->cmap->n/bs;
2852   b->Mbs = B->rmap->N/bs;
2853   b->Nbs = B->cmap->N/bs;
2854 
2855   for (i=0; i<=b->size; i++) {
2856     b->rangebs[i] = B->rmap->range[i]/bs;
2857   }
2858   b->rstartbs = B->rmap->rstart/bs;
2859   b->rendbs   = B->rmap->rend/bs;
2860   b->cstartbs = B->cmap->rstart/bs;
2861   b->cendbs   = B->cmap->rend/bs;
2862 
2863   if (!B->preallocated) {
2864     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2865     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2866     ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
2867     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
2868     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2869     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2870     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
2871     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
2872     ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr);
2873   }
2874 
2875   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2876   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
2877   B->preallocated = PETSC_TRUE;
2878   PetscFunctionReturn(0);
2879 }
2880 EXTERN_C_END
2881 
2882 EXTERN_C_BEGIN
2883 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
2884 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
2885 EXTERN_C_END
2886 
2887 
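/*
    Builds a MATMPIADJ adjacency graph from the block nonzero pattern of a
  MATMPIBAIJ matrix: one vertex per block row, neighbors given by global block
  column indices, with the diagonal block of each row omitted.
*/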
2888 EXTERN_C_BEGIN
2889 #undef __FUNCT__
2890 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj"
2891 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPIAdj(Mat B, const MatType newtype,MatReuse reuse,Mat *adj)
2892 {
2893   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
2894   PetscErrorCode ierr;
2895   Mat_SeqBAIJ    *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data;
2896   PetscInt       M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs;
2897   const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;
2898 
2899   PetscFunctionBegin;
2900   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&ii);CHKERRQ(ierr);
2901   ii[0] = 0;
2902   CHKMEMQ;
2903   for (i=0; i<M; i++) {
2904     if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]);
2905     if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]);
2906     ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i];
2907     /* remove one from the count if the matrix row contains its own diagonal entry */
2908     for (j=id[i]; j<id[i+1]; j++) {
2909       if (jd[j] == i) {ii[i+1]--;break;}
2910     }
2911     CHKMEMQ;
2912   }
2913   ierr = PetscMalloc(ii[M]*sizeof(PetscInt),&jj);CHKERRQ(ierr);
2914   cnt = 0;
2915   for (i=0; i<M; i++) {
2916     for (j=io[i]; j<io[i+1]; j++) {
2917       if (garray[jo[j]] > rstart) break;
2918       jj[cnt++] = garray[jo[j]];
2919       CHKMEMQ;
2920     }
2921     for (k=id[i]; k<id[i+1]; k++) {
2922       if (jd[k] != i) {
2923         jj[cnt++] = rstart + jd[k];
2924         CHKMEMQ;
2925       }
2926     }
2927     for (;j<io[i+1]; j++) {
2928       jj[cnt++] = garray[jo[j]];
2929       CHKMEMQ;
2930     }
2931   }
2932   ierr = MatCreateMPIAdj(((PetscObject)B)->comm,M,B->cmap->N/B->rmap->bs,ii,jj,PETSC_NULL,adj);CHKERRQ(ierr);
2933   PetscFunctionReturn(0);
2934 }
2935 EXTERN_C_END
2936 
2937 /*MC
2938    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
2939 
2940    Options Database Keys:
2941 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
2942 . -mat_block_size <bs> - set the blocksize used to store the matrix
2943 - -mat_use_hash_table <fact> - set the factor used to size the hash table (see MatMPIBAIJSetHashTableFactor())
2944 
2945   Level: beginner
2946 
2947 .seealso: MatCreateMPIBAIJ()
2948 M*/
2949 
2950 EXTERN_C_BEGIN
2951 #undef __FUNCT__
2952 #define __FUNCT__ "MatCreate_MPIBAIJ"
2953 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIBAIJ(Mat B)
2954 {
2955   Mat_MPIBAIJ    *b;
2956   PetscErrorCode ierr;
2957   PetscTruth     flg;
2958 
2959   PetscFunctionBegin;
2960   ierr = PetscNewLog(B,Mat_MPIBAIJ,&b);CHKERRQ(ierr);
2961   B->data = (void*)b;
2962 
2963 
2964   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2965   B->mapping    = 0;
2966   B->assembled  = PETSC_FALSE;
2967 
2968   B->insertmode = NOT_SET_VALUES;
2969   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
2970   ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr);
2971 
2972   /* build local table of row and column ownerships */
2973   ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);
2974 
2975   /* build cache for off array entries formed */
2976   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
2977   b->donotstash  = PETSC_FALSE;
2978   b->colmap      = PETSC_NULL;
2979   b->garray      = PETSC_NULL;
2980   b->roworiented = PETSC_TRUE;
2981 
2982   /* stuff used in block assembly */
2983   b->barray       = 0;
2984 
2985   /* stuff used for matrix vector multiply */
2986   b->lvec         = 0;
2987   b->Mvctx        = 0;
2988 
2989   /* stuff for MatGetRow() */
2990   b->rowindices   = 0;
2991   b->rowvalues    = 0;
2992   b->getrowactive = PETSC_FALSE;
2993 
2994   /* hash table stuff */
2995   b->ht           = 0;
2996   b->hd           = 0;
2997   b->ht_size      = 0;
2998   b->ht_flag      = PETSC_FALSE;
2999   b->ht_fact      = 0;
3000   b->ht_total_ct  = 0;
3001   b->ht_insert_ct = 0;
3002 
3003   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr);
3004     ierr = PetscOptionsTruth("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
3005     if (flg) {
3006       PetscReal fact = 1.39;
3007       ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
3008       ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr);
3009       if (fact <= 1.0) fact = 1.39;
3010       ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
3011       ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
3012     }
3013   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3014 
3015   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",
3016                                      "MatConvert_MPIBAIJ_MPIAdj",
3017                                       MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr);
3018   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3019                                      "MatStoreValues_MPIBAIJ",
3020                                      MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
3021   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3022                                      "MatRetrieveValues_MPIBAIJ",
3023                                      MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
3024   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
3025                                      "MatGetDiagonalBlock_MPIBAIJ",
3026                                      MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
3027   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C",
3028                                      "MatMPIBAIJSetPreallocation_MPIBAIJ",
3029                                      MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
3030   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",
3031                                      "MatMPIBAIJSetPreallocationCSR_MPIBAIJ",
3032                                      MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
3033   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
3034                                      "MatDiagonalScaleLocal_MPIBAIJ",
3035                                      MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
3036   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C",
3037                                      "MatSetHashTableFactor_MPIBAIJ",
3038                                      MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
3039   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);
3040   PetscFunctionReturn(0);
3041 }
3042 EXTERN_C_END
3043 
3044 /*MC
3045    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
3046 
3047    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
3048    and MATMPIBAIJ otherwise.
3049 
3050    Options Database Keys:
3051 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
3052 
3053   Level: beginner
3054 
3055 .seealso: MatCreateMPIBAIJ(), MATSEQBAIJ, MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3056 M*/
3057 
3058 EXTERN_C_BEGIN
3059 #undef __FUNCT__
3060 #define __FUNCT__ "MatCreate_BAIJ"
3061 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_BAIJ(Mat A)
3062 {
3063   PetscErrorCode ierr;
3064   PetscMPIInt    size;
3065 
3066   PetscFunctionBegin;
3067   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
3068   if (size == 1) {
3069     ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr);
3070   } else {
3071     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
3072   }
3073   PetscFunctionReturn(0);
3074 }
3075 EXTERN_C_END
3076 
3077 #undef __FUNCT__
3078 #define __FUNCT__ "MatMPIBAIJSetPreallocation"
3079 /*@C
3080    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
3081    (block compressed row).  For good matrix assembly performance
3082    the user should preallocate the matrix storage by setting the parameters
3083    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3084    performance can be increased by more than a factor of 50.
3085 
3086    Collective on Mat
3087 
3088    Input Parameters:
3089 +  B - the matrix
3090 .  bs   - size of block
3091 .  d_nz  - number of block nonzeros per block row in the diagonal portion of the local
3092            submatrix (same for all local rows)
3093 .  d_nnz - array containing the number of block nonzeros in the various block rows
3094            in the diagonal portion of the local submatrix (possibly different for each block
3095            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
3096 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of the local
3097            submatrix (same for all local rows).
3098 -  o_nnz - array containing the number of block nonzeros in the various block rows of the
3099            off-diagonal portion of the local submatrix (possibly different for
3100            each block row) or PETSC_NULL.
3101 
3102    If the *_nnz parameter is given then the *_nz parameter is ignored
3103 
3104    Options Database Keys:
3105 +   -mat_block_size - size of the blocks to use
3106 -   -mat_use_hash_table <fact>
3107 
3108    Notes:
3109    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
3110    then it must be used on all processors that share the object for that argument.
3111 
3112    Storage Information:
3113    For a square global matrix we define each processor's diagonal portion
3114    to be its local rows and the corresponding columns (a square submatrix);
3115    each processor's off-diagonal portion encompasses the remainder of the
3116    local matrix (a rectangular submatrix).
3117 
3118    The user can specify preallocated storage for the diagonal part of
3119    the local submatrix with either d_nz or d_nnz (not both).  Set
3120    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
3121    memory allocation.  Likewise, specify preallocated storage for the
3122    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3123 
3124    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3125    the figure below we depict these three local rows and all columns (0-11).
3126 
3127 .vb
3128            0 1 2 3 4 5 6 7 8 9 10 11
3129           -------------------
3130    row 3  |  o o o d d d o o o o o o
3131    row 4  |  o o o d d d o o o o o o
3132    row 5  |  o o o d d d o o o o o o
3133           -------------------
3134 .ve
3135 
3136    Thus, any entries in the d locations are stored in the d (diagonal)
3137    submatrix, and any entries in the o locations are stored in the
3138    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3139    stored simply in the MATSEQBAIJ format for compressed row storage.
3140 
3141    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3142    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3143    In general, for PDE problems in which most nonzeros are near the diagonal,
3144    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3145    or you will get TERRIBLE performance; see the users' manual chapter on
3146    matrices.
3147 
3148    You can call MatGetInfo() to get information on how effective the preallocation was;
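   For the layout depicted above with a block size of 3, the three point rows form
   one block row whose diagonal part holds a single block (columns 3-5) and whose
   off-diagonal part holds three blocks (columns 0-2, 6-8, 9-11), so that process
   could preallocate with
.vb
   ierr = MatMPIBAIJSetPreallocation(B,3,1,PETSC_NULL,3,PETSC_NULL);CHKERRQ(ierr);
.ve
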
3149    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3150    You can also run with the option -info and look for messages with the string
3151    malloc in them to see if additional memory allocation was needed.
3152 
3153    Level: intermediate
3154 
3155 .keywords: matrix, block, aij, compressed row, sparse, parallel
3156 
3157 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR()
3158 @*/
3159 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3160 {
3161   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
3162 
3163   PetscFunctionBegin;
3164   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3165   if (f) {
3166     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3167   }
3168   PetscFunctionReturn(0);
3169 }
3170 
3171 #undef __FUNCT__
3172 #define __FUNCT__ "MatCreateMPIBAIJ"
3173 /*@C
3174    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
3175    (block compressed row).  For good matrix assembly performance
3176    the user should preallocate the matrix storage by setting the parameters
3177    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3178    performance can be increased by more than a factor of 50.
3179 
3180    Collective on MPI_Comm
3181 
3182    Input Parameters:
3183 +  comm - MPI communicator
3184 .  bs   - size of block
3185 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3186            This value should be the same as the local size used in creating the
3187            y vector for the matrix-vector product y = Ax.
3188 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
3189            This value should be the same as the local size used in creating the
3190            x vector for the matrix-vector product y = Ax.
3191 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3192 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3193 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
3194            submatrix  (same for all local rows)
3195 .  d_nnz - array containing the number of nonzero blocks in the various block rows
3196            in the diagonal portion of the local submatrix (possibly different for each block
3197            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
3198 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3199            submatrix (same for all local rows).
3200 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
3201            off-diagonal portion of the local submatrix (possibly different for
3202            each block row) or PETSC_NULL.
3203 
3204    Output Parameter:
3205 .  A - the matrix
3206 
3207    Options Database Keys:
3208 +   -mat_block_size - size of the blocks to use
3209 -   -mat_use_hash_table <fact> - set the factor used to size the hash table
3210 
3211    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3212    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3213    [MatXXXXSetPreallocation() is, for example, MatSeqBAIJSetPreallocation()]
3214 
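   A sketch of that recommended sequence, using the argument names from the list
   above:
.vb
   Mat A;
   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
   ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
   ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
   ierr = MatMPIBAIJSetPreallocation(A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
.ve
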
3215    Notes:
3216    If the *_nnz parameter is given then the *_nz parameter is ignored
3217 
3218    A nonzero block is any block that has 1 or more nonzeros in it
3219 
3220    The user MUST specify either the local or global matrix dimensions
3221    (possibly both).
3222 
3223    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
3224    then it must be used on all processors that share the object for that argument.
3225 
3226    Storage Information:
3227    For a square global matrix we define each processor's diagonal portion
3228    to be its local rows and the corresponding columns (a square submatrix);
3229    each processor's off-diagonal portion encompasses the remainder of the
3230    local matrix (a rectangular submatrix).
3231 
3232    The user can specify preallocated storage for the diagonal part of
3233    the local submatrix with either d_nz or d_nnz (not both).  Set
3234    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
3235    memory allocation.  Likewise, specify preallocated storage for the
3236    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3237 
3238    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3239    the figure below we depict these three local rows and all columns (0-11).
3240 
3241 .vb
3242            0 1 2 3 4 5 6 7 8 9 10 11
3243           -------------------
3244    row 3  |  o o o d d d o o o o o o
3245    row 4  |  o o o d d d o o o o o o
3246    row 5  |  o o o d d d o o o o o o
3247           -------------------
3248 .ve
3249 
3250    Thus, any entries in the d locations are stored in the d (diagonal)
3251    submatrix, and any entries in the o locations are stored in the
3252    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3253    stored simply in the MATSEQBAIJ format for compressed row storage.
3254 
3255    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3256    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3257    In general, for PDE problems in which most nonzeros are near the diagonal,
3258    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3259    or you will get TERRIBLE performance; see the users' manual chapter on
3260    matrices.
3261 
3262    Level: intermediate
3263 
3264 .keywords: matrix, block, aij, compressed row, sparse, parallel
3265 
3266 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3267 @*/
3268 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3269 {
3270   PetscErrorCode ierr;
3271   PetscMPIInt    size;
3272 
3273   PetscFunctionBegin;
3274   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3275   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3276   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3277   if (size > 1) {
3278     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
3279     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3280   } else {
3281     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3282     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3283   }
3284   PetscFunctionReturn(0);
3285 }
3286 
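/*
    Duplicates a MATMPIBAIJ matrix: copies the layouts, block ownership ranges,
  colmap/garray, the scatter context, and the diagonal and off-diagonal pieces
  (numerical values only when cpvalues is MAT_COPY_VALUES).
*/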
3287 #undef __FUNCT__
3288 #define __FUNCT__ "MatDuplicate_MPIBAIJ"
3289 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3290 {
3291   Mat            mat;
3292   Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
3293   PetscErrorCode ierr;
3294   PetscInt       len=0;
3295 
3296   PetscFunctionBegin;
3297   *newmat       = 0;
3298   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
3299   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3300   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3301   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3302 
3303   mat->factor       = matin->factor;
3304   mat->preallocated = PETSC_TRUE;
3305   mat->assembled    = PETSC_TRUE;
3306   mat->insertmode   = NOT_SET_VALUES;
3307 
3308   a      = (Mat_MPIBAIJ*)mat->data;
3309   mat->rmap->bs  = matin->rmap->bs;
3310   a->bs2   = oldmat->bs2;
3311   a->mbs   = oldmat->mbs;
3312   a->nbs   = oldmat->nbs;
3313   a->Mbs   = oldmat->Mbs;
3314   a->Nbs   = oldmat->Nbs;
3315 
3316   ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3317   ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3318 
3319   a->size         = oldmat->size;
3320   a->rank         = oldmat->rank;
3321   a->donotstash   = oldmat->donotstash;
3322   a->roworiented  = oldmat->roworiented;
3323   a->rowindices   = 0;
3324   a->rowvalues    = 0;
3325   a->getrowactive = PETSC_FALSE;
3326   a->barray       = 0;
3327   a->rstartbs     = oldmat->rstartbs;
3328   a->rendbs       = oldmat->rendbs;
3329   a->cstartbs     = oldmat->cstartbs;
3330   a->cendbs       = oldmat->cendbs;
3331 
3332   /* hash table stuff */
3333   a->ht           = 0;
3334   a->hd           = 0;
3335   a->ht_size      = 0;
3336   a->ht_flag      = oldmat->ht_flag;
3337   a->ht_fact      = oldmat->ht_fact;
3338   a->ht_total_ct  = 0;
3339   a->ht_insert_ct = 0;
3340 
3341   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
3342   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,1,&mat->stash);CHKERRQ(ierr);
3343   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
3344   if (oldmat->colmap) {
3345 #if defined (PETSC_USE_CTABLE)
3346   ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3347 #else
3348   ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3349   ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3350   ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3351 #endif
3352   } else a->colmap = 0;
3353 
3354   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
3355     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3356     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3357     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
3358   } else a->garray = 0;
3359 
3360   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3361   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3362   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3363   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3364 
3365   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3366   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3367   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3368   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3369   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3370   *newmat = mat;
3371 
3372   PetscFunctionReturn(0);
3373 }
3374 
3375 #include "petscsys.h"
3376 
3377 #undef __FUNCT__
3378 #define __FUNCT__ "MatLoad_MPIBAIJ"
3379 PetscErrorCode MatLoad_MPIBAIJ(PetscViewer viewer, const MatType type,Mat *newmat)
3380 {
3381   Mat            A;
3382   PetscErrorCode ierr;
3383   int            fd;
3384   PetscInt       i,nz,j,rstart,rend;
3385   PetscScalar    *vals,*buf;
3386   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3387   MPI_Status     status;
3388   PetscMPIInt    rank,size,maxnz;
3389   PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
3390   PetscInt       *locrowlens = PETSC_NULL,*procsnz = PETSC_NULL,*browners = PETSC_NULL;
3391   PetscInt       jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax;
3392   PetscMPIInt    tag = ((PetscObject)viewer)->tag;
3393   PetscInt       *dlens = PETSC_NULL,*odlens = PETSC_NULL,*mask = PETSC_NULL,*masked1 = PETSC_NULL,*masked2 = PETSC_NULL,rowcount,odcount;
3394   PetscInt       dcount,kmax,k,nzcount,tmp,mend;
3395 
3396   PetscFunctionBegin;
3397   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr);
3398     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
3399   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3400 
3401   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3402   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3403   if (!rank) {
3404     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3405     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
3406     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3407   }
3408 
3409   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3410   M = header[1]; N = header[2];
3411 
3412   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
3413 
3414   /*
3415      This code adds extra rows to make sure the number of rows is
3416      divisible by the blocksize
3417   */
3418   Mbs        = M/bs;
3419   extra_rows = bs - M + bs*Mbs;
3420   if (extra_rows == bs) extra_rows = 0;
3421   else                  Mbs++;
3422   if (extra_rows && !rank) {
3423     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3424   }
3425 
3426   /* determine ownership of all rows */
3427   mbs        = Mbs/size + ((Mbs % size) > rank);
3428   m          = mbs*bs;
3429   ierr       = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr);
3430   ierr       = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3431 
3432   /* process 0 needs enough room for process with most rows */
3433   if (!rank) {
3434     mmax = rowners[1];
3435     for (i=2; i<size; i++) {
3436       mmax = PetscMax(mmax,rowners[i]);
3437     }
3438     mmax*=bs;
3439   } else mmax = m;
3440 
3441   rowners[0] = 0;
3442   for (i=2; i<=size; i++)  rowners[i] += rowners[i-1];
3443   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
3444   rstart = rowners[rank];
3445   rend   = rowners[rank+1];
3446 
3447   /* distribute row lengths to all processors */
3448   ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr);
3449   if (!rank) {
3450     mend = m;
3451     if (size == 1) mend = mend - extra_rows;
3452     ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr);
3453     for (j=mend; j<m; j++) locrowlens[j] = 1;
3454     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3455     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3456     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3457     for (j=0; j<m; j++) {
3458       procsnz[0] += locrowlens[j];
3459     }
3460     for (i=1; i<size; i++) {
3461       mend = browners[i+1] - browners[i];
3462       if (i == size-1) mend = mend - extra_rows;
3463       ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr);
3464       for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
3465       /* calculate the number of nonzeros on each processor */
3466       for (j=0; j<browners[i+1]-browners[i]; j++) {
3467         procsnz[i] += rowlengths[j];
3468       }
3469       ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3470     }
3471     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3472   } else {
3473     ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3474   }
3475 
3476   if (!rank) {
3477     /* determine max buffer needed and allocate it */
3478     maxnz = procsnz[0];
3479     for (i=1; i<size; i++) {
3480       maxnz = PetscMax(maxnz,procsnz[i]);
3481     }
3482     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3483 
3484     /* read in my part of the matrix column indices  */
3485     nz     = procsnz[0];
3486     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
3487     mycols = ibuf;
3488     if (size == 1)  nz -= extra_rows;
3489     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3490     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
3491 
3492     /* read in the column indices of all the other processes (except the last) and ship them off */
3493     for (i=1; i<size-1; i++) {
3494       nz   = procsnz[i];
3495       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3496       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3497     }
3498     /* read in the stuff for the last proc */
3499     if (size != 1) {
3500       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
3501       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3502       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
3503       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
3504     }
3505     ierr = PetscFree(cols);CHKERRQ(ierr);
3506   } else {
3507     /* determine buffer space needed for message */
3508     nz = 0;
3509     for (i=0; i<m; i++) {
3510       nz += locrowlens[i];
3511     }
3512     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
3513     mycols = ibuf;
3514     /* receive message of column indices */
3515     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3516     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
3517     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
3518   }
3519 
3520   /* loop over local rows, determining number of off diagonal entries */
3521   ierr     = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
3522   ierr     = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
3523   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3524   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3525   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
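  /* mask[] flags the block columns already seen in the current block row;
     masked1[] and masked2[] remember which diagonal and off-diagonal flags were
     set so they can be cleared without wiping all Mbs entries */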
3526   rowcount = 0; nzcount = 0;
3527   for (i=0; i<mbs; i++) {
3528     dcount  = 0;
3529     odcount = 0;
3530     for (j=0; j<bs; j++) {
3531       kmax = locrowlens[rowcount];
3532       for (k=0; k<kmax; k++) {
3533         tmp = mycols[nzcount++]/bs;
3534         if (!mask[tmp]) {
3535           mask[tmp] = 1;
3536           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
3537           else masked1[dcount++] = tmp;
3538         }
3539       }
3540       rowcount++;
3541     }
3542 
3543     dlens[i]  = dcount;
3544     odlens[i] = odcount;
3545 
3546     /* zero out the mask elements we set */
3547     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
3548     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
3549   }
3550 
3551   /* create our matrix */
3552   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
3553   ierr = MatSetSizes(A,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3554   ierr = MatSetType(A,type);CHKERRQ(ierr);
3555   ierr = MatMPIBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
3556 
3557   if (!rank) {
3558     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
3559     /* read in my part of the matrix numerical values  */
3560     nz = procsnz[0];
3561     vals = buf;
3562     mycols = ibuf;
3563     if (size == 1)  nz -= extra_rows;
3564     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3565     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
3566 
3567     /* insert into matrix */
3568     jj      = rstart*bs;
3569     for (i=0; i<m; i++) {
3570       ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3571       mycols += locrowlens[i];
3572       vals   += locrowlens[i];
3573       jj++;
3574     }
3575     /* read in the values of the other processes (except the last one) and ship them out */
3576     for (i=1; i<size-1; i++) {
3577       nz   = procsnz[i];
3578       vals = buf;
3579       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3580       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
3581     }
3582     /* the last proc */
3583     if (size != 1){
3584       nz   = procsnz[i] - extra_rows;
3585       vals = buf;
3586       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3587       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
3588       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
3589     }
3590     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3591   } else {
3592     /* receive numeric values */
3593     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
3594 
3595     /* receive message of values*/
3596     vals   = buf;
3597     mycols = ibuf;
3598     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
3599     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
3600     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
3601 
3602     /* insert into matrix */
3603     jj      = rstart*bs;
3604     for (i=0; i<m; i++) {
3605       ierr    = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3606       mycols += locrowlens[i];
3607       vals   += locrowlens[i];
3608       jj++;
3609     }
3610   }
3611   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
3612   ierr = PetscFree(buf);CHKERRQ(ierr);
3613   ierr = PetscFree(ibuf);CHKERRQ(ierr);
3614   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
3615   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
3616   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
3617   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3618   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3619 
3620   *newmat = A;
3621   PetscFunctionReturn(0);
3622 }
3623 
3624 #undef __FUNCT__
3625 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
3626 /*@
3627    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the hash table.
3628 
3629    Input Parameters:
3630 +  mat  - the matrix
3631 -  fact - the factor
3632 
3633    Collective on Mat
3634 
3635    Level: advanced
3636 
3637   Notes:
3638    This can also be set by the command line option: -mat_use_hash_table <fact>
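
   For example, using the same value that MatCreate_MPIBAIJ() above falls back to
   when the option value is out of range:
.vb
   ierr = MatMPIBAIJSetHashTableFactor(mat,1.39);CHKERRQ(ierr);
.ve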
3639 
3640 .keywords: matrix, hashtable, factor, HT
3641 
3642 .seealso: MatSetOption()
3643 @*/
3644 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
3645 {
3646   PetscErrorCode ierr,(*f)(Mat,PetscReal);
3647 
3648   PetscFunctionBegin;
3649   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);CHKERRQ(ierr);
3650   if (f) {
3651     ierr = (*f)(mat,fact);CHKERRQ(ierr);
3652   }
3653   PetscFunctionReturn(0);
3654 }
3655 
3656 EXTERN_C_BEGIN
3657 #undef __FUNCT__
3658 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
3659 PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
3660 {
3661   Mat_MPIBAIJ *baij;
3662 
3663   PetscFunctionBegin;
3664   baij = (Mat_MPIBAIJ*)mat->data;
3665   baij->ht_fact = fact;
3666   PetscFunctionReturn(0);
3667 }
3668 EXTERN_C_END
3669 
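/*
    Gives access to the internal pieces of a MATMPIBAIJ matrix: the diagonal
  (Ad) and off-diagonal (Ao) sequential blocks, plus the map (colmap) from the
  block columns of Ao to global block columns.
*/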
3670 #undef __FUNCT__
3671 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
3672 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3673 {
3674   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
3675   PetscFunctionBegin;
3676   *Ad     = a->A;
3677   *Ao     = a->B;
3678   *colmap = a->garray;
3679   PetscFunctionReturn(0);
3680 }
3681 
3682 /*
3683     Special version for direct calls from Fortran (to eliminate two function call overheads)
3684 */
3685 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3686 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3687 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3688 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3689 #endif
3690 
3691 #undef __FUNCT__
3692 #define __FUNCT__ "matmpibaijsetvaluesblocked_"
3693 /*@C
3694   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked()
3695 
3696   Collective on Mat
3697 
3698   Input Parameters:
3699 + mat - the matrix
3700 . min - number of input rows
3701 . im - input rows
3702 . nin - number of input columns
3703 . in - input columns
3704 . v - numerical values input
3705 - addvin - INSERT_VALUES or ADD_VALUES
3706 
3707   Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse.
3708 
3709   Level: advanced
3710 
3711 .seealso:   MatSetValuesBlocked()
3712 @*/
3713 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
3714 {
3715   /* convert input arguments to C version */
3716   Mat             mat = *matin;
3717   PetscInt        m = *min, n = *nin;
3718   InsertMode      addv = *addvin;
3719 
3720   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
3721   const MatScalar *value;
3722   MatScalar       *barray=baij->barray;
3723   PetscTruth      roworiented = baij->roworiented;
3724   PetscErrorCode  ierr;
3725   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
3726   PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
3727   PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
3728 
3729   PetscFunctionBegin;
3730   /* tasks normally handled by MatSetValuesBlocked() */
3731   if (mat->insertmode == NOT_SET_VALUES) {
3732     mat->insertmode = addv;
3733   }
3734 #if defined(PETSC_USE_DEBUG)
3735   else if (mat->insertmode != addv) {
3736     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
3737   }
3738   if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
3739 #endif
3740   if (mat->assembled) {
3741     mat->was_assembled = PETSC_TRUE;
3742     mat->assembled     = PETSC_FALSE;
3743   }
3744   ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
3745 
3746 
3747   if (!barray) {
3748     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
3749     baij->barray = barray;
3750   }
3751 
3752   if (roworiented) {
3753     stepval = (n-1)*bs;
3754   } else {
3755     stepval = (m-1)*bs;
3756   }
3757   for (i=0; i<m; i++) {
3758     if (im[i] < 0) continue;
3759 #if defined(PETSC_USE_DEBUG)
3760     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
3761 #endif
3762     if (im[i] >= rstart && im[i] < rend) {
3763       row = im[i] - rstart;
3764       for (j=0; j<n; j++) {
3765         /* If NumCol = 1 then a copy is not required */
3766         if ((roworiented) && (n == 1)) {
3767           barray = (MatScalar*)v + i*bs2;
3768         } else if((!roworiented) && (m == 1)) {
3769           barray = (MatScalar*)v + j*bs2;
3770         } else { /* Here a copy is required */
3771           if (roworiented) {
3772             value = v + i*(stepval+bs)*bs + j*bs;
3773           } else {
3774             value = v + j*(stepval+bs)*bs + i*bs;
3775           }
3776           for (ii=0; ii<bs; ii++,value+=stepval) {
3777             for (jj=0; jj<bs; jj++) {
3778               *barray++  = *value++;
3779             }
3780           }
3781           barray -=bs2;
3782         }
3783 
3784         if (in[j] >= cstart && in[j] < cend){
3785           col  = in[j] - cstart;
3786           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
3787         }
3788         else if (in[j] < 0) continue;
3789 #if defined(PETSC_USE_DEBUG)
3790         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
3791 #endif
3792         else {
3793           if (mat->was_assembled) {
3794             if (!baij->colmap) {
3795               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
3796             }
3797 
3798 #if defined(PETSC_USE_DEBUG)
3799 #if defined (PETSC_USE_CTABLE)
3800             { PetscInt data;
3801               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
3802               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
3803             }
3804 #else
3805             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
3806 #endif
3807 #endif
3808 #if defined (PETSC_USE_CTABLE)
3809 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
3810             col  = (col - 1)/bs;
3811 #else
3812             col = (baij->colmap[in[j]] - 1)/bs;
3813 #endif
3814             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
3815               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
3816               col =  in[j];
3817             }
3818           }
3819           else col = in[j];
3820           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
3821         }
3822       }
3823     } else {
3824       if (!baij->donotstash) {
3825         if (roworiented) {
3826           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
3827         } else {
3828           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
3829         }
3830       }
3831     }
3832   }
3833 
3834   /* task normally handled by MatSetValuesBlocked() */
3835   ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
3836   PetscFunctionReturn(0);
3837 }
3838