xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 81f96dcc196638099f4f8062b7ddb72e8b817c95)
1 #define PETSCMAT_DLL
2 
3 #include "../src/mat/impls/baij/mpi/mpibaij.h"   /*I  "petscmat.h"  I*/
4 
5 EXTERN PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
6 EXTERN PetscErrorCode DisAssemble_MPIBAIJ(Mat);
7 EXTERN PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat,PetscInt,IS[],PetscInt);
8 EXTERN PetscErrorCode MatGetSubMatrices_MPIBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat *[]);
9 EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],PetscScalar []);
10 EXTERN PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
11 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
12 EXTERN PetscErrorCode MatGetRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
13 EXTERN PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,PetscInt,PetscInt*,PetscInt*[],PetscScalar*[]);
14 EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscScalar);
15 
16 #undef __FUNCT__
17 #define __FUNCT__ "MatGetRowMaxAbs_MPIBAIJ"
18 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A,Vec v,PetscInt idx[])
19 {
20   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
21   PetscErrorCode ierr;
22   PetscInt       i,*idxb = 0;
23   PetscScalar    *va,*vb;
24   Vec            vtmp;
25 
26   PetscFunctionBegin;
27   ierr = MatGetRowMaxAbs(a->A,v,idx);CHKERRQ(ierr);
28   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
29   if (idx) {
30     for (i=0; i<A->rmap->n; i++) {if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;}
31   }
32 
33   ierr = VecCreateSeq(PETSC_COMM_SELF,A->rmap->n,&vtmp);CHKERRQ(ierr);
34   if (idx) {ierr = PetscMalloc(A->rmap->n*sizeof(PetscInt),&idxb);CHKERRQ(ierr);}
35   ierr = MatGetRowMaxAbs(a->B,vtmp,idxb);CHKERRQ(ierr);
36   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
37 
38   for (i=0; i<A->rmap->n; i++){
39     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {va[i] = vb[i]; if (idx) idx[i] = A->cmap->bs*a->garray[idxb[i]/A->cmap->bs] + (idxb[i] % A->cmap->bs);}
40   }
41 
42   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
43   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
44   if (idxb) {ierr = PetscFree(idxb);CHKERRQ(ierr);}
45   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
46   PetscFunctionReturn(0);
47 }
48 
49 EXTERN_C_BEGIN
50 #undef __FUNCT__
51 #define __FUNCT__ "MatStoreValues_MPIBAIJ"
52 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPIBAIJ(Mat mat)
53 {
54   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
55   PetscErrorCode ierr;
56 
57   PetscFunctionBegin;
58   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
59   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
60   PetscFunctionReturn(0);
61 }
62 EXTERN_C_END
63 
64 EXTERN_C_BEGIN
65 #undef __FUNCT__
66 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
67 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPIBAIJ(Mat mat)
68 {
69   Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
70   PetscErrorCode ierr;
71 
72   PetscFunctionBegin;
73   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
74   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
75   PetscFunctionReturn(0);
76 }
77 EXTERN_C_END
78 
79 /*
80      Local utility routine that creates a mapping from the global column
81    number to the local number in the off-diagonal part of the local
82    storage of the matrix.  This is done in a non scalable way since the
83    length of colmap equals the global matrix length.
84 */
85 #undef __FUNCT__
86 #define __FUNCT__ "CreateColmap_MPIBAIJ_Private"
87 PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat)
88 {
89   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
90   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
91   PetscErrorCode ierr;
92   PetscInt       nbs = B->nbs,i,bs=mat->rmap->bs;
93 
94   PetscFunctionBegin;
95 #if defined (PETSC_USE_CTABLE)
96   ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr);
97   for (i=0; i<nbs; i++){
98     ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
99   }
100 #else
101   ierr = PetscMalloc((baij->Nbs+1)*sizeof(PetscInt),&baij->colmap);CHKERRQ(ierr);
102   ierr = PetscLogObjectMemory(mat,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
103   ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(PetscInt));CHKERRQ(ierr);
104   for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
105 #endif
106   PetscFunctionReturn(0);
107 }
108 
109 #define CHUNKSIZE  10
110 
111 #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
112 { \
113  \
114     brow = row/bs;  \
115     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
116     rmax = aimax[brow]; nrow = ailen[brow]; \
117       bcol = col/bs; \
118       ridx = row % bs; cidx = col % bs; \
119       low = 0; high = nrow; \
120       while (high-low > 3) { \
121         t = (low+high)/2; \
122         if (rp[t] > bcol) high = t; \
123         else              low  = t; \
124       } \
125       for (_i=low; _i<high; _i++) { \
126         if (rp[_i] > bcol) break; \
127         if (rp[_i] == bcol) { \
128           bap  = ap +  bs2*_i + bs*cidx + ridx; \
129           if (addv == ADD_VALUES) *bap += value;  \
130           else                    *bap  = value;  \
131           goto a_noinsert; \
132         } \
133       } \
134       if (a->nonew == 1) goto a_noinsert; \
135       if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
136       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
137       N = nrow++ - 1;  \
138       /* shift up all the later entries in this row */ \
139       for (ii=N; ii>=_i; ii--) { \
140         rp[ii+1] = rp[ii]; \
141         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
142       } \
143       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
144       rp[_i]                      = bcol;  \
145       ap[bs2*_i + bs*cidx + ridx] = value;  \
146       a_noinsert:; \
147     ailen[brow] = nrow; \
148 }
149 
150 #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
151 { \
152     brow = row/bs;  \
153     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
154     rmax = bimax[brow]; nrow = bilen[brow]; \
155       bcol = col/bs; \
156       ridx = row % bs; cidx = col % bs; \
157       low = 0; high = nrow; \
158       while (high-low > 3) { \
159         t = (low+high)/2; \
160         if (rp[t] > bcol) high = t; \
161         else              low  = t; \
162       } \
163       for (_i=low; _i<high; _i++) { \
164         if (rp[_i] > bcol) break; \
165         if (rp[_i] == bcol) { \
166           bap  = ap +  bs2*_i + bs*cidx + ridx; \
167           if (addv == ADD_VALUES) *bap += value;  \
168           else                    *bap  = value;  \
169           goto b_noinsert; \
170         } \
171       } \
172       if (b->nonew == 1) goto b_noinsert; \
173       if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
174       MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
175       CHKMEMQ;\
176       N = nrow++ - 1;  \
177       /* shift up all the later entries in this row */ \
178       for (ii=N; ii>=_i; ii--) { \
179         rp[ii+1] = rp[ii]; \
180         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
181       } \
182       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
183       rp[_i]                      = bcol;  \
184       ap[bs2*_i + bs*cidx + ridx] = value;  \
185       b_noinsert:; \
186     bilen[brow] = nrow; \
187 }
188 
189 #undef __FUNCT__
190 #define __FUNCT__ "MatSetValues_MPIBAIJ"
191 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
192 {
193   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
194   MatScalar      value;
195   PetscTruth     roworiented = baij->roworiented;
196   PetscErrorCode ierr;
197   PetscInt       i,j,row,col;
198   PetscInt       rstart_orig=mat->rmap->rstart;
199   PetscInt       rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart;
200   PetscInt       cend_orig=mat->cmap->rend,bs=mat->rmap->bs;
201 
202   /* Some Variables required in the macro */
203   Mat            A = baij->A;
204   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)(A)->data;
205   PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
206   MatScalar      *aa=a->a;
207 
208   Mat            B = baij->B;
209   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(B)->data;
210   PetscInt       *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
211   MatScalar      *ba=b->a;
212 
213   PetscInt       *rp,ii,nrow,_i,rmax,N,brow,bcol;
214   PetscInt       low,high,t,ridx,cidx,bs2=a->bs2;
215   MatScalar      *ap,*bap;
216 
217   PetscFunctionBegin;
218   if (v) PetscValidScalarPointer(v,6);
219   for (i=0; i<m; i++) {
220     if (im[i] < 0) continue;
221 #if defined(PETSC_USE_DEBUG)
222     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
223 #endif
224     if (im[i] >= rstart_orig && im[i] < rend_orig) {
225       row = im[i] - rstart_orig;
226       for (j=0; j<n; j++) {
227         if (in[j] >= cstart_orig && in[j] < cend_orig){
228           col = in[j] - cstart_orig;
229           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
230           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
231           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
232         } else if (in[j] < 0) continue;
233 #if defined(PETSC_USE_DEBUG)
234         else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[i],mat->cmap->N-1);}
235 #endif
236         else {
237           if (mat->was_assembled) {
238             if (!baij->colmap) {
239               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
240             }
241 #if defined (PETSC_USE_CTABLE)
242             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
243             col  = col - 1;
244 #else
245             col = baij->colmap[in[j]/bs] - 1;
246 #endif
247             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
248               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
249               col =  in[j];
250               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
251               B = baij->B;
252               b = (Mat_SeqBAIJ*)(B)->data;
253               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
254               ba=b->a;
255             } else col += in[j]%bs;
256           } else col = in[j];
257           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
258           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
259           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
260         }
261       }
262     } else {
263       if (!baij->donotstash) {
264         if (roworiented) {
265           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
266         } else {
267           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
268         }
269       }
270     }
271   }
272   PetscFunctionReturn(0);
273 }
274 
275 #undef __FUNCT__
276 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
277 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
278 {
279   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
280   const PetscScalar *value;
281   MatScalar         *barray=baij->barray;
282   PetscTruth        roworiented = baij->roworiented;
283   PetscErrorCode    ierr;
284   PetscInt          i,j,ii,jj,row,col,rstart=baij->rstartbs;
285   PetscInt          rend=baij->rendbs,cstart=baij->cstartbs,stepval;
286   PetscInt          cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
287 
288   PetscFunctionBegin;
289   if(!barray) {
290     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
291     baij->barray = barray;
292   }
293 
294   if (roworiented) {
295     stepval = (n-1)*bs;
296   } else {
297     stepval = (m-1)*bs;
298   }
299   for (i=0; i<m; i++) {
300     if (im[i] < 0) continue;
301 #if defined(PETSC_USE_DEBUG)
302     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
303 #endif
304     if (im[i] >= rstart && im[i] < rend) {
305       row = im[i] - rstart;
306       for (j=0; j<n; j++) {
307         /* If NumCol = 1 then a copy is not required */
308         if ((roworiented) && (n == 1)) {
309           barray = (MatScalar*)v + i*bs2;
310         } else if((!roworiented) && (m == 1)) {
311           barray = (MatScalar*)v + j*bs2;
312         } else { /* Here a copy is required */
313           if (roworiented) {
314             value = v + i*(stepval+bs)*bs + j*bs;
315           } else {
316             value = v + j*(stepval+bs)*bs + i*bs;
317           }
318           for (ii=0; ii<bs; ii++,value+=stepval) {
319             for (jj=0; jj<bs; jj++) {
320               *barray++  = *value++;
321             }
322           }
323           barray -=bs2;
324         }
325 
326         if (in[j] >= cstart && in[j] < cend){
327           col  = in[j] - cstart;
328           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
329         }
330         else if (in[j] < 0) continue;
331 #if defined(PETSC_USE_DEBUG)
332         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
333 #endif
334         else {
335           if (mat->was_assembled) {
336             if (!baij->colmap) {
337               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
338             }
339 
340 #if defined(PETSC_USE_DEBUG)
341 #if defined (PETSC_USE_CTABLE)
342             { PetscInt data;
343               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
344               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
345             }
346 #else
347             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
348 #endif
349 #endif
350 #if defined (PETSC_USE_CTABLE)
351 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
352             col  = (col - 1)/bs;
353 #else
354             col = (baij->colmap[in[j]] - 1)/bs;
355 #endif
356             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
357               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
358               col =  in[j];
359             }
360           }
361           else col = in[j];
362           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
363         }
364       }
365     } else {
366       if (!baij->donotstash) {
367         if (roworiented) {
368           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
369         } else {
370           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
371         }
372       }
373     }
374   }
375   PetscFunctionReturn(0);
376 }
377 
378 #define HASH_KEY 0.6180339887
379 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(PetscInt)((size)*(tmp-(PetscInt)tmp)))
380 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
381 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
382 #undef __FUNCT__
383 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
384 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
385 {
386   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
387   PetscTruth     roworiented = baij->roworiented;
388   PetscErrorCode ierr;
389   PetscInt       i,j,row,col;
390   PetscInt       rstart_orig=mat->rmap->rstart;
391   PetscInt       rend_orig=mat->rmap->rend,Nbs=baij->Nbs;
392   PetscInt       h1,key,size=baij->ht_size,bs=mat->rmap->bs,*HT=baij->ht,idx;
393   PetscReal      tmp;
394   MatScalar      **HD = baij->hd,value;
395 #if defined(PETSC_USE_DEBUG)
396   PetscInt       total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
397 #endif
398 
399   PetscFunctionBegin;
400   if (v) PetscValidScalarPointer(v,6);
401   for (i=0; i<m; i++) {
402 #if defined(PETSC_USE_DEBUG)
403     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
404     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
405 #endif
406       row = im[i];
407     if (row >= rstart_orig && row < rend_orig) {
408       for (j=0; j<n; j++) {
409         col = in[j];
410         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
411         /* Look up PetscInto the Hash Table */
412         key = (row/bs)*Nbs+(col/bs)+1;
413         h1  = HASH(size,key,tmp);
414 
415 
416         idx = h1;
417 #if defined(PETSC_USE_DEBUG)
418         insert_ct++;
419         total_ct++;
420         if (HT[idx] != key) {
421           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
422           if (idx == size) {
423             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
424             if (idx == h1) {
425               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
426             }
427           }
428         }
429 #else
430         if (HT[idx] != key) {
431           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
432           if (idx == size) {
433             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
434             if (idx == h1) {
435               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
436             }
437           }
438         }
439 #endif
440         /* A HASH table entry is found, so insert the values at the correct address */
441         if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
442         else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
443       }
444     } else {
445       if (!baij->donotstash) {
446         if (roworiented) {
447           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,PETSC_FALSE);CHKERRQ(ierr);
448         } else {
449           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,PETSC_FALSE);CHKERRQ(ierr);
450         }
451       }
452     }
453   }
454 #if defined(PETSC_USE_DEBUG)
455   baij->ht_total_ct = total_ct;
456   baij->ht_insert_ct = insert_ct;
457 #endif
458   PetscFunctionReturn(0);
459 }
460 
461 #undef __FUNCT__
462 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
463 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
464 {
465   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
466   PetscTruth        roworiented = baij->roworiented;
467   PetscErrorCode    ierr;
468   PetscInt          i,j,ii,jj,row,col;
469   PetscInt          rstart=baij->rstartbs;
470   PetscInt          rend=mat->rmap->rend,stepval,bs=mat->rmap->bs,bs2=baij->bs2,nbs2=n*bs2;
471   PetscInt          h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
472   PetscReal         tmp;
473   MatScalar         **HD = baij->hd,*baij_a;
474   const PetscScalar *v_t,*value;
475 #if defined(PETSC_USE_DEBUG)
476   PetscInt          total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
477 #endif
478 
479   PetscFunctionBegin;
480 
481   if (roworiented) {
482     stepval = (n-1)*bs;
483   } else {
484     stepval = (m-1)*bs;
485   }
486   for (i=0; i<m; i++) {
487 #if defined(PETSC_USE_DEBUG)
488     if (im[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",im[i]);
489     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],baij->Mbs-1);
490 #endif
491     row   = im[i];
492     v_t   = v + i*nbs2;
493     if (row >= rstart && row < rend) {
494       for (j=0; j<n; j++) {
495         col = in[j];
496 
497         /* Look up into the Hash Table */
498         key = row*Nbs+col+1;
499         h1  = HASH(size,key,tmp);
500 
501         idx = h1;
502 #if defined(PETSC_USE_DEBUG)
503         total_ct++;
504         insert_ct++;
505        if (HT[idx] != key) {
506           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
507           if (idx == size) {
508             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
509             if (idx == h1) {
510               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
511             }
512           }
513         }
514 #else
515         if (HT[idx] != key) {
516           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
517           if (idx == size) {
518             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
519             if (idx == h1) {
520               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%D,%D) has no entry in the hash table", row, col);
521             }
522           }
523         }
524 #endif
525         baij_a = HD[idx];
526         if (roworiented) {
527           /*value = v + i*(stepval+bs)*bs + j*bs;*/
528           /* value = v + (i*(stepval+bs)+j)*bs; */
529           value = v_t;
530           v_t  += bs;
531           if (addv == ADD_VALUES) {
532             for (ii=0; ii<bs; ii++,value+=stepval) {
533               for (jj=ii; jj<bs2; jj+=bs) {
534                 baij_a[jj]  += *value++;
535               }
536             }
537           } else {
538             for (ii=0; ii<bs; ii++,value+=stepval) {
539               for (jj=ii; jj<bs2; jj+=bs) {
540                 baij_a[jj]  = *value++;
541               }
542             }
543           }
544         } else {
545           value = v + j*(stepval+bs)*bs + i*bs;
546           if (addv == ADD_VALUES) {
547             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
548               for (jj=0; jj<bs; jj++) {
549                 baij_a[jj]  += *value++;
550               }
551             }
552           } else {
553             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
554               for (jj=0; jj<bs; jj++) {
555                 baij_a[jj]  = *value++;
556               }
557             }
558           }
559         }
560       }
561     } else {
562       if (!baij->donotstash) {
563         if (roworiented) {
564           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
565         } else {
566           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
567         }
568       }
569     }
570   }
571 #if defined(PETSC_USE_DEBUG)
572   baij->ht_total_ct = total_ct;
573   baij->ht_insert_ct = insert_ct;
574 #endif
575   PetscFunctionReturn(0);
576 }
577 
578 #undef __FUNCT__
579 #define __FUNCT__ "MatGetValues_MPIBAIJ"
580 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
581 {
582   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
583   PetscErrorCode ierr;
584   PetscInt       bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
585   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
586 
587   PetscFunctionBegin;
588   for (i=0; i<m; i++) {
589     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
590     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
591     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
592       row = idxm[i] - bsrstart;
593       for (j=0; j<n; j++) {
594         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
595         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
596         if (idxn[j] >= bscstart && idxn[j] < bscend){
597           col = idxn[j] - bscstart;
598           ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
599         } else {
600           if (!baij->colmap) {
601             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
602           }
603 #if defined (PETSC_USE_CTABLE)
604           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
605           data --;
606 #else
607           data = baij->colmap[idxn[j]/bs]-1;
608 #endif
609           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
610           else {
611             col  = data + idxn[j]%bs;
612             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
613           }
614         }
615       }
616     } else {
617       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
618     }
619   }
620  PetscFunctionReturn(0);
621 }
622 
623 #undef __FUNCT__
624 #define __FUNCT__ "MatNorm_MPIBAIJ"
625 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
626 {
627   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
628   Mat_SeqBAIJ    *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
629   PetscErrorCode ierr;
630   PetscInt       i,j,bs2=baij->bs2,bs=baij->A->rmap->bs,nz,row,col;
631   PetscReal      sum = 0.0;
632   MatScalar      *v;
633 
634   PetscFunctionBegin;
635   if (baij->size == 1) {
636     ierr =  MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
637   } else {
638     if (type == NORM_FROBENIUS) {
639       v = amat->a;
640       nz = amat->nz*bs2;
641       for (i=0; i<nz; i++) {
642 #if defined(PETSC_USE_COMPLEX)
643         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
644 #else
645         sum += (*v)*(*v); v++;
646 #endif
647       }
648       v = bmat->a;
649       nz = bmat->nz*bs2;
650       for (i=0; i<nz; i++) {
651 #if defined(PETSC_USE_COMPLEX)
652         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
653 #else
654         sum += (*v)*(*v); v++;
655 #endif
656       }
657       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
658       *nrm = sqrt(*nrm);
659     } else if (type == NORM_1) { /* max column sum */
660       PetscReal *tmp,*tmp2;
661       PetscInt  *jj,*garray=baij->garray,cstart=baij->rstartbs;
662       ierr = PetscMalloc2(mat->cmap->N,PetscReal,&tmp,mat->cmap->N,PetscReal,&tmp2);CHKERRQ(ierr);
663       ierr = PetscMemzero(tmp,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
664       v = amat->a; jj = amat->j;
665       for (i=0; i<amat->nz; i++) {
666         for (j=0; j<bs; j++){
667           col = bs*(cstart + *jj) + j; /* column index */
668           for (row=0; row<bs; row++){
669             tmp[col] += PetscAbsScalar(*v);  v++;
670           }
671         }
672         jj++;
673       }
674       v = bmat->a; jj = bmat->j;
675       for (i=0; i<bmat->nz; i++) {
676         for (j=0; j<bs; j++){
677           col = bs*garray[*jj] + j;
678           for (row=0; row<bs; row++){
679             tmp[col] += PetscAbsScalar(*v); v++;
680           }
681         }
682         jj++;
683       }
684       ierr = MPI_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
685       *nrm = 0.0;
686       for (j=0; j<mat->cmap->N; j++) {
687         if (tmp2[j] > *nrm) *nrm = tmp2[j];
688       }
689       ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
690     } else if (type == NORM_INFINITY) { /* max row sum */
691       PetscReal *sums;
692       ierr = PetscMalloc(bs*sizeof(PetscReal),&sums);CHKERRQ(ierr)
693       sum = 0.0;
694       for (j=0; j<amat->mbs; j++) {
695         for (row=0; row<bs; row++) sums[row] = 0.0;
696         v = amat->a + bs2*amat->i[j];
697         nz = amat->i[j+1]-amat->i[j];
698         for (i=0; i<nz; i++) {
699           for (col=0; col<bs; col++){
700             for (row=0; row<bs; row++){
701               sums[row] += PetscAbsScalar(*v); v++;
702             }
703           }
704         }
705         v = bmat->a + bs2*bmat->i[j];
706         nz = bmat->i[j+1]-bmat->i[j];
707         for (i=0; i<nz; i++) {
708           for (col=0; col<bs; col++){
709             for (row=0; row<bs; row++){
710               sums[row] += PetscAbsScalar(*v); v++;
711             }
712           }
713         }
714         for (row=0; row<bs; row++){
715           if (sums[row] > sum) sum = sums[row];
716         }
717       }
718       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_MAX,((PetscObject)mat)->comm);CHKERRQ(ierr);
719       ierr = PetscFree(sums);CHKERRQ(ierr);
720     } else {
721       SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
722     }
723   }
724   PetscFunctionReturn(0);
725 }
726 
727 /*
728   Creates the hash table, and sets the table
729   This table is created only once.
730   If new entried need to be added to the matrix
731   then the hash table has to be destroyed and
732   recreated.
733 */
734 #undef __FUNCT__
735 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
736 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
737 {
738   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
739   Mat            A = baij->A,B=baij->B;
740   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
741   PetscInt       i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
742   PetscErrorCode ierr;
743   PetscInt       ht_size,bs2=baij->bs2,rstart=baij->rstartbs;
744   PetscInt       cstart=baij->cstartbs,*garray=baij->garray,row,col,Nbs=baij->Nbs;
745   PetscInt       *HT,key;
746   MatScalar      **HD;
747   PetscReal      tmp;
748 #if defined(PETSC_USE_INFO)
749   PetscInt       ct=0,max=0;
750 #endif
751 
752   PetscFunctionBegin;
753   if (baij->ht) PetscFunctionReturn(0);
754 
755   baij->ht_size = (PetscInt)(factor*nz);
756   ht_size       = baij->ht_size;
757 
758   /* Allocate Memory for Hash Table */
759   ierr = PetscMalloc2(ht_size,MatScalar*,&baij->hd,ht_size,PetscInt,&baij->ht);CHKERRQ(ierr);
760   ierr = PetscMemzero(baij->hd,ht_size*sizeof(MatScalar*));CHKERRQ(ierr);
761   ierr = PetscMemzero(baij->ht,ht_size*sizeof(PetscInt));CHKERRQ(ierr);
762   HD   = baij->hd;
763   HT   = baij->ht;
764 
765   /* Loop Over A */
766   for (i=0; i<a->mbs; i++) {
767     for (j=ai[i]; j<ai[i+1]; j++) {
768       row = i+rstart;
769       col = aj[j]+cstart;
770 
771       key = row*Nbs + col + 1;
772       h1  = HASH(ht_size,key,tmp);
773       for (k=0; k<ht_size; k++){
774         if (!HT[(h1+k)%ht_size]) {
775           HT[(h1+k)%ht_size] = key;
776           HD[(h1+k)%ht_size] = a->a + j*bs2;
777           break;
778 #if defined(PETSC_USE_INFO)
779         } else {
780           ct++;
781 #endif
782         }
783       }
784 #if defined(PETSC_USE_INFO)
785       if (k> max) max = k;
786 #endif
787     }
788   }
789   /* Loop Over B */
790   for (i=0; i<b->mbs; i++) {
791     for (j=bi[i]; j<bi[i+1]; j++) {
792       row = i+rstart;
793       col = garray[bj[j]];
794       key = row*Nbs + col + 1;
795       h1  = HASH(ht_size,key,tmp);
796       for (k=0; k<ht_size; k++){
797         if (!HT[(h1+k)%ht_size]) {
798           HT[(h1+k)%ht_size] = key;
799           HD[(h1+k)%ht_size] = b->a + j*bs2;
800           break;
801 #if defined(PETSC_USE_INFO)
802         } else {
803           ct++;
804 #endif
805         }
806       }
807 #if defined(PETSC_USE_INFO)
808       if (k> max) max = k;
809 #endif
810     }
811   }
812 
813   /* Print Summary */
814 #if defined(PETSC_USE_INFO)
815   for (i=0,j=0; i<ht_size; i++) {
816     if (HT[i]) {j++;}
817   }
818   ierr = PetscInfo2(mat,"Average Search = %5.2f,max search = %D\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);CHKERRQ(ierr);
819 #endif
820   PetscFunctionReturn(0);
821 }
822 
823 #undef __FUNCT__
824 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
825 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
826 {
827   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
828   PetscErrorCode ierr;
829   PetscInt       nstash,reallocs;
830   InsertMode     addv;
831 
832   PetscFunctionBegin;
833   if (baij->donotstash) {
834     PetscFunctionReturn(0);
835   }
836 
837   /* make sure all processors are either in INSERTMODE or ADDMODE */
838   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
839   if (addv == (ADD_VALUES|INSERT_VALUES)) {
840     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
841   }
842   mat->insertmode = addv; /* in case this processor had no cache */
843 
844   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
845   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
846   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
847   ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
848   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
849   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
850   PetscFunctionReturn(0);
851 }
852 
853 #undef __FUNCT__
854 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
855 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
856 {
857   Mat_MPIBAIJ    *baij=(Mat_MPIBAIJ*)mat->data;
858   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)baij->A->data;
859   PetscErrorCode ierr;
860   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
861   PetscInt       *row,*col;
862   PetscTruth     r1,r2,r3,other_disassembled;
863   MatScalar      *val;
864   InsertMode     addv = mat->insertmode;
865   PetscMPIInt    n;
866 
867   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
868   PetscFunctionBegin;
869   if (!baij->donotstash) {
870     while (1) {
871       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
872       if (!flg) break;
873 
874       for (i=0; i<n;) {
875         /* Now identify the consecutive vals belonging to the same row */
876         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
877         if (j < n) ncols = j-i;
878         else       ncols = n-i;
879         /* Now assemble all these values with a single function call */
880         ierr = MatSetValues_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
881         i = j;
882       }
883     }
884     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
885     /* Now process the block-stash. Since the values are stashed column-oriented,
886        set the roworiented flag to column oriented, and after MatSetValues()
887        restore the original flags */
888     r1 = baij->roworiented;
889     r2 = a->roworiented;
890     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
891     baij->roworiented = PETSC_FALSE;
892     a->roworiented    = PETSC_FALSE;
893     (((Mat_SeqBAIJ*)baij->B->data))->roworiented    = PETSC_FALSE; /* b->roworiented */
894     while (1) {
895       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
896       if (!flg) break;
897 
898       for (i=0; i<n;) {
899         /* Now identify the consecutive vals belonging to the same row */
900         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
901         if (j < n) ncols = j-i;
902         else       ncols = n-i;
903         ierr = MatSetValuesBlocked_MPIBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
904         i = j;
905       }
906     }
907     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
908     baij->roworiented = r1;
909     a->roworiented    = r2;
910     ((Mat_SeqBAIJ*)baij->B->data)->roworiented    = r3; /* b->roworiented */
911   }
912 
913   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
914   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
915 
916   /* determine if any processor has disassembled, if so we must
917      also disassemble ourselfs, in order that we may reassemble. */
918   /*
919      if nonzero structure of submatrix B cannot change then we know that
920      no processor disassembled thus we can skip this stuff
921   */
922   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
923     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
924     if (mat->was_assembled && !other_disassembled) {
925       ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
926     }
927   }
928 
929   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
930     ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
931   }
932   ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
933   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
934   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
935 
936 #if defined(PETSC_USE_INFO)
937   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
938     ierr = PetscInfo1(mat,"Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);CHKERRQ(ierr);
939     baij->ht_total_ct  = 0;
940     baij->ht_insert_ct = 0;
941   }
942 #endif
943   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
944     ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
945     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
946     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
947   }
948 
949   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
950   baij->rowvalues = 0;
951   PetscFunctionReturn(0);
952 }
953 
954 #undef __FUNCT__
955 #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
956 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
957 {
958   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
959   PetscErrorCode    ierr;
960   PetscMPIInt       size = baij->size,rank = baij->rank;
961   PetscInt          bs = mat->rmap->bs;
962   PetscTruth        iascii,isdraw;
963   PetscViewer       sviewer;
964   PetscViewerFormat format;
965 
966   PetscFunctionBegin;
967   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
968   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
969   if (iascii) {
970     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
971     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
972       MatInfo info;
973       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
974       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
975       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
976               rank,mat->rmap->N,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs,
977               mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
978       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
979       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
980       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
981       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
982       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
983       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
984       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
985       PetscFunctionReturn(0);
986     } else if (format == PETSC_VIEWER_ASCII_INFO) {
987       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
988       PetscFunctionReturn(0);
989     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
990       PetscFunctionReturn(0);
991     }
992   }
993 
994   if (isdraw) {
995     PetscDraw       draw;
996     PetscTruth isnull;
997     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
998     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
999   }
1000 
1001   if (size == 1) {
1002     ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1003     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
1004   } else {
1005     /* assemble the entire matrix onto first processor. */
1006     Mat         A;
1007     Mat_SeqBAIJ *Aloc;
1008     PetscInt    M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
1009     MatScalar   *a;
1010 
1011     /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
1012     /* Perhaps this should be the type of mat? */
1013     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
1014     if (!rank) {
1015       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
1016     } else {
1017       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
1018     }
1019     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
1020     ierr = MatMPIBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1021     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
1022 
1023     /* copy over the A part */
1024     Aloc = (Mat_SeqBAIJ*)baij->A->data;
1025     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1026     ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
1027 
1028     for (i=0; i<mbs; i++) {
1029       rvals[0] = bs*(baij->rstartbs + i);
1030       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1031       for (j=ai[i]; j<ai[i+1]; j++) {
1032         col = (baij->cstartbs+aj[j])*bs;
1033         for (k=0; k<bs; k++) {
1034           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1035           col++; a += bs;
1036         }
1037       }
1038     }
1039     /* copy over the B part */
1040     Aloc = (Mat_SeqBAIJ*)baij->B->data;
1041     ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1042     for (i=0; i<mbs; i++) {
1043       rvals[0] = bs*(baij->rstartbs + i);
1044       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1045       for (j=ai[i]; j<ai[i+1]; j++) {
1046         col = baij->garray[aj[j]]*bs;
1047         for (k=0; k<bs; k++) {
1048           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1049           col++; a += bs;
1050         }
1051       }
1052     }
1053     ierr = PetscFree(rvals);CHKERRQ(ierr);
1054     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1055     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1056     /*
1057        Everyone has to call to draw the matrix since the graphics waits are
1058        synchronized across all processors that share the PetscDraw object
1059     */
1060     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1061     if (!rank) {
1062       ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1063       ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1064     }
1065     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1066     ierr = MatDestroy(A);CHKERRQ(ierr);
1067   }
1068   PetscFunctionReturn(0);
1069 }
1070 
1071 #undef __FUNCT__
1072 #define __FUNCT__ "MatView_MPIBAIJ"
1073 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
1074 {
1075   PetscErrorCode ierr;
1076   PetscTruth     iascii,isdraw,issocket,isbinary;
1077 
1078   PetscFunctionBegin;
1079   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1080   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1081   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1082   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1083   if (iascii || isdraw || issocket || isbinary) {
1084     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1085   } else {
1086     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
1087   }
1088   PetscFunctionReturn(0);
1089 }
1090 
1091 #undef __FUNCT__
1092 #define __FUNCT__ "MatDestroy_MPIBAIJ"
1093 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
1094 {
1095   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1096   PetscErrorCode ierr;
1097 
1098   PetscFunctionBegin;
1099 #if defined(PETSC_USE_LOG)
1100   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
1101 #endif
1102   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1103   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
1104   ierr = MatDestroy(baij->A);CHKERRQ(ierr);
1105   ierr = MatDestroy(baij->B);CHKERRQ(ierr);
1106 #if defined (PETSC_USE_CTABLE)
1107   if (baij->colmap) {ierr = PetscTableDestroy(baij->colmap);CHKERRQ(ierr);}
1108 #else
1109   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
1110 #endif
1111   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
1112   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
1113   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
1114   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
1115   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
1116   ierr = PetscFree2(baij->hd,baij->ht);CHKERRQ(ierr);
1117   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
1118   ierr = PetscFree(baij);CHKERRQ(ierr);
1119 
1120   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
1121   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1122   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1123   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
1124   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1125   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1126   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C","",PETSC_NULL);CHKERRQ(ierr);
1127   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatSetHashTableFactor_C","",PETSC_NULL);CHKERRQ(ierr);
1128   PetscFunctionReturn(0);
1129 }
1130 
1131 #undef __FUNCT__
1132 #define __FUNCT__ "MatMult_MPIBAIJ"
1133 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1134 {
1135   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1136   PetscErrorCode ierr;
1137   PetscInt       nt;
1138 
1139   PetscFunctionBegin;
1140   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1141   if (nt != A->cmap->n) {
1142     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1143   }
1144   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1145   if (nt != A->rmap->n) {
1146     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
1147   }
1148   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1149   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1150   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1151   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1152   PetscFunctionReturn(0);
1153 }
1154 
1155 #undef __FUNCT__
1156 #define __FUNCT__ "MatMultAdd_MPIBAIJ"
1157 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1158 {
1159   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1160   PetscErrorCode ierr;
1161 
1162   PetscFunctionBegin;
1163   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1164   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1165   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1166   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1167   PetscFunctionReturn(0);
1168 }
1169 
1170 #undef __FUNCT__
1171 #define __FUNCT__ "MatMultTranspose_MPIBAIJ"
1172 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1173 {
1174   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1175   PetscErrorCode ierr;
1176   PetscTruth     merged;
1177 
1178   PetscFunctionBegin;
1179   ierr = VecScatterGetMerged(a->Mvctx,&merged);CHKERRQ(ierr);
1180   /* do nondiagonal part */
1181   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1182   if (!merged) {
1183     /* send it on its way */
1184     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1185     /* do local part */
1186     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1187     /* receive remote parts: note this assumes the values are not actually */
1188     /* inserted in yy until the next line */
1189     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1190   } else {
1191     /* do local part */
1192     ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1193     /* send it on its way */
1194     ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1195     /* values actually were received in the Begin() but we need to call this nop */
1196     ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1197   }
1198   PetscFunctionReturn(0);
1199 }
1200 
1201 #undef __FUNCT__
1202 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
1203 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1204 {
1205   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1206   PetscErrorCode ierr;
1207 
1208   PetscFunctionBegin;
1209   /* do nondiagonal part */
1210   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1211   /* send it on its way */
1212   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1213   /* do local part */
1214   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1215   /* receive remote parts: note this assumes the values are not actually */
1216   /* inserted in yy until the next line, which is true for my implementation*/
1217   /* but is not perhaps always true. */
1218   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1219   PetscFunctionReturn(0);
1220 }
1221 
1222 /*
1223   This only works correctly for square matrices where the subblock A->A is the
1224    diagonal block
1225 */
1226 #undef __FUNCT__
1227 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
1228 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1229 {
1230   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1231   PetscErrorCode ierr;
1232 
1233   PetscFunctionBegin;
1234   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1235   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1236   PetscFunctionReturn(0);
1237 }
1238 
1239 #undef __FUNCT__
1240 #define __FUNCT__ "MatScale_MPIBAIJ"
1241 PetscErrorCode MatScale_MPIBAIJ(Mat A,PetscScalar aa)
1242 {
1243   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1244   PetscErrorCode ierr;
1245 
1246   PetscFunctionBegin;
1247   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1248   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1249   PetscFunctionReturn(0);
1250 }
1251 
1252 #undef __FUNCT__
1253 #define __FUNCT__ "MatGetRow_MPIBAIJ"
1254 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1255 {
1256   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
1257   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1258   PetscErrorCode ierr;
1259   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1260   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
1261   PetscInt       *cmap,*idx_p,cstart = mat->cstartbs;
1262 
1263   PetscFunctionBegin;
1264   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows")
1265   if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1266   mat->getrowactive = PETSC_TRUE;
1267 
1268   if (!mat->rowvalues && (idx || v)) {
1269     /*
1270         allocate enough space to hold information from the longest row.
1271     */
1272     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1273     PetscInt     max = 1,mbs = mat->mbs,tmp;
1274     for (i=0; i<mbs; i++) {
1275       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1276       if (max < tmp) { max = tmp; }
1277     }
1278     ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1279   }
1280   lrow = row - brstart;
1281 
1282   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1283   if (!v)   {pvA = 0; pvB = 0;}
1284   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1285   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1286   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1287   nztot = nzA + nzB;
1288 
1289   cmap  = mat->garray;
1290   if (v  || idx) {
1291     if (nztot) {
1292       /* Sort by increasing column numbers, assuming A and B already sorted */
1293       PetscInt imark = -1;
1294       if (v) {
1295         *v = v_p = mat->rowvalues;
1296         for (i=0; i<nzB; i++) {
1297           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1298           else break;
1299         }
1300         imark = i;
1301         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1302         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1303       }
1304       if (idx) {
1305         *idx = idx_p = mat->rowindices;
1306         if (imark > -1) {
1307           for (i=0; i<imark; i++) {
1308             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1309           }
1310         } else {
1311           for (i=0; i<nzB; i++) {
1312             if (cmap[cworkB[i]/bs] < cstart)
1313               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1314             else break;
1315           }
1316           imark = i;
1317         }
1318         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1319         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1320       }
1321     } else {
1322       if (idx) *idx = 0;
1323       if (v)   *v   = 0;
1324     }
1325   }
1326   *nz = nztot;
1327   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1328   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1329   PetscFunctionReturn(0);
1330 }
1331 
1332 #undef __FUNCT__
1333 #define __FUNCT__ "MatRestoreRow_MPIBAIJ"
1334 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1335 {
1336   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1337 
1338   PetscFunctionBegin;
1339   if (!baij->getrowactive) {
1340     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1341   }
1342   baij->getrowactive = PETSC_FALSE;
1343   PetscFunctionReturn(0);
1344 }
1345 
1346 #undef __FUNCT__
1347 #define __FUNCT__ "MatZeroEntries_MPIBAIJ"
1348 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1349 {
1350   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1351   PetscErrorCode ierr;
1352 
1353   PetscFunctionBegin;
1354   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1355   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1356   PetscFunctionReturn(0);
1357 }
1358 
1359 #undef __FUNCT__
1360 #define __FUNCT__ "MatGetInfo_MPIBAIJ"
1361 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1362 {
1363   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)matin->data;
1364   Mat            A = a->A,B = a->B;
1365   PetscErrorCode ierr;
1366   PetscReal      isend[5],irecv[5];
1367 
1368   PetscFunctionBegin;
1369   info->block_size     = (PetscReal)matin->rmap->bs;
1370   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1371   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1372   isend[3] = info->memory;  isend[4] = info->mallocs;
1373   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1374   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1375   isend[3] += info->memory;  isend[4] += info->mallocs;
1376   if (flag == MAT_LOCAL) {
1377     info->nz_used      = isend[0];
1378     info->nz_allocated = isend[1];
1379     info->nz_unneeded  = isend[2];
1380     info->memory       = isend[3];
1381     info->mallocs      = isend[4];
1382   } else if (flag == MAT_GLOBAL_MAX) {
1383     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1384     info->nz_used      = irecv[0];
1385     info->nz_allocated = irecv[1];
1386     info->nz_unneeded  = irecv[2];
1387     info->memory       = irecv[3];
1388     info->mallocs      = irecv[4];
1389   } else if (flag == MAT_GLOBAL_SUM) {
1390     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1391     info->nz_used      = irecv[0];
1392     info->nz_allocated = irecv[1];
1393     info->nz_unneeded  = irecv[2];
1394     info->memory       = irecv[3];
1395     info->mallocs      = irecv[4];
1396   } else {
1397     SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1398   }
1399   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1400   info->fill_ratio_needed = 0;
1401   info->factor_mallocs    = 0;
1402   PetscFunctionReturn(0);
1403 }
1404 
1405 #undef __FUNCT__
1406 #define __FUNCT__ "MatSetOption_MPIBAIJ"
1407 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op,PetscTruth flg)
1408 {
1409   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ*)A->data;
1410   PetscErrorCode ierr;
1411 
1412   PetscFunctionBegin;
1413   switch (op) {
1414   case MAT_NEW_NONZERO_LOCATIONS:
1415   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1416   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1417   case MAT_KEEP_NONZERO_PATTERN:
1418   case MAT_NEW_NONZERO_LOCATION_ERR:
1419     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1420     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1421     break;
1422   case MAT_ROW_ORIENTED:
1423     a->roworiented = flg;
1424     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1425     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1426     break;
1427   case MAT_NEW_DIAGONALS:
1428     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1429     break;
1430   case MAT_IGNORE_OFF_PROC_ENTRIES:
1431     a->donotstash = flg;
1432     break;
1433   case MAT_USE_HASH_TABLE:
1434     a->ht_flag = flg;
1435     break;
1436   case MAT_SYMMETRIC:
1437   case MAT_STRUCTURALLY_SYMMETRIC:
1438   case MAT_HERMITIAN:
1439   case MAT_SYMMETRY_ETERNAL:
1440     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1441     break;
1442   default:
1443     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1444   }
1445   PetscFunctionReturn(0);
1446 }
1447 
1448 #undef __FUNCT__
1449 #define __FUNCT__ "MatTranspose_MPIBAIJ("
1450 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,MatReuse reuse,Mat *matout)
1451 {
1452   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)A->data;
1453   Mat_SeqBAIJ    *Aloc;
1454   Mat            B;
1455   PetscErrorCode ierr;
1456   PetscInt       M=A->rmap->N,N=A->cmap->N,*ai,*aj,i,*rvals,j,k,col;
1457   PetscInt       bs=A->rmap->bs,mbs=baij->mbs;
1458   MatScalar      *a;
1459 
1460   PetscFunctionBegin;
1461   if (reuse == MAT_REUSE_MATRIX && A == *matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1462   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1463     ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
1464     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1465     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1466     ierr = MatMPIBAIJSetPreallocation(B,A->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1467   } else {
1468     B = *matout;
1469   }
1470 
1471   /* copy over the A part */
1472   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1473   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1474   ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
1475 
1476   for (i=0; i<mbs; i++) {
1477     rvals[0] = bs*(baij->rstartbs + i);
1478     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1479     for (j=ai[i]; j<ai[i+1]; j++) {
1480       col = (baij->cstartbs+aj[j])*bs;
1481       for (k=0; k<bs; k++) {
1482         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1483         col++; a += bs;
1484       }
1485     }
1486   }
1487   /* copy over the B part */
1488   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1489   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1490   for (i=0; i<mbs; i++) {
1491     rvals[0] = bs*(baij->rstartbs + i);
1492     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1493     for (j=ai[i]; j<ai[i+1]; j++) {
1494       col = baij->garray[aj[j]]*bs;
1495       for (k=0; k<bs; k++) {
1496         ierr = MatSetValues_MPIBAIJ(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1497         col++; a += bs;
1498       }
1499     }
1500   }
1501   ierr = PetscFree(rvals);CHKERRQ(ierr);
1502   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1503   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1504 
1505   if (reuse == MAT_INITIAL_MATRIX || *matout != A) {
1506     *matout = B;
1507   } else {
1508     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1509   }
1510   PetscFunctionReturn(0);
1511 }
1512 
1513 #undef __FUNCT__
1514 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
1515 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1516 {
1517   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
1518   Mat            a = baij->A,b = baij->B;
1519   PetscErrorCode ierr;
1520   PetscInt       s1,s2,s3;
1521 
1522   PetscFunctionBegin;
1523   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1524   if (rr) {
1525     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1526     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1527     /* Overlap communication with computation. */
1528     ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1529   }
1530   if (ll) {
1531     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1532     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1533     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1534   }
1535   /* scale  the diagonal block */
1536   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1537 
1538   if (rr) {
1539     /* Do a scatter end and then right scale the off-diagonal block */
1540     ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1541     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1542   }
1543 
1544   PetscFunctionReturn(0);
1545 }
1546 
1547 #undef __FUNCT__
1548 #define __FUNCT__ "MatZeroRows_MPIBAIJ"
1549 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag)
1550 {
1551   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1552   PetscErrorCode ierr;
1553   PetscMPIInt    imdex,size = l->size,n,rank = l->rank;
1554   PetscInt       i,*owners = A->rmap->range;
1555   PetscInt       *nprocs,j,idx,nsends,row;
1556   PetscInt       nmax,*svalues,*starts,*owner,nrecvs;
1557   PetscInt       *rvalues,tag = ((PetscObject)A)->tag,count,base,slen,*source,lastidx = -1;
1558   PetscInt       *lens,*lrows,*values,rstart_bs=A->rmap->rstart;
1559   MPI_Comm       comm = ((PetscObject)A)->comm;
1560   MPI_Request    *send_waits,*recv_waits;
1561   MPI_Status     recv_status,*send_status;
1562 #if defined(PETSC_DEBUG)
1563   PetscTruth     found = PETSC_FALSE;
1564 #endif
1565 
1566   PetscFunctionBegin;
1567   /*  first count number of contributors to each processor */
1568   ierr  = PetscMalloc(2*size*sizeof(PetscInt),&nprocs);CHKERRQ(ierr);
1569   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscInt));CHKERRQ(ierr);
1570   ierr  = PetscMalloc((N+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); /* see note*/
1571   j = 0;
1572   for (i=0; i<N; i++) {
1573     if (lastidx > (idx = rows[i])) j = 0;
1574     lastidx = idx;
1575     for (; j<size; j++) {
1576       if (idx >= owners[j] && idx < owners[j+1]) {
1577         nprocs[2*j]++;
1578         nprocs[2*j+1] = 1;
1579         owner[i] = j;
1580 #if defined(PETSC_DEBUG)
1581         found = PETSC_TRUE;
1582 #endif
1583         break;
1584       }
1585     }
1586 #if defined(PETSC_DEBUG)
1587     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
1588     found = PETSC_FALSE;
1589 #endif
1590   }
1591   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
1592 
1593   /* inform other processors of number of messages and max length*/
1594   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
1595 
1596   /* post receives:   */
1597   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(PetscInt),&rvalues);CHKERRQ(ierr);
1598   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
1599   for (i=0; i<nrecvs; i++) {
1600     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
1601   }
1602 
1603   /* do sends:
1604      1) starts[i] gives the starting index in svalues for stuff going to
1605      the ith processor
1606   */
1607   ierr = PetscMalloc((N+1)*sizeof(PetscInt),&svalues);CHKERRQ(ierr);
1608   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
1609   ierr = PetscMalloc((size+1)*sizeof(PetscInt),&starts);CHKERRQ(ierr);
1610   starts[0]  = 0;
1611   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1612   for (i=0; i<N; i++) {
1613     svalues[starts[owner[i]]++] = rows[i];
1614   }
1615 
1616   starts[0] = 0;
1617   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1618   count = 0;
1619   for (i=0; i<size; i++) {
1620     if (nprocs[2*i+1]) {
1621       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPIU_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
1622     }
1623   }
1624   ierr = PetscFree(starts);CHKERRQ(ierr);
1625 
1626   base = owners[rank];
1627 
1628   /*  wait on receives */
1629   ierr   = PetscMalloc2(nrecvs+1,PetscInt,&lens,nrecvs+1,PetscInt,&source);CHKERRQ(ierr);
1630   count  = nrecvs;
1631   slen = 0;
1632   while (count) {
1633     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
1634     /* unpack receives into our local space */
1635     ierr = MPI_Get_count(&recv_status,MPIU_INT,&n);CHKERRQ(ierr);
1636     source[imdex]  = recv_status.MPI_SOURCE;
1637     lens[imdex]    = n;
1638     slen          += n;
1639     count--;
1640   }
1641   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1642 
1643   /* move the data into the send scatter */
1644   ierr = PetscMalloc((slen+1)*sizeof(PetscInt),&lrows);CHKERRQ(ierr);
1645   count = 0;
1646   for (i=0; i<nrecvs; i++) {
1647     values = rvalues + i*nmax;
1648     for (j=0; j<lens[i]; j++) {
1649       lrows[count++] = values[j] - base;
1650     }
1651   }
1652   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1653   ierr = PetscFree2(lens,source);CHKERRQ(ierr);
1654   ierr = PetscFree(owner);CHKERRQ(ierr);
1655   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1656 
1657   /* actually zap the local rows */
1658   /*
1659         Zero the required rows. If the "diagonal block" of the matrix
1660      is square and the user wishes to set the diagonal we use separate
1661      code so that MatSetValues() is not called for each diagonal allocating
1662      new memory, thus calling lots of mallocs and slowing things down.
1663 
1664   */
1665   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1666   ierr = MatZeroRows_SeqBAIJ(l->B,slen,lrows,0.0);CHKERRQ(ierr);
1667   if ((diag != 0.0) && (l->A->rmap->N == l->A->cmap->N)) {
1668     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,diag);CHKERRQ(ierr);
1669   } else if (diag != 0.0) {
1670     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr);
1671     if (((Mat_SeqBAIJ*)l->A->data)->nonew) {
1672       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1673 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1674     }
1675     for (i=0; i<slen; i++) {
1676       row  = lrows[i] + rstart_bs;
1677       ierr = MatSetValues(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
1678     }
1679     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1680     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1681   } else {
1682     ierr = MatZeroRows_SeqBAIJ(l->A,slen,lrows,0.0);CHKERRQ(ierr);
1683   }
1684 
1685   ierr = PetscFree(lrows);CHKERRQ(ierr);
1686 
1687   /* wait on sends */
1688   if (nsends) {
1689     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1690     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1691     ierr = PetscFree(send_status);CHKERRQ(ierr);
1692   }
1693   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1694   ierr = PetscFree(svalues);CHKERRQ(ierr);
1695 
1696   PetscFunctionReturn(0);
1697 }
1698 
1699 #undef __FUNCT__
1700 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
1701 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1702 {
1703   Mat_MPIBAIJ    *a   = (Mat_MPIBAIJ*)A->data;
1704   PetscErrorCode ierr;
1705 
1706   PetscFunctionBegin;
1707   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1708   PetscFunctionReturn(0);
1709 }
1710 
1711 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
1712 
1713 #undef __FUNCT__
1714 #define __FUNCT__ "MatEqual_MPIBAIJ"
1715 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag)
1716 {
1717   Mat_MPIBAIJ    *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1718   Mat            a,b,c,d;
1719   PetscTruth     flg;
1720   PetscErrorCode ierr;
1721 
1722   PetscFunctionBegin;
1723   a = matA->A; b = matA->B;
1724   c = matB->A; d = matB->B;
1725 
1726   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1727   if (flg) {
1728     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1729   }
1730   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1731   PetscFunctionReturn(0);
1732 }
1733 
1734 #undef __FUNCT__
1735 #define __FUNCT__ "MatCopy_MPIBAIJ"
1736 PetscErrorCode MatCopy_MPIBAIJ(Mat A,Mat B,MatStructure str)
1737 {
1738   PetscErrorCode ierr;
1739   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ *)A->data;
1740   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ *)B->data;
1741 
1742   PetscFunctionBegin;
1743   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1744   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1745     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1746   } else {
1747     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1748     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1749   }
1750   PetscFunctionReturn(0);
1751 }
1752 
1753 #undef __FUNCT__
1754 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ"
1755 PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A)
1756 {
1757   PetscErrorCode ierr;
1758 
1759   PetscFunctionBegin;
1760   ierr =  MatMPIBAIJSetPreallocation(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1761   PetscFunctionReturn(0);
1762 }
1763 
1764 #undef __FUNCT__
1765 #define __FUNCT__ "MatAXPY_MPIBAIJ"
1766 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1767 {
1768   PetscErrorCode ierr;
1769   Mat_MPIBAIJ    *xx=(Mat_MPIBAIJ *)X->data,*yy=(Mat_MPIBAIJ *)Y->data;
1770   PetscBLASInt   bnz,one=1;
1771   Mat_SeqBAIJ    *x,*y;
1772 
1773   PetscFunctionBegin;
1774   if (str == SAME_NONZERO_PATTERN) {
1775     PetscScalar alpha = a;
1776     x = (Mat_SeqBAIJ *)xx->A->data;
1777     y = (Mat_SeqBAIJ *)yy->A->data;
1778     bnz = PetscBLASIntCast(x->nz);
1779     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1780     x = (Mat_SeqBAIJ *)xx->B->data;
1781     y = (Mat_SeqBAIJ *)yy->B->data;
1782     bnz = PetscBLASIntCast(x->nz);
1783     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1784   } else {
1785     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1786   }
1787   PetscFunctionReturn(0);
1788 }
1789 
1790 #undef __FUNCT__
1791 #define __FUNCT__ "MatSetBlockSize_MPIBAIJ"
1792 PetscErrorCode MatSetBlockSize_MPIBAIJ(Mat A,PetscInt bs)
1793 {
1794   Mat_MPIBAIJ    *a   = (Mat_MPIBAIJ*)A->data;
1795   PetscInt rbs,cbs;
1796   PetscErrorCode ierr;
1797 
1798   PetscFunctionBegin;
1799   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1800   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1801   ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr);
1802   ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr);
1803   if (rbs != bs) SETERRQ2(PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,rbs);
1804   if (cbs != bs) SETERRQ2(PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,cbs);
1805   PetscFunctionReturn(0);
1806 }
1807 
1808 #undef __FUNCT__
1809 #define __FUNCT__ "MatRealPart_MPIBAIJ"
1810 PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
1811 {
1812   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ*)A->data;
1813   PetscErrorCode ierr;
1814 
1815   PetscFunctionBegin;
1816   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1817   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1818   PetscFunctionReturn(0);
1819 }
1820 
1821 #undef __FUNCT__
1822 #define __FUNCT__ "MatImaginaryPart_MPIBAIJ"
1823 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
1824 {
1825   Mat_MPIBAIJ   *a = (Mat_MPIBAIJ*)A->data;
1826   PetscErrorCode ierr;
1827 
1828   PetscFunctionBegin;
1829   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1830   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1831   PetscFunctionReturn(0);
1832 }
1833 
1834 #undef __FUNCT__
1835 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ"
1836 PetscErrorCode MatGetSubMatrix_MPIBAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
1837 {
1838   PetscErrorCode ierr;
1839   IS             iscol_local;
1840   PetscInt       csize;
1841 
1842   PetscFunctionBegin;
1843   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
1844   if (call == MAT_REUSE_MATRIX) {
1845     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
1846     if (!iscol_local) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
1847   } else {
1848     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
1849   }
1850   ierr = MatGetSubMatrix_MPIBAIJ_Private(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
1851   if (call == MAT_INITIAL_MATRIX) {
1852     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
1853     ierr = ISDestroy(iscol_local);CHKERRQ(ierr);
1854   }
1855   PetscFunctionReturn(0);
1856 }
1857 
1858 #undef __FUNCT__
1859 #define __FUNCT__ "MatGetSubMatrix_MPIBAIJ"
1860 /*
1861     Not great since it makes two copies of the submatrix, first an SeqBAIJ
1862   in local and then by concatenating the local matrices the end result.
1863   Writing it directly would be much like MatGetSubMatrices_MPIBAIJ()
1864 */
1865 PetscErrorCode MatGetSubMatrix_MPIBAIJ_Private(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
1866 {
1867   PetscErrorCode ierr;
1868   PetscMPIInt    rank,size;
1869   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs;
1870   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
1871   Mat            *local,M,Mreuse;
1872   MatScalar      *vwork,*aa;
1873   MPI_Comm       comm = ((PetscObject)mat)->comm;
1874   Mat_SeqBAIJ    *aij;
1875 
1876 
1877   PetscFunctionBegin;
1878   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1879   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1880 
1881   if (call ==  MAT_REUSE_MATRIX) {
1882     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject *)&Mreuse);CHKERRQ(ierr);
1883     if (!Mreuse) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
1884     local = &Mreuse;
1885     ierr  = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,&local);CHKERRQ(ierr);
1886   } else {
1887     ierr   = MatGetSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&local);CHKERRQ(ierr);
1888     Mreuse = *local;
1889     ierr   = PetscFree(local);CHKERRQ(ierr);
1890   }
1891 
1892   /*
1893       m - number of local rows
1894       n - number of columns (same on all processors)
1895       rstart - first row in new global matrix generated
1896   */
1897   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
1898   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
1899   m    = m/bs;
1900   n    = n/bs;
1901 
1902   if (call == MAT_INITIAL_MATRIX) {
1903     aij = (Mat_SeqBAIJ*)(Mreuse)->data;
1904     ii  = aij->i;
1905     jj  = aij->j;
1906 
1907     /*
1908         Determine the number of non-zeros in the diagonal and off-diagonal
1909         portions of the matrix in order to do correct preallocation
1910     */
1911 
1912     /* first get start and end of "diagonal" columns */
1913     if (csize == PETSC_DECIDE) {
1914       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
1915       if (mglobal == n*bs) { /* square matrix */
1916 	nlocal = m;
1917       } else {
1918         nlocal = n/size + ((n % size) > rank);
1919       }
1920     } else {
1921       nlocal = csize/bs;
1922     }
1923     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
1924     rstart = rend - nlocal;
1925     if (rank == size - 1 && rend != n) {
1926       SETERRQ2(PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
1927     }
1928 
1929     /* next, compute all the lengths */
1930     ierr  = PetscMalloc((2*m+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
1931     olens = dlens + m;
1932     for (i=0; i<m; i++) {
1933       jend = ii[i+1] - ii[i];
1934       olen = 0;
1935       dlen = 0;
1936       for (j=0; j<jend; j++) {
1937         if (*jj < rstart || *jj >= rend) olen++;
1938         else dlen++;
1939         jj++;
1940       }
1941       olens[i] = olen;
1942       dlens[i] = dlen;
1943     }
1944     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
1945     ierr = MatSetSizes(M,bs*m,bs*nlocal,PETSC_DECIDE,bs*n);CHKERRQ(ierr);
1946     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
1947     ierr = MatMPIBAIJSetPreallocation(M,bs,0,dlens,0,olens);CHKERRQ(ierr);
1948     ierr = PetscFree(dlens);CHKERRQ(ierr);
1949   } else {
1950     PetscInt ml,nl;
1951 
1952     M = *newmat;
1953     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
1954     if (ml != m) SETERRQ(PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
1955     ierr = MatZeroEntries(M);CHKERRQ(ierr);
1956     /*
1957          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
1958        rather than the slower MatSetValues().
1959     */
1960     M->was_assembled = PETSC_TRUE;
1961     M->assembled     = PETSC_FALSE;
1962   }
1963   ierr = MatSetOption(M,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
1964   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
1965   aij = (Mat_SeqBAIJ*)(Mreuse)->data;
1966   ii  = aij->i;
1967   jj  = aij->j;
1968   aa  = aij->a;
1969   for (i=0; i<m; i++) {
1970     row   = rstart/bs + i;
1971     nz    = ii[i+1] - ii[i];
1972     cwork = jj;     jj += nz;
1973     vwork = aa;     aa += nz;
1974     ierr = MatSetValuesBlocked_MPIBAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
1975   }
1976 
1977   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1978   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1979   *newmat = M;
1980 
1981   /* save submatrix used in processor for next request */
1982   if (call ==  MAT_INITIAL_MATRIX) {
1983     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
1984     ierr = PetscObjectDereference((PetscObject)Mreuse);CHKERRQ(ierr);
1985   }
1986 
1987   PetscFunctionReturn(0);
1988 }
1989 
1990 #undef __FUNCT__
1991 #define __FUNCT__ "MatPermute_MPIBAIJ"
1992 PetscErrorCode MatPermute_MPIBAIJ(Mat A,IS rowp,IS colp,Mat *B)
1993 {
1994   MPI_Comm       comm,pcomm;
1995   PetscInt       first,local_size,nrows;
1996   const PetscInt *rows;
1997   PetscMPIInt    size;
1998   IS             crowp,growp,irowp,lrowp,lcolp,icolp;
1999   PetscErrorCode ierr;
2000 
2001   PetscFunctionBegin;
2002   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
2003   /* make a collective version of 'rowp' */
2004   ierr = PetscObjectGetComm((PetscObject)rowp,&pcomm);CHKERRQ(ierr);
2005   if (pcomm==comm) {
2006     crowp = rowp;
2007   } else {
2008     ierr = ISGetSize(rowp,&nrows);CHKERRQ(ierr);
2009     ierr = ISGetIndices(rowp,&rows);CHKERRQ(ierr);
2010     ierr = ISCreateGeneral(comm,nrows,rows,&crowp);CHKERRQ(ierr);
2011     ierr = ISRestoreIndices(rowp,&rows);CHKERRQ(ierr);
2012   }
2013   /* collect the global row permutation and invert it */
2014   ierr = ISAllGather(crowp,&growp);CHKERRQ(ierr);
2015   ierr = ISSetPermutation(growp);CHKERRQ(ierr);
2016   if (pcomm!=comm) {
2017     ierr = ISDestroy(crowp);CHKERRQ(ierr);
2018   }
2019   ierr = ISInvertPermutation(growp,PETSC_DECIDE,&irowp);CHKERRQ(ierr);
2020   /* get the local target indices */
2021   ierr = MatGetOwnershipRange(A,&first,PETSC_NULL);CHKERRQ(ierr);
2022   ierr = MatGetLocalSize(A,&local_size,PETSC_NULL);CHKERRQ(ierr);
2023   ierr = ISGetIndices(irowp,&rows);CHKERRQ(ierr);
2024   ierr = ISCreateGeneral(MPI_COMM_SELF,local_size,rows+first,&lrowp);CHKERRQ(ierr);
2025   ierr = ISRestoreIndices(irowp,&rows);CHKERRQ(ierr);
2026   ierr = ISDestroy(irowp);CHKERRQ(ierr);
2027   /* the column permutation is so much easier;
2028      make a local version of 'colp' and invert it */
2029   ierr = PetscObjectGetComm((PetscObject)colp,&pcomm);CHKERRQ(ierr);
2030   ierr = MPI_Comm_size(pcomm,&size);CHKERRQ(ierr);
2031   if (size==1) {
2032     lcolp = colp;
2033   } else {
2034     ierr = ISGetSize(colp,&nrows);CHKERRQ(ierr);
2035     ierr = ISGetIndices(colp,&rows);CHKERRQ(ierr);
2036     ierr = ISCreateGeneral(MPI_COMM_SELF,nrows,rows,&lcolp);CHKERRQ(ierr);
2037   }
2038   ierr = ISSetPermutation(lcolp);CHKERRQ(ierr);
2039   ierr = ISInvertPermutation(lcolp,PETSC_DECIDE,&icolp);CHKERRQ(ierr);
2040   ierr = ISSetPermutation(icolp);CHKERRQ(ierr);
2041   if (size>1) {
2042     ierr = ISRestoreIndices(colp,&rows);CHKERRQ(ierr);
2043     ierr = ISDestroy(lcolp);CHKERRQ(ierr);
2044   }
2045   /* now we just get the submatrix */
2046   ierr = MatGetSubMatrix_MPIBAIJ_Private(A,lrowp,icolp,local_size,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr);
2047   /* clean up */
2048   ierr = ISDestroy(lrowp);CHKERRQ(ierr);
2049   ierr = ISDestroy(icolp);CHKERRQ(ierr);
2050   PetscFunctionReturn(0);
2051 }
2052 
2053 #undef __FUNCT__
2054 #define __FUNCT__ "MatGetGhosts_MPIBAIJ"
2055 PetscErrorCode PETSCMAT_DLLEXPORT MatGetGhosts_MPIBAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
2056 {
2057   Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*) mat->data;
2058   Mat_SeqBAIJ    *B = (Mat_SeqBAIJ*)baij->B->data;
2059 
2060   PetscFunctionBegin;
2061   if (nghosts) { *nghosts = B->nbs;}
2062   if (ghosts) {*ghosts = baij->garray;}
2063   PetscFunctionReturn(0);
2064 }
2065 
2066 EXTERN PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat);
2067 
2068 #undef __FUNCT__
2069 #define __FUNCT__ "MatFDColoringCreate_MPIBAIJ"
2070 /*
2071     This routine is almost identical to MatFDColoringCreate_MPIBAIJ()!
2072 */
2073 PetscErrorCode MatFDColoringCreate_MPIBAIJ(Mat mat,ISColoring iscoloring,MatFDColoring c)
2074 {
2075   Mat_MPIBAIJ            *baij = (Mat_MPIBAIJ*)mat->data;
2076   PetscErrorCode        ierr;
2077   PetscMPIInt           size,*ncolsonproc,*disp,nn;
2078   PetscInt              bs,i,n,nrows,j,k,m,*rows = 0,*A_ci,*A_cj,ncols,col;
2079   const PetscInt        *is;
2080   PetscInt              nis = iscoloring->n,nctot,*cols,*B_ci,*B_cj;
2081   PetscInt              *rowhit,M,cstart,cend,colb;
2082   PetscInt              *columnsforrow,l;
2083   IS                    *isa;
2084   PetscTruth             done,flg;
2085   ISLocalToGlobalMapping map = mat->bmapping;
2086   PetscInt               *ltog = (map ? map->indices : (PetscInt*) PETSC_NULL) ,ctype=c->ctype;
2087 
2088   PetscFunctionBegin;
2089   if (!mat->assembled) {
2090     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Matrix must be assembled first; MatAssemblyBegin/End();");
2091   }
2092   if (ctype == IS_COLORING_GHOSTED && !map) SETERRQ(PETSC_ERR_ARG_INCOMP,"When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMappingBlock");
2093 
2094   ierr = ISColoringGetIS(iscoloring,PETSC_IGNORE,&isa);CHKERRQ(ierr);
2095 
2096   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
2097   M                = mat->rmap->n/bs;
2098   cstart           = mat->cmap->rstart/bs;
2099   cend             = mat->cmap->rend/bs;
2100   c->M             = mat->rmap->N/bs;  /* set the global rows and columns and local rows */
2101   c->N             = mat->cmap->N/bs;
2102   c->m             = mat->rmap->n/bs;
2103   c->rstart        = mat->rmap->rstart/bs;
2104 
2105   c->ncolors       = nis;
2106   ierr             = PetscMalloc(nis*sizeof(PetscInt),&c->ncolumns);CHKERRQ(ierr);
2107   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->columns);CHKERRQ(ierr);
2108   ierr             = PetscMalloc(nis*sizeof(PetscInt),&c->nrows);CHKERRQ(ierr);
2109   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->rows);CHKERRQ(ierr);
2110   ierr             = PetscMalloc(nis*sizeof(PetscInt*),&c->columnsforrow);CHKERRQ(ierr);
2111   ierr = PetscLogObjectMemory(c,5*nis*sizeof(PetscInt));CHKERRQ(ierr);
2112 
2113   /* Allow access to data structures of local part of matrix */
2114   if (!baij->colmap) {
2115     ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
2116   }
2117   ierr = MatGetColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr);
2118   ierr = MatGetColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr);
2119 
2120   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&rowhit);CHKERRQ(ierr);
2121   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&columnsforrow);CHKERRQ(ierr);
2122 
2123   for (i=0; i<nis; i++) {
2124     ierr = ISGetLocalSize(isa[i],&n);CHKERRQ(ierr);
2125     ierr = ISGetIndices(isa[i],&is);CHKERRQ(ierr);
2126     c->ncolumns[i] = n;
2127     if (n) {
2128       ierr = PetscMalloc(n*sizeof(PetscInt),&c->columns[i]);CHKERRQ(ierr);
2129       ierr = PetscLogObjectMemory(c,n*sizeof(PetscInt));CHKERRQ(ierr);
2130       ierr = PetscMemcpy(c->columns[i],is,n*sizeof(PetscInt));CHKERRQ(ierr);
2131     } else {
2132       c->columns[i]  = 0;
2133     }
2134 
2135     if (ctype == IS_COLORING_GLOBAL){
2136       /* Determine the total (parallel) number of columns of this color */
2137       ierr = MPI_Comm_size(((PetscObject)mat)->comm,&size);CHKERRQ(ierr);
2138       ierr = PetscMalloc2(size,PetscMPIInt,&ncolsonproc,size,PetscMPIInt,&disp);CHKERRQ(ierr);
2139 
2140       nn   = PetscMPIIntCast(n);
2141       ierr = MPI_Allgather(&nn,1,MPI_INT,ncolsonproc,1,MPI_INT,((PetscObject)mat)->comm);CHKERRQ(ierr);
2142       nctot = 0; for (j=0; j<size; j++) {nctot += ncolsonproc[j];}
2143       if (!nctot) {
2144         ierr = PetscInfo(mat,"Coloring of matrix has some unneeded colors with no corresponding rows\n");CHKERRQ(ierr);
2145       }
2146 
2147       disp[0] = 0;
2148       for (j=1; j<size; j++) {
2149         disp[j] = disp[j-1] + ncolsonproc[j-1];
2150       }
2151 
2152       /* Get complete list of columns for color on each processor */
2153       ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2154       ierr = MPI_Allgatherv((void*)is,n,MPIU_INT,cols,ncolsonproc,disp,MPIU_INT,((PetscObject)mat)->comm);CHKERRQ(ierr);
2155       ierr = PetscFree2(ncolsonproc,disp);CHKERRQ(ierr);
2156     } else if (ctype == IS_COLORING_GHOSTED){
2157       /* Determine local number of columns of this color on this process, including ghost points */
2158       nctot = n;
2159       ierr = PetscMalloc((nctot+1)*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2160       ierr = PetscMemcpy(cols,is,n*sizeof(PetscInt));CHKERRQ(ierr);
2161     } else {
2162       SETERRQ(PETSC_ERR_SUP,"Not provided for this MatFDColoring type");
2163     }
2164 
2165     /*
2166        Mark all rows affect by these columns
2167     */
2168     /* Temporary option to allow for debugging/testing */
2169     flg  = PETSC_FALSE;
2170     ierr = PetscOptionsGetTruth(PETSC_NULL,"-matfdcoloring_slow",&flg,PETSC_NULL);CHKERRQ(ierr);
2171     if (!flg) {/*-----------------------------------------------------------------------------*/
2172       /* crude, fast version */
2173       ierr = PetscMemzero(rowhit,M*sizeof(PetscInt));CHKERRQ(ierr);
2174       /* loop over columns*/
2175       for (j=0; j<nctot; j++) {
2176         if (ctype == IS_COLORING_GHOSTED) {
2177           col = ltog[cols[j]];
2178         } else {
2179           col  = cols[j];
2180         }
2181         if (col >= cstart && col < cend) {
2182           /* column is in diagonal block of matrix */
2183           rows = A_cj + A_ci[col-cstart];
2184           m    = A_ci[col-cstart+1] - A_ci[col-cstart];
2185         } else {
2186 #if defined (PETSC_USE_CTABLE)
2187           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr)
2188 	  colb --;
2189 #else
2190           colb = baij->colmap[col] - 1;
2191 #endif
2192           if (colb == -1) {
2193             m = 0;
2194           } else {
2195             colb = colb/bs;
2196             rows = B_cj + B_ci[colb];
2197             m    = B_ci[colb+1] - B_ci[colb];
2198           }
2199         }
2200         /* loop over columns marking them in rowhit */
2201         for (k=0; k<m; k++) {
2202           rowhit[*rows++] = col + 1;
2203         }
2204       }
2205 
2206       /* count the number of hits */
2207       nrows = 0;
2208       for (j=0; j<M; j++) {
2209         if (rowhit[j]) nrows++;
2210       }
2211       c->nrows[i]         = nrows;
2212       ierr                = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
2213       ierr                = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
2214       ierr = PetscLogObjectMemory(c,2*(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr);
2215       nrows = 0;
2216       for (j=0; j<M; j++) {
2217         if (rowhit[j]) {
2218           c->rows[i][nrows]           = j;
2219           c->columnsforrow[i][nrows] = rowhit[j] - 1;
2220           nrows++;
2221         }
2222       }
2223     } else {/*-------------------------------------------------------------------------------*/
2224       /* slow version, using rowhit as a linked list */
2225       PetscInt currentcol,fm,mfm;
2226       rowhit[M] = M;
2227       nrows     = 0;
2228       /* loop over columns*/
2229       for (j=0; j<nctot; j++) {
2230         if (ctype == IS_COLORING_GHOSTED) {
2231           col = ltog[cols[j]];
2232         } else {
2233           col  = cols[j];
2234         }
2235         if (col >= cstart && col < cend) {
2236           /* column is in diagonal block of matrix */
2237           rows = A_cj + A_ci[col-cstart];
2238           m    = A_ci[col-cstart+1] - A_ci[col-cstart];
2239         } else {
2240 #if defined (PETSC_USE_CTABLE)
2241           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2242           colb --;
2243 #else
2244           colb = baij->colmap[col] - 1;
2245 #endif
2246           if (colb == -1) {
2247             m = 0;
2248           } else {
2249             colb = colb/bs;
2250             rows = B_cj + B_ci[colb];
2251             m    = B_ci[colb+1] - B_ci[colb];
2252           }
2253         }
2254 
2255         /* loop over columns marking them in rowhit */
2256         fm    = M; /* fm points to first entry in linked list */
2257         for (k=0; k<m; k++) {
2258           currentcol = *rows++;
2259 	  /* is it already in the list? */
2260           do {
2261             mfm  = fm;
2262             fm   = rowhit[fm];
2263           } while (fm < currentcol);
2264           /* not in list so add it */
2265           if (fm != currentcol) {
2266             nrows++;
2267             columnsforrow[currentcol] = col;
2268             /* next three lines insert new entry into linked list */
2269             rowhit[mfm]               = currentcol;
2270             rowhit[currentcol]        = fm;
2271             fm                        = currentcol;
2272             /* fm points to present position in list since we know the columns are sorted */
2273           } else {
2274             SETERRQ(PETSC_ERR_PLIB,"Invalid coloring of matrix detected");
2275           }
2276         }
2277       }
2278       c->nrows[i]         = nrows;
2279       ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->rows[i]);CHKERRQ(ierr);
2280       ierr = PetscMalloc((nrows+1)*sizeof(PetscInt),&c->columnsforrow[i]);CHKERRQ(ierr);
2281       ierr = PetscLogObjectMemory(c,(nrows+1)*sizeof(PetscInt));CHKERRQ(ierr);
2282       /* now store the linked list of rows into c->rows[i] */
2283       nrows = 0;
2284       fm    = rowhit[M];
2285       do {
2286         c->rows[i][nrows]            = fm;
2287         c->columnsforrow[i][nrows++] = columnsforrow[fm];
2288         fm                           = rowhit[fm];
2289       } while (fm < M);
2290     } /* ---------------------------------------------------------------------------------------*/
2291     ierr = PetscFree(cols);CHKERRQ(ierr);
2292   }
2293 
2294   /* Optimize by adding the vscale, and scaleforrow[][] fields */
2295   /*
2296        vscale will contain the "diagonal" on processor scalings followed by the off processor
2297   */
2298   if (ctype == IS_COLORING_GLOBAL) {
2299     PetscInt *garray;
2300     ierr = PetscMalloc(baij->B->cmap->n*sizeof(PetscInt),&garray);CHKERRQ(ierr);
2301     for (i=0; i<baij->B->cmap->n/bs; i++) {
2302       for (j=0; j<bs; j++) {
2303         garray[i*bs+j] = bs*baij->garray[i]+j;
2304       }
2305     }
2306     ierr = VecCreateGhost(((PetscObject)mat)->comm,baij->A->rmap->n,PETSC_DETERMINE,baij->B->cmap->n,garray,&c->vscale);CHKERRQ(ierr);
2307     ierr = PetscFree(garray);CHKERRQ(ierr);
2308     CHKMEMQ;
2309     ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
2310     for (k=0; k<c->ncolors; k++) {
2311       ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
2312       for (l=0; l<c->nrows[k]; l++) {
2313         col = c->columnsforrow[k][l];
2314         if (col >= cstart && col < cend) {
2315           /* column is in diagonal block of matrix */
2316           colb = col - cstart;
2317         } else {
2318           /* column  is in "off-processor" part */
2319 #if defined (PETSC_USE_CTABLE)
2320           ierr = PetscTableFind(baij->colmap,col+1,&colb);CHKERRQ(ierr);
2321           colb --;
2322 #else
2323           colb = baij->colmap[col] - 1;
2324 #endif
2325           colb = colb/bs;
2326           colb += cend - cstart;
2327         }
2328         c->vscaleforrow[k][l] = colb;
2329       }
2330     }
2331   } else if (ctype == IS_COLORING_GHOSTED) {
2332     /* Get gtol mapping */
2333     PetscInt N = mat->cmap->N, *gtol;
2334     ierr = PetscMalloc((N+1)*sizeof(PetscInt),&gtol);CHKERRQ(ierr);
2335     for (i=0; i<N; i++) gtol[i] = -1;
2336     for (i=0; i<map->n; i++) gtol[ltog[i]] = i;
2337 
2338     c->vscale = 0; /* will be created in MatFDColoringApply() */
2339     ierr = PetscMalloc(c->ncolors*sizeof(PetscInt*),&c->vscaleforrow);CHKERRQ(ierr);
2340     for (k=0; k<c->ncolors; k++) {
2341       ierr = PetscMalloc((c->nrows[k]+1)*sizeof(PetscInt),&c->vscaleforrow[k]);CHKERRQ(ierr);
2342       for (l=0; l<c->nrows[k]; l++) {
2343         col = c->columnsforrow[k][l];      /* global column index */
2344         c->vscaleforrow[k][l] = gtol[col]; /* local column index */
2345       }
2346     }
2347     ierr = PetscFree(gtol);CHKERRQ(ierr);
2348   }
2349   ierr = ISColoringRestoreIS(iscoloring,&isa);CHKERRQ(ierr);
2350 
2351   ierr = PetscFree(rowhit);CHKERRQ(ierr);
2352   ierr = PetscFree(columnsforrow);CHKERRQ(ierr);
2353   ierr = MatRestoreColumnIJ(baij->A,0,PETSC_FALSE,PETSC_FALSE,&ncols,&A_ci,&A_cj,&done);CHKERRQ(ierr);
2354   ierr = MatRestoreColumnIJ(baij->B,0,PETSC_FALSE,PETSC_FALSE,&ncols,&B_ci,&B_cj,&done);CHKERRQ(ierr);
2355     CHKMEMQ;
2356   PetscFunctionReturn(0);
2357 }
2358 
2359 #undef __FUNCT__
2360 #define __FUNCT__ "MatGetSeqNonzerostructure_MPIBAIJ"
2361 PetscErrorCode MatGetSeqNonzerostructure_MPIBAIJ(Mat A,Mat *newmat)
2362 {
2363   Mat            B;
2364   Mat_MPIBAIJ    *a = (Mat_MPIBAIJ *)A->data;
2365   Mat_SeqBAIJ    *ad = (Mat_SeqBAIJ*)a->A->data,*bd = (Mat_SeqBAIJ*)a->B->data;
2366   Mat_SeqAIJ     *b;
2367   PetscErrorCode ierr;
2368   PetscMPIInt    size,rank,*recvcounts = 0,*displs = 0;
2369   PetscInt       sendcount,i,*rstarts = A->rmap->range,n,cnt,j,bs = A->rmap->bs;
2370   PetscInt       m,*garray = a->garray,*lens,*jsendbuf,*a_jsendbuf,*b_jsendbuf;
2371 
2372   PetscFunctionBegin;
2373   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
2374   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
2375 
2376   /* ----------------------------------------------------------------
2377      Tell every processor the number of nonzeros per row
2378   */
2379   ierr = PetscMalloc((A->rmap->N/bs)*sizeof(PetscInt),&lens);CHKERRQ(ierr);
2380   for (i=A->rmap->rstart/bs; i<A->rmap->rend/bs; i++) {
2381     lens[i] = ad->i[i-A->rmap->rstart/bs+1] - ad->i[i-A->rmap->rstart/bs] + bd->i[i-A->rmap->rstart/bs+1] - bd->i[i-A->rmap->rstart/bs];
2382   }
2383   sendcount = A->rmap->rend/bs - A->rmap->rstart/bs;
2384   ierr = PetscMalloc(2*size*sizeof(PetscMPIInt),&recvcounts);CHKERRQ(ierr);
2385   displs     = recvcounts + size;
2386   for (i=0; i<size; i++) {
2387     recvcounts[i] = A->rmap->range[i+1]/bs - A->rmap->range[i]/bs;
2388     displs[i]     = A->rmap->range[i]/bs;
2389   }
2390 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2391   ierr  = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2392 #else
2393   ierr  = MPI_Allgatherv(lens+A->rmap->rstart/bs,sendcount,MPIU_INT,lens,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2394 #endif
2395   /* ---------------------------------------------------------------
2396      Create the sequential matrix of the same type as the local block diagonal
2397   */
2398   ierr  = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
2399   ierr  = MatSetSizes(B,A->rmap->N/bs,A->cmap->N/bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2400   ierr  = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
2401   ierr  = MatSeqAIJSetPreallocation(B,0,lens);CHKERRQ(ierr);
2402   b = (Mat_SeqAIJ *)B->data;
2403 
2404   /*--------------------------------------------------------------------
2405     Copy my part of matrix column indices over
2406   */
2407   sendcount  = ad->nz + bd->nz;
2408   jsendbuf   = b->j + b->i[rstarts[rank]/bs];
2409   a_jsendbuf = ad->j;
2410   b_jsendbuf = bd->j;
2411   n          = A->rmap->rend/bs - A->rmap->rstart/bs;
2412   cnt        = 0;
2413   for (i=0; i<n; i++) {
2414 
2415     /* put in lower diagonal portion */
2416     m = bd->i[i+1] - bd->i[i];
2417     while (m > 0) {
2418       /* is it above diagonal (in bd (compressed) numbering) */
2419       if (garray[*b_jsendbuf] > A->rmap->rstart/bs + i) break;
2420       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2421       m--;
2422     }
2423 
2424     /* put in diagonal portion */
2425     for (j=ad->i[i]; j<ad->i[i+1]; j++) {
2426       jsendbuf[cnt++] = A->rmap->rstart/bs + *a_jsendbuf++;
2427     }
2428 
2429     /* put in upper diagonal portion */
2430     while (m-- > 0) {
2431       jsendbuf[cnt++] = garray[*b_jsendbuf++];
2432     }
2433   }
2434   if (cnt != sendcount) SETERRQ2(PETSC_ERR_PLIB,"Corrupted PETSc matrix: nz given %D actual nz %D",sendcount,cnt);
2435 
2436   /*--------------------------------------------------------------------
2437     Gather all column indices to all processors
2438   */
2439   for (i=0; i<size; i++) {
2440     recvcounts[i] = 0;
2441     for (j=A->rmap->range[i]/bs; j<A->rmap->range[i+1]/bs; j++) {
2442       recvcounts[i] += lens[j];
2443     }
2444   }
2445   displs[0]  = 0;
2446   for (i=1; i<size; i++) {
2447     displs[i] = displs[i-1] + recvcounts[i-1];
2448   }
2449 #if defined(PETSC_HAVE_MPI_IN_PLACE)
2450   ierr = MPI_Allgatherv(MPI_IN_PLACE,0,MPI_DATATYPE_NULL,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2451 #else
2452   ierr = MPI_Allgatherv(jsendbuf,sendcount,MPIU_INT,b->j,recvcounts,displs,MPIU_INT,((PetscObject)A)->comm);CHKERRQ(ierr);
2453 #endif
2454   /*--------------------------------------------------------------------
2455     Assemble the matrix into useable form (note numerical values not yet set)
2456   */
2457   /* set the b->ilen (length of each row) values */
2458   ierr = PetscMemcpy(b->ilen,lens,(A->rmap->N/bs)*sizeof(PetscInt));CHKERRQ(ierr);
2459   /* set the b->i indices */
2460   b->i[0] = 0;
2461   for (i=1; i<=A->rmap->N/bs; i++) {
2462     b->i[i] = b->i[i-1] + lens[i-1];
2463   }
2464   ierr = PetscFree(lens);CHKERRQ(ierr);
2465   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2466   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2467   ierr = PetscFree(recvcounts);CHKERRQ(ierr);
2468 
2469   if (A->symmetric){
2470     ierr = MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2471   } else if (A->hermitian) {
2472     ierr = MatSetOption(B,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr);
2473   } else if (A->structurally_symmetric) {
2474     ierr = MatSetOption(B,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr);
2475   }
2476   *newmat = B;
2477   PetscFunctionReturn(0);
2478 }
2479 
2480 #undef __FUNCT__
2481 #define __FUNCT__ "MatSOR_MPIBAIJ"
2482 PetscErrorCode MatSOR_MPIBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2483 {
2484   Mat_MPIBAIJ    *mat = (Mat_MPIBAIJ*)matin->data;
2485   PetscErrorCode ierr;
2486   Vec            bb1 = 0;
2487 
2488   PetscFunctionBegin;
2489   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) {
2490     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2491   }
2492 
2493   if (flag == SOR_APPLY_UPPER) {
2494     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2495     PetscFunctionReturn(0);
2496   }
2497 
2498   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2499     if (flag & SOR_ZERO_INITIAL_GUESS) {
2500       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2501       its--;
2502     }
2503 
2504     while (its--) {
2505       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2506       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2507 
2508       /* update rhs: bb1 = bb - B*x */
2509       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2510       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2511 
2512       /* local sweep */
2513       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2514     }
2515   } else if (flag & SOR_LOCAL_FORWARD_SWEEP){
2516     if (flag & SOR_ZERO_INITIAL_GUESS) {
2517       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2518       its--;
2519     }
2520     while (its--) {
2521       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2522       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2523 
2524       /* update rhs: bb1 = bb - B*x */
2525       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2526       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2527 
2528       /* local sweep */
2529       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2530     }
2531   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP){
2532     if (flag & SOR_ZERO_INITIAL_GUESS) {
2533       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2534       its--;
2535     }
2536     while (its--) {
2537       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2538       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2539 
2540       /* update rhs: bb1 = bb - B*x */
2541       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2542       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
2543 
2544       /* local sweep */
2545       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
2546     }
2547   } else {
2548     SETERRQ(PETSC_ERR_SUP,"Parallel version of SOR requested not supported");
2549   }
2550 
2551   if (bb1) {ierr = VecDestroy(bb1);CHKERRQ(ierr);}
2552   PetscFunctionReturn(0);
2553 }
2554 
2555 extern PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_BAIJ(Mat,MatFDColoring,Vec,MatStructure*,void*);
2556 
2557 
2558 /* -------------------------------------------------------------------*/
2559 static struct _MatOps MatOps_Values = {
2560        MatSetValues_MPIBAIJ,
2561        MatGetRow_MPIBAIJ,
2562        MatRestoreRow_MPIBAIJ,
2563        MatMult_MPIBAIJ,
2564 /* 4*/ MatMultAdd_MPIBAIJ,
2565        MatMultTranspose_MPIBAIJ,
2566        MatMultTransposeAdd_MPIBAIJ,
2567        0,
2568        0,
2569        0,
2570 /*10*/ 0,
2571        0,
2572        0,
2573        MatSOR_MPIBAIJ,
2574        MatTranspose_MPIBAIJ,
2575 /*15*/ MatGetInfo_MPIBAIJ,
2576        MatEqual_MPIBAIJ,
2577        MatGetDiagonal_MPIBAIJ,
2578        MatDiagonalScale_MPIBAIJ,
2579        MatNorm_MPIBAIJ,
2580 /*20*/ MatAssemblyBegin_MPIBAIJ,
2581        MatAssemblyEnd_MPIBAIJ,
2582        MatSetOption_MPIBAIJ,
2583        MatZeroEntries_MPIBAIJ,
2584 /*24*/ MatZeroRows_MPIBAIJ,
2585        0,
2586        0,
2587        0,
2588        0,
2589 /*29*/ MatSetUpPreallocation_MPIBAIJ,
2590        0,
2591        0,
2592        0,
2593        0,
2594 /*34*/ MatDuplicate_MPIBAIJ,
2595        0,
2596        0,
2597        0,
2598        0,
2599 /*39*/ MatAXPY_MPIBAIJ,
2600        MatGetSubMatrices_MPIBAIJ,
2601        MatIncreaseOverlap_MPIBAIJ,
2602        MatGetValues_MPIBAIJ,
2603        MatCopy_MPIBAIJ,
2604 /*44*/ 0,
2605        MatScale_MPIBAIJ,
2606        0,
2607        0,
2608        0,
2609 /*49*/ MatSetBlockSize_MPIBAIJ,
2610        0,
2611        0,
2612        0,
2613        0,
2614 /*54*/ MatFDColoringCreate_MPIBAIJ,
2615        0,
2616        MatSetUnfactored_MPIBAIJ,
2617        MatPermute_MPIBAIJ,
2618        MatSetValuesBlocked_MPIBAIJ,
2619 /*59*/ MatGetSubMatrix_MPIBAIJ,
2620        MatDestroy_MPIBAIJ,
2621        MatView_MPIBAIJ,
2622        0,
2623        0,
2624 /*64*/ 0,
2625        0,
2626        0,
2627        0,
2628        0,
2629 /*69*/ MatGetRowMaxAbs_MPIBAIJ,
2630        0,
2631        0,
2632        0,
2633        0,
2634 /*74*/ 0,
2635        MatFDColoringApply_BAIJ,
2636        0,
2637        0,
2638        0,
2639 /*79*/ 0,
2640        0,
2641        0,
2642        0,
2643        MatLoad_MPIBAIJ,
2644 /*84*/ 0,
2645        0,
2646        0,
2647        0,
2648        0,
2649 /*89*/ 0,
2650        0,
2651        0,
2652        0,
2653        0,
2654 /*94*/ 0,
2655        0,
2656        0,
2657        0,
2658        0,
2659 /*99*/ 0,
2660        0,
2661        0,
2662        0,
2663        0,
2664 /*104*/0,
2665        MatRealPart_MPIBAIJ,
2666        MatImaginaryPart_MPIBAIJ,
2667        0,
2668        0,
2669 /*109*/0,
2670        0,
2671        0,
2672        0,
2673        0,
2674 /*114*/MatGetSeqNonzerostructure_MPIBAIJ,
2675        0,
2676        MatGetGhosts_MPIBAIJ
2677 };
2678 
2679 EXTERN_C_BEGIN
2680 #undef __FUNCT__
2681 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
2682 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
2683 {
2684   PetscFunctionBegin;
2685   *a      = ((Mat_MPIBAIJ *)A->data)->A;
2686   *iscopy = PETSC_FALSE;
2687   PetscFunctionReturn(0);
2688 }
2689 EXTERN_C_END
2690 
2691 EXTERN_C_BEGIN
2692 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType,MatReuse,Mat*);
2693 EXTERN_C_END
2694 
2695 EXTERN_C_BEGIN
2696 #undef __FUNCT__
2697 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR_MPIBAIJ"
2698 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2699 {
2700   PetscInt       m,rstart,cstart,cend;
2701   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
2702   const PetscInt *JJ=0;
2703   PetscScalar    *values=0;
2704   PetscErrorCode ierr;
2705 
2706   PetscFunctionBegin;
2707 
2708   if (bs < 1) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2709   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2710   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2711   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2712   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2713   m      = B->rmap->n/bs;
2714   rstart = B->rmap->rstart/bs;
2715   cstart = B->cmap->rstart/bs;
2716   cend   = B->cmap->rend/bs;
2717 
2718   if (ii[0]) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
2719   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
2720   for (i=0; i<m; i++) {
2721     nz = ii[i+1] - ii[i];
2722     if (nz < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
2723     nz_max = PetscMax(nz_max,nz);
2724     JJ  = jj + ii[i];
2725     for (j=0; j<nz; j++) {
2726       if (*JJ >= cstart) break;
2727       JJ++;
2728     }
2729     d = 0;
2730     for (; j<nz; j++) {
2731       if (*JJ++ >= cend) break;
2732       d++;
2733     }
2734     d_nnz[i] = d;
2735     o_nnz[i] = nz - d;
2736   }
2737   ierr = MatMPIBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
2738   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
2739 
2740   values = (PetscScalar*)V;
2741   if (!values) {
2742     ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2743     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2744   }
2745   for (i=0; i<m; i++) {
2746     PetscInt          row    = i + rstart;
2747     PetscInt          ncols  = ii[i+1] - ii[i];
2748     const PetscInt    *icols = jj + ii[i];
2749     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2750     ierr = MatSetValuesBlocked_MPIBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2751   }
2752 
2753   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2754   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2755   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2756 
2757   PetscFunctionReturn(0);
2758 }
2759 EXTERN_C_END
2760 
2761 #undef __FUNCT__
2762 #define __FUNCT__ "MatMPIBAIJSetPreallocationCSR"
2763 /*@C
2764    MatMPIBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
2765    (the default parallel PETSc format).
2766 
2767    Collective on MPI_Comm
2768 
2769    Input Parameters:
2770 +  A - the matrix
2771 .  i - the indices into j for the start of each local row (starts with zero)
2772 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2773 -  v - optional values in the matrix
2774 
2775    Level: developer
2776 
2777 .keywords: matrix, aij, compressed row, sparse, parallel
2778 
2779 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ
2780 @*/
2781 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2782 {
2783   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]);
2784 
2785   PetscFunctionBegin;
2786   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
2787   if (f) {
2788     ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr);
2789   }
2790   PetscFunctionReturn(0);
2791 }
2792 
2793 EXTERN_C_BEGIN
2794 #undef __FUNCT__
2795 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
2796 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
2797 {
2798   Mat_MPIBAIJ    *b;
2799   PetscErrorCode ierr;
2800   PetscInt       i, newbs = PetscAbs(bs);
2801 
2802   PetscFunctionBegin;
2803   if (bs < 0) {
2804     ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPIBAIJ matrix","Mat");CHKERRQ(ierr);
2805       ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr);
2806     ierr = PetscOptionsEnd();CHKERRQ(ierr);
2807     bs   = PetscAbs(bs);
2808   }
2809   if ((d_nnz || o_nnz) && newbs != bs) {
2810     SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting d_nnz or o_nnz");
2811   }
2812   bs = newbs;
2813 
2814 
2815   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
2816   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2817   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2818   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
2819   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
2820 
2821   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2822   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2823   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2824   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2825 
2826   if (d_nnz) {
2827     for (i=0; i<B->rmap->n/bs; i++) {
2828       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]);
2829     }
2830   }
2831   if (o_nnz) {
2832     for (i=0; i<B->rmap->n/bs; i++) {
2833       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]);
2834     }
2835   }
2836 
2837   b = (Mat_MPIBAIJ*)B->data;
2838   b->bs2 = bs*bs;
2839   b->mbs = B->rmap->n/bs;
2840   b->nbs = B->cmap->n/bs;
2841   b->Mbs = B->rmap->N/bs;
2842   b->Nbs = B->cmap->N/bs;
2843 
2844   for (i=0; i<=b->size; i++) {
2845     b->rangebs[i] = B->rmap->range[i]/bs;
2846   }
2847   b->rstartbs = B->rmap->rstart/bs;
2848   b->rendbs   = B->rmap->rend/bs;
2849   b->cstartbs = B->cmap->rstart/bs;
2850   b->cendbs   = B->cmap->rend/bs;
2851 
2852   if (!B->preallocated) {
2853     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2854     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2855     ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
2856     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
2857     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2858     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
2859     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
2860     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
2861     ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr);
2862   }
2863 
2864   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2865   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
2866   B->preallocated = PETSC_TRUE;
2867   PetscFunctionReturn(0);
2868 }
2869 EXTERN_C_END
2870 
2871 EXTERN_C_BEGIN
2872 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
2873 EXTERN PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
2874 EXTERN_C_END
2875 
2876 
2877 EXTERN_C_BEGIN
2878 #undef __FUNCT__
2879 #define __FUNCT__ "MatConvert_MPIBAIJ_MPIAdj"
2880 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIBAIJ_MPIAdj(Mat B, const MatType newtype,MatReuse reuse,Mat *adj)
2881 {
2882   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)B->data;
2883   PetscErrorCode ierr;
2884   Mat_SeqBAIJ    *d = (Mat_SeqBAIJ*) b->A->data,*o = (Mat_SeqBAIJ*) b->B->data;
2885   PetscInt       M = B->rmap->n/B->rmap->bs,i,*ii,*jj,cnt,j,k,rstart = B->rmap->rstart/B->rmap->bs;
2886   const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;
2887 
2888   PetscFunctionBegin;
2889   ierr = PetscMalloc((M+1)*sizeof(PetscInt),&ii);CHKERRQ(ierr);
2890   ii[0] = 0;
2891   CHKMEMQ;
2892   for (i=0; i<M; i++) {
2893     if ((id[i+1] - id[i]) < 0) SETERRQ3(PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,id[i],id[i+1]);
2894     if ((io[i+1] - io[i]) < 0) SETERRQ3(PETSC_ERR_PLIB,"Indices wrong %D %D %D",i,io[i],io[i+1]);
2895     ii[i+1] = ii[i] + id[i+1] - id[i] + io[i+1] - io[i];
2896     /* remove one from count of matrix has diagonal */
2897     for (j=id[i]; j<id[i+1]; j++) {
2898       if (jd[j] == i) {ii[i+1]--;break;}
2899     }
2900   CHKMEMQ;
2901   }
2902   ierr = PetscMalloc(ii[M]*sizeof(PetscInt),&jj);CHKERRQ(ierr);
2903   cnt = 0;
2904   for (i=0; i<M; i++) {
2905     for (j=io[i]; j<io[i+1]; j++) {
2906       if (garray[jo[j]] > rstart) break;
2907       jj[cnt++] = garray[jo[j]];
2908   CHKMEMQ;
2909     }
2910     for (k=id[i]; k<id[i+1]; k++) {
2911       if (jd[k] != i) {
2912         jj[cnt++] = rstart + jd[k];
2913   CHKMEMQ;
2914       }
2915     }
2916     for (;j<io[i+1]; j++) {
2917       jj[cnt++] = garray[jo[j]];
2918   CHKMEMQ;
2919     }
2920   }
2921   ierr = MatCreateMPIAdj(((PetscObject)B)->comm,M,B->cmap->N/B->rmap->bs,ii,jj,PETSC_NULL,adj);CHKERRQ(ierr);
2922   PetscFunctionReturn(0);
2923 }
2924 EXTERN_C_END
2925 
2926 /*MC
2927    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
2928 
2929    Options Database Keys:
2930 + -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
2931 . -mat_block_size <bs> - set the blocksize used to store the matrix
2932 - -mat_use_hash_table <fact>
2933 
2934   Level: beginner
2935 
2936 .seealso: MatCreateMPIBAIJ
2937 M*/
2938 
2939 EXTERN_C_BEGIN
2940 #undef __FUNCT__
2941 #define __FUNCT__ "MatCreate_MPIBAIJ"
2942 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPIBAIJ(Mat B)
2943 {
2944   Mat_MPIBAIJ    *b;
2945   PetscErrorCode ierr;
2946   PetscTruth     flg;
2947 
2948   PetscFunctionBegin;
2949   ierr = PetscNewLog(B,Mat_MPIBAIJ,&b);CHKERRQ(ierr);
2950   B->data = (void*)b;
2951 
2952 
2953   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2954   B->mapping    = 0;
2955   B->assembled  = PETSC_FALSE;
2956 
2957   B->insertmode = NOT_SET_VALUES;
2958   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
2959   ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr);
2960 
2961   /* build local table of row and column ownerships */
2962   ierr = PetscMalloc((b->size+1)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);
2963 
2964   /* build cache for off array entries formed */
2965   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
2966   b->donotstash  = PETSC_FALSE;
2967   b->colmap      = PETSC_NULL;
2968   b->garray      = PETSC_NULL;
2969   b->roworiented = PETSC_TRUE;
2970 
2971   /* stuff used in block assembly */
2972   b->barray       = 0;
2973 
2974   /* stuff used for matrix vector multiply */
2975   b->lvec         = 0;
2976   b->Mvctx        = 0;
2977 
2978   /* stuff for MatGetRow() */
2979   b->rowindices   = 0;
2980   b->rowvalues    = 0;
2981   b->getrowactive = PETSC_FALSE;
2982 
2983   /* hash table stuff */
2984   b->ht           = 0;
2985   b->hd           = 0;
2986   b->ht_size      = 0;
2987   b->ht_flag      = PETSC_FALSE;
2988   b->ht_fact      = 0;
2989   b->ht_total_ct  = 0;
2990   b->ht_insert_ct = 0;
2991 
2992   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 1","Mat");CHKERRQ(ierr);
2993     ierr = PetscOptionsTruth("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
2994     if (flg) {
2995       PetscReal fact = 1.39;
2996       ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
2997       ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr);
2998       if (fact <= 1.0) fact = 1.39;
2999       ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
3000       ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
3001     }
3002   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3003 
3004   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpibaij_mpiadj_C",
3005                                      "MatConvert_MPIBAIJ_MPIAdj",
3006                                       MatConvert_MPIBAIJ_MPIAdj);CHKERRQ(ierr);
3007   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3008                                      "MatStoreValues_MPIBAIJ",
3009                                      MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
3010   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3011                                      "MatRetrieveValues_MPIBAIJ",
3012                                      MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
3013   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
3014                                      "MatGetDiagonalBlock_MPIBAIJ",
3015                                      MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
3016   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C",
3017                                      "MatMPIBAIJSetPreallocation_MPIBAIJ",
3018                                      MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
3019   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocationCSR_C",
3020 				     "MatMPIBAIJSetPreallocationCSR_MPIBAIJ",
3021 				     MatMPIBAIJSetPreallocationCSR_MPIBAIJ);CHKERRQ(ierr);
3022   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
3023                                      "MatDiagonalScaleLocal_MPIBAIJ",
3024                                      MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
3025   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C",
3026                                      "MatSetHashTableFactor_MPIBAIJ",
3027                                      MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
3028   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIBAIJ);CHKERRQ(ierr);
3029   PetscFunctionReturn(0);
3030 }
3031 EXTERN_C_END
3032 
3033 /*MC
3034    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
3035 
3036    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
3037    and MATMPIBAIJ otherwise.
3038 
3039    Options Database Keys:
3040 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
3041 
3042   Level: beginner
3043 
3044 .seealso: MatCreateMPIBAIJ(),MATSEQBAIJ,MATMPIBAIJ, MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3045 M*/
3046 
3047 EXTERN_C_BEGIN
3048 #undef __FUNCT__
3049 #define __FUNCT__ "MatCreate_BAIJ"
3050 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_BAIJ(Mat A)
3051 {
3052   PetscErrorCode ierr;
3053   PetscMPIInt    size;
3054 
3055   PetscFunctionBegin;
3056   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
3057   if (size == 1) {
3058     ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr);
3059   } else {
3060     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
3061   }
3062   PetscFunctionReturn(0);
3063 }
3064 EXTERN_C_END
3065 
3066 #undef __FUNCT__
3067 #define __FUNCT__ "MatMPIBAIJSetPreallocation"
3068 /*@C
3069    MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in block AIJ format
3070    (block compressed row).  For good matrix assembly performance
3071    the user should preallocate the matrix storage by setting the parameters
3072    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3073    performance can be increased by more than a factor of 50.
3074 
3075    Collective on Mat
3076 
3077    Input Parameters:
3078 +  A - the matrix
3079 .  bs   - size of blockk
3080 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
3081            submatrix  (same for all local rows)
3082 .  d_nnz - array containing the number of block nonzeros in the various block rows
3083            of the in diagonal portion of the local (possibly different for each block
3084            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
3085 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
3086            submatrix (same for all local rows).
3087 -  o_nnz - array containing the number of nonzeros in the various block rows of the
3088            off-diagonal portion of the local submatrix (possibly different for
3089            each block row) or PETSC_NULL.
3090 
3091    If the *_nnz parameter is given then the *_nz parameter is ignored
3092 
3093    Options Database Keys:
3094 +   -mat_block_size - size of the blocks to use
3095 -   -mat_use_hash_table <fact>
3096 
3097    Notes:
3098    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
3099    than it must be used on all processors that share the object for that argument.
3100 
3101    Storage Information:
3102    For a square global matrix we define each processor's diagonal portion
3103    to be its local rows and the corresponding columns (a square submatrix);
3104    each processor's off-diagonal portion encompasses the remainder of the
3105    local matrix (a rectangular submatrix).
3106 
3107    The user can specify preallocated storage for the diagonal part of
3108    the local submatrix with either d_nz or d_nnz (not both).  Set
3109    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
3110    memory allocation.  Likewise, specify preallocated storage for the
3111    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3112 
3113    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3114    the figure below we depict these three local rows and all columns (0-11).
3115 
3116 .vb
3117            0 1 2 3 4 5 6 7 8 9 10 11
3118           -------------------
3119    row 3  |  o o o d d d o o o o o o
3120    row 4  |  o o o d d d o o o o o o
3121    row 5  |  o o o d d d o o o o o o
3122           -------------------
3123 .ve
3124 
3125    Thus, any entries in the d locations are stored in the d (diagonal)
3126    submatrix, and any entries in the o locations are stored in the
3127    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3128    stored simply in the MATSEQBAIJ format for compressed row storage.
3129 
3130    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3131    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3132    In general, for PDE problems in which most nonzeros are near the diagonal,
3133    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3134    or you will get TERRIBLE performance; see the users' manual chapter on
3135    matrices.
3136 
3137    You can call MatGetInfo() to get information on how effective the preallocation was;
3138    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3139    You can also run with the option -info and look for messages with the string
3140    malloc in them to see if additional memory allocation was needed.
3141 
3142    Level: intermediate
3143 
3144 .keywords: matrix, block, aij, compressed row, sparse, parallel
3145 
3146 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocationCSR()
3147 @*/
3148 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
3149 {
3150   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
3151 
3152   PetscFunctionBegin;
3153   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3154   if (f) {
3155     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3156   }
3157   PetscFunctionReturn(0);
3158 }
3159 
3160 #undef __FUNCT__
3161 #define __FUNCT__ "MatCreateMPIBAIJ"
3162 /*@C
3163    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
3164    (block compressed row).  For good matrix assembly performance
3165    the user should preallocate the matrix storage by setting the parameters
3166    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3167    performance can be increased by more than a factor of 50.
3168 
3169    Collective on MPI_Comm
3170 
3171    Input Parameters:
3172 +  comm - MPI communicator
3173 .  bs   - size of blockk
3174 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
3175            This value should be the same as the local size used in creating the
3176            y vector for the matrix-vector product y = Ax.
3177 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
3178            This value should be the same as the local size used in creating the
3179            x vector for the matrix-vector product y = Ax.
3180 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
3181 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
3182 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
3183            submatrix  (same for all local rows)
3184 .  d_nnz - array containing the number of nonzero blocks in the various block rows
3185            of the in diagonal portion of the local (possibly different for each block
3186            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
3187 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
3188            submatrix (same for all local rows).
3189 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
3190            off-diagonal portion of the local submatrix (possibly different for
3191            each block row) or PETSC_NULL.
3192 
3193    Output Parameter:
3194 .  A - the matrix
3195 
3196    Options Database Keys:
3197 +   -mat_block_size - size of the blocks to use
3198 -   -mat_use_hash_table <fact>
3199 
3200    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3201    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3202    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3203 
3204    Notes:
3205    If the *_nnz parameter is given then the *_nz parameter is ignored
3206 
3207    A nonzero block is any block that as 1 or more nonzeros in it
3208 
3209    The user MUST specify either the local or global matrix dimensions
3210    (possibly both).
3211 
3212    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
3213    than it must be used on all processors that share the object for that argument.
3214 
3215    Storage Information:
3216    For a square global matrix we define each processor's diagonal portion
3217    to be its local rows and the corresponding columns (a square submatrix);
3218    each processor's off-diagonal portion encompasses the remainder of the
3219    local matrix (a rectangular submatrix).
3220 
3221    The user can specify preallocated storage for the diagonal part of
3222    the local submatrix with either d_nz or d_nnz (not both).  Set
3223    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
3224    memory allocation.  Likewise, specify preallocated storage for the
3225    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
3226 
3227    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3228    the figure below we depict these three local rows and all columns (0-11).
3229 
3230 .vb
3231            0 1 2 3 4 5 6 7 8 9 10 11
3232           -------------------
3233    row 3  |  o o o d d d o o o o o o
3234    row 4  |  o o o d d d o o o o o o
3235    row 5  |  o o o d d d o o o o o o
3236           -------------------
3237 .ve
3238 
3239    Thus, any entries in the d locations are stored in the d (diagonal)
3240    submatrix, and any entries in the o locations are stored in the
3241    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
3242    stored simply in the MATSEQBAIJ format for compressed row storage.
3243 
3244    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
3245    and o_nz should indicate the number of block nonzeros per row in the o matrix.
3246    In general, for PDE problems in which most nonzeros are near the diagonal,
3247    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
3248    or you will get TERRIBLE performance; see the users' manual chapter on
3249    matrices.
3250 
3251    Level: intermediate
3252 
3253 .keywords: matrix, block, aij, compressed row, sparse, parallel
3254 
3255 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatMPIBAIJSetPreallocation(), MatMPIBAIJSetPreallocationCSR()
3256 @*/
3257 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPIBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
3258 {
3259   PetscErrorCode ierr;
3260   PetscMPIInt    size;
3261 
3262   PetscFunctionBegin;
3263   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3264   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
3265   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3266   if (size > 1) {
3267     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
3268     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
3269   } else {
3270     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3271     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
3272   }
3273   PetscFunctionReturn(0);
3274 }
3275 
3276 #undef __FUNCT__
3277 #define __FUNCT__ "MatDuplicate_MPIBAIJ"
3278 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
3279 {
3280   Mat            mat;
3281   Mat_MPIBAIJ    *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
3282   PetscErrorCode ierr;
3283   PetscInt       len=0;
3284 
3285   PetscFunctionBegin;
3286   *newmat       = 0;
3287   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
3288   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
3289   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
3290   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3291 
3292   mat->factor       = matin->factor;
3293   mat->preallocated = PETSC_TRUE;
3294   mat->assembled    = PETSC_TRUE;
3295   mat->insertmode   = NOT_SET_VALUES;
3296 
3297   a      = (Mat_MPIBAIJ*)mat->data;
3298   mat->rmap->bs  = matin->rmap->bs;
3299   a->bs2   = oldmat->bs2;
3300   a->mbs   = oldmat->mbs;
3301   a->nbs   = oldmat->nbs;
3302   a->Mbs   = oldmat->Mbs;
3303   a->Nbs   = oldmat->Nbs;
3304 
3305   ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr);
3306   ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr);
3307 
3308   a->size         = oldmat->size;
3309   a->rank         = oldmat->rank;
3310   a->donotstash   = oldmat->donotstash;
3311   a->roworiented  = oldmat->roworiented;
3312   a->rowindices   = 0;
3313   a->rowvalues    = 0;
3314   a->getrowactive = PETSC_FALSE;
3315   a->barray       = 0;
3316   a->rstartbs     = oldmat->rstartbs;
3317   a->rendbs       = oldmat->rendbs;
3318   a->cstartbs     = oldmat->cstartbs;
3319   a->cendbs       = oldmat->cendbs;
3320 
3321   /* hash table stuff */
3322   a->ht           = 0;
3323   a->hd           = 0;
3324   a->ht_size      = 0;
3325   a->ht_flag      = oldmat->ht_flag;
3326   a->ht_fact      = oldmat->ht_fact;
3327   a->ht_total_ct  = 0;
3328   a->ht_insert_ct = 0;
3329 
3330   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+1)*sizeof(PetscInt));CHKERRQ(ierr);
3331   if (oldmat->colmap) {
3332 #if defined (PETSC_USE_CTABLE)
3333   ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
3334 #else
3335   ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
3336   ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3337   ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
3338 #endif
3339   } else a->colmap = 0;
3340 
3341   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
3342     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
3343     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
3344     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
3345   } else a->garray = 0;
3346 
3347   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
3348   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
3349   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
3350   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
3351   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
3352 
3353   ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
3354   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
3355   ierr = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
3356   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
3357   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
3358   *newmat = mat;
3359 
3360   PetscFunctionReturn(0);
3361 }
3362 
3363 #undef __FUNCT__
3364 #define __FUNCT__ "MatLoad_MPIBAIJ"
3365 PetscErrorCode MatLoad_MPIBAIJ(PetscViewer viewer, const MatType type,Mat *newmat)
3366 {
3367   Mat            A;
3368   PetscErrorCode ierr;
3369   int            fd;
3370   PetscInt       i,nz,j,rstart,rend;
3371   PetscScalar    *vals,*buf;
3372   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3373   MPI_Status     status;
3374   PetscMPIInt    rank,size,maxnz;
3375   PetscInt       header[4],*rowlengths = 0,M,N,m,*rowners,*cols;
3376   PetscInt       *locrowlens = PETSC_NULL,*procsnz = PETSC_NULL,*browners = PETSC_NULL;
3377   PetscInt       jj,*mycols,*ibuf,bs=1,Mbs,mbs,extra_rows,mmax;
3378   PetscMPIInt    tag = ((PetscObject)viewer)->tag;
3379   PetscInt       *dlens = PETSC_NULL,*odlens = PETSC_NULL,*mask = PETSC_NULL,*masked1 = PETSC_NULL,*masked2 = PETSC_NULL,rowcount,odcount;
3380   PetscInt       dcount,kmax,k,nzcount,tmp,mend;
3381 
3382   PetscFunctionBegin;
3383   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPIBAIJ matrix 2","Mat");CHKERRQ(ierr);
3384     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
3385   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3386 
3387   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3388   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
3389   if (!rank) {
3390     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3391     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
3392     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
3393   }
3394 
3395   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
3396   M = header[1]; N = header[2];
3397 
3398   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
3399 
3400   /*
3401      This code adds extra rows to make sure the number of rows is
3402      divisible by the blocksize
3403   */
3404   Mbs        = M/bs;
3405   extra_rows = bs - M + bs*Mbs;
3406   if (extra_rows == bs) extra_rows = 0;
3407   else                  Mbs++;
3408   if (extra_rows && !rank) {
3409     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3410   }
3411 
3412   /* determine ownership of all rows */
3413   mbs        = Mbs/size + ((Mbs % size) > rank);
3414   m          = mbs*bs;
3415   ierr       = PetscMalloc2(size+1,PetscInt,&rowners,size+1,PetscInt,&browners);CHKERRQ(ierr);
3416   ierr       = MPI_Allgather(&mbs,1,MPIU_INT,rowners+1,1,MPIU_INT,comm);CHKERRQ(ierr);
3417 
3418   /* process 0 needs enough room for process with most rows */
3419   if (!rank) {
3420     mmax = rowners[1];
3421     for (i=2; i<size; i++) {
3422       mmax = PetscMax(mmax,rowners[i]);
3423     }
3424     mmax*=bs;
3425   } else mmax = m;
3426 
3427   rowners[0] = 0;
3428   for (i=2; i<=size; i++)  rowners[i] += rowners[i-1];
3429   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
3430   rstart = rowners[rank];
3431   rend   = rowners[rank+1];
3432 
3433   /* distribute row lengths to all processors */
3434   ierr = PetscMalloc((mmax+1)*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr);
3435   if (!rank) {
3436     mend = m;
3437     if (size == 1) mend = mend - extra_rows;
3438     ierr = PetscBinaryRead(fd,locrowlens,mend,PETSC_INT);CHKERRQ(ierr);
3439     for (j=mend; j<m; j++) locrowlens[j] = 1;
3440     ierr = PetscMalloc(m*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3441     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
3442     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
3443     for (j=0; j<m; j++) {
3444       procsnz[0] += locrowlens[j];
3445     }
3446     for (i=1; i<size; i++) {
3447       mend = browners[i+1] - browners[i];
3448       if (i == size-1) mend = mend - extra_rows;
3449       ierr = PetscBinaryRead(fd,rowlengths,mend,PETSC_INT);CHKERRQ(ierr);
3450       for (j=mend; j<browners[i+1] - browners[i]; j++) rowlengths[j] = 1;
3451       /* calculate the number of nonzeros on each processor */
3452       for (j=0; j<browners[i+1]-browners[i]; j++) {
3453         procsnz[i] += rowlengths[j];
3454       }
3455       ierr = MPI_Send(rowlengths,browners[i+1]-browners[i],MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3456     }
3457     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3458   } else {
3459     ierr = MPI_Recv(locrowlens,m,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3460   }
3461 
3462   if (!rank) {
3463     /* determine max buffer needed and allocate it */
3464     maxnz = procsnz[0];
3465     for (i=1; i<size; i++) {
3466       maxnz = PetscMax(maxnz,procsnz[i]);
3467     }
3468     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
3469 
3470     /* read in my part of the matrix column indices  */
3471     nz     = procsnz[0];
3472     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
3473     mycols = ibuf;
3474     if (size == 1)  nz -= extra_rows;
3475     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
3476     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
3477 
3478     /* read in every ones (except the last) and ship off */
3479     for (i=1; i<size-1; i++) {
3480       nz   = procsnz[i];
3481       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3482       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
3483     }
3484     /* read in the stuff for the last proc */
3485     if (size != 1) {
3486       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
3487       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
3488       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
3489       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
3490     }
3491     ierr = PetscFree(cols);CHKERRQ(ierr);
3492   } else {
3493     /* determine buffer space needed for message */
3494     nz = 0;
3495     for (i=0; i<m; i++) {
3496       nz += locrowlens[i];
3497     }
3498     ierr   = PetscMalloc((nz+1)*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
3499     mycols = ibuf;
3500     /* receive message of column indices*/
3501     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
3502     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
3503     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
3504   }
3505 
3506   /* loop over local rows, determining number of off diagonal entries */
3507   ierr     = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
3508   ierr     = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
3509   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3510   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3511   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
3512   rowcount = 0; nzcount = 0;
3513   for (i=0; i<mbs; i++) {
3514     dcount  = 0;
3515     odcount = 0;
3516     for (j=0; j<bs; j++) {
3517       kmax = locrowlens[rowcount];
3518       for (k=0; k<kmax; k++) {
3519         tmp = mycols[nzcount++]/bs;
3520         if (!mask[tmp]) {
3521           mask[tmp] = 1;
3522           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
3523           else masked1[dcount++] = tmp;
3524         }
3525       }
3526       rowcount++;
3527     }
3528 
3529     dlens[i]  = dcount;
3530     odlens[i] = odcount;
3531 
3532     /* zero out the mask elements we set */
3533     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
3534     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
3535   }
3536 
3537   /* create our matrix */
3538   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
3539   ierr = MatSetSizes(A,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3540   ierr = MatSetType(A,type);CHKERRQ(ierr)
3541   ierr = MatMPIBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
3542 
3543   if (!rank) {
3544     ierr = PetscMalloc((maxnz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
3545     /* read in my part of the matrix numerical values  */
3546     nz = procsnz[0];
3547     vals = buf;
3548     mycols = ibuf;
3549     if (size == 1)  nz -= extra_rows;
3550     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3551     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
3552 
3553     /* insert into matrix */
3554     jj      = rstart*bs;
3555     for (i=0; i<m; i++) {
3556       ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3557       mycols += locrowlens[i];
3558       vals   += locrowlens[i];
3559       jj++;
3560     }
3561     /* read in other processors (except the last one) and ship out */
3562     for (i=1; i<size-1; i++) {
3563       nz   = procsnz[i];
3564       vals = buf;
3565       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3566       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
3567     }
3568     /* the last proc */
3569     if (size != 1){
3570       nz   = procsnz[i] - extra_rows;
3571       vals = buf;
3572       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
3573       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
3574       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
3575     }
3576     ierr = PetscFree(procsnz);CHKERRQ(ierr);
3577   } else {
3578     /* receive numeric values */
3579     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
3580 
3581     /* receive message of values*/
3582     vals   = buf;
3583     mycols = ibuf;
3584     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
3585     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
3586     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
3587 
3588     /* insert into matrix */
3589     jj      = rstart*bs;
3590     for (i=0; i<m; i++) {
3591       ierr    = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
3592       mycols += locrowlens[i];
3593       vals   += locrowlens[i];
3594       jj++;
3595     }
3596   }
3597   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
3598   ierr = PetscFree(buf);CHKERRQ(ierr);
3599   ierr = PetscFree(ibuf);CHKERRQ(ierr);
3600   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
3601   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
3602   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
3603   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3604   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3605 
3606   *newmat = A;
3607   PetscFunctionReturn(0);
3608 }
3609 
3610 #undef __FUNCT__
3611 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
3612 /*@
3613    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
3614 
3615    Input Parameters:
3616 .  mat  - the matrix
3617 .  fact - factor
3618 
3619    Collective on Mat
3620 
3621    Level: advanced
3622 
3623   Notes:
3624    This can also be set by the command line option: -mat_use_hash_table <fact>
3625 
3626 .keywords: matrix, hashtable, factor, HT
3627 
3628 .seealso: MatSetOption()
3629 @*/
3630 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
3631 {
3632   PetscErrorCode ierr,(*f)(Mat,PetscReal);
3633 
3634   PetscFunctionBegin;
3635   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);CHKERRQ(ierr);
3636   if (f) {
3637     ierr = (*f)(mat,fact);CHKERRQ(ierr);
3638   }
3639   PetscFunctionReturn(0);
3640 }
3641 
3642 EXTERN_C_BEGIN
3643 #undef __FUNCT__
3644 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
3645 PetscErrorCode PETSCMAT_DLLEXPORT MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
3646 {
3647   Mat_MPIBAIJ *baij;
3648 
3649   PetscFunctionBegin;
3650   baij = (Mat_MPIBAIJ*)mat->data;
3651   baij->ht_fact = fact;
3652   PetscFunctionReturn(0);
3653 }
3654 EXTERN_C_END
3655 
3656 #undef __FUNCT__
3657 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
3658 PetscErrorCode PETSCMAT_DLLEXPORT MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,PetscInt *colmap[])
3659 {
3660   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
3661   PetscFunctionBegin;
3662   *Ad     = a->A;
3663   *Ao     = a->B;
3664   *colmap = a->garray;
3665   PetscFunctionReturn(0);
3666 }
3667 
3668 /*
3669     Special version for direct calls from Fortran (to eliminate two function call overheads
3670 */
3671 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3672 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3673 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3674 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3675 #endif
3676 
3677 #undef __FUNCT__
3678 #define __FUNCT__ "matmpibiajsetvaluesblocked"
3679 /*@C
3680   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to MatSetValuesBlocked()
3681 
3682   Collective on Mat
3683 
3684   Input Parameters:
3685 + mat - the matrix
3686 . min - number of input rows
3687 . im - input rows
3688 . nin - number of input columns
3689 . in - input columns
3690 . v - numerical values input
3691 - addvin - INSERT_VALUES or ADD_VALUES
3692 
3693   Notes: This has a complete copy of MatSetValuesBlocked_MPIBAIJ() which is terrible code un-reuse.
3694 
3695   Level: advanced
3696 
3697 .seealso:   MatSetValuesBlocked()
3698 @*/
3699 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin,PetscInt *min,const PetscInt im[],PetscInt *nin,const PetscInt in[],const MatScalar v[],InsertMode *addvin)
3700 {
3701   /* convert input arguments to C version */
3702   Mat             mat = *matin;
3703   PetscInt        m = *min, n = *nin;
3704   InsertMode      addv = *addvin;
3705 
3706   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
3707   const MatScalar *value;
3708   MatScalar       *barray=baij->barray;
3709   PetscTruth      roworiented = baij->roworiented;
3710   PetscErrorCode  ierr;
3711   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
3712   PetscInt        rend=baij->rendbs,cstart=baij->cstartbs,stepval;
3713   PetscInt        cend=baij->cendbs,bs=mat->rmap->bs,bs2=baij->bs2;
3714 
3715   PetscFunctionBegin;
3716   /* tasks normally handled by MatSetValuesBlocked() */
3717   if (mat->insertmode == NOT_SET_VALUES) {
3718     mat->insertmode = addv;
3719   }
3720 #if defined(PETSC_USE_DEBUG)
3721   else if (mat->insertmode != addv) {
3722     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
3723   }
3724   if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
3725 #endif
3726   if (mat->assembled) {
3727     mat->was_assembled = PETSC_TRUE;
3728     mat->assembled     = PETSC_FALSE;
3729   }
3730   ierr = PetscLogEventBegin(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
3731 
3732 
3733   if(!barray) {
3734     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
3735     baij->barray = barray;
3736   }
3737 
3738   if (roworiented) {
3739     stepval = (n-1)*bs;
3740   } else {
3741     stepval = (m-1)*bs;
3742   }
3743   for (i=0; i<m; i++) {
3744     if (im[i] < 0) continue;
3745 #if defined(PETSC_USE_DEBUG)
3746     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
3747 #endif
3748     if (im[i] >= rstart && im[i] < rend) {
3749       row = im[i] - rstart;
3750       for (j=0; j<n; j++) {
3751         /* If NumCol = 1 then a copy is not required */
3752         if ((roworiented) && (n == 1)) {
3753           barray = (MatScalar*)v + i*bs2;
3754         } else if((!roworiented) && (m == 1)) {
3755           barray = (MatScalar*)v + j*bs2;
3756         } else { /* Here a copy is required */
3757           if (roworiented) {
3758             value = v + i*(stepval+bs)*bs + j*bs;
3759           } else {
3760             value = v + j*(stepval+bs)*bs + i*bs;
3761           }
3762           for (ii=0; ii<bs; ii++,value+=stepval) {
3763             for (jj=0; jj<bs; jj++) {
3764               *barray++  = *value++;
3765             }
3766           }
3767           barray -=bs2;
3768         }
3769 
3770         if (in[j] >= cstart && in[j] < cend){
3771           col  = in[j] - cstart;
3772           ierr = MatSetValuesBlocked_SeqBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
3773         }
3774         else if (in[j] < 0) continue;
3775 #if defined(PETSC_USE_DEBUG)
3776         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
3777 #endif
3778         else {
3779           if (mat->was_assembled) {
3780             if (!baij->colmap) {
3781               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
3782             }
3783 
3784 #if defined(PETSC_USE_DEBUG)
3785 #if defined (PETSC_USE_CTABLE)
3786             { PetscInt data;
3787               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
3788               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
3789             }
3790 #else
3791             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
3792 #endif
3793 #endif
3794 #if defined (PETSC_USE_CTABLE)
3795 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
3796             col  = (col - 1)/bs;
3797 #else
3798             col = (baij->colmap[in[j]] - 1)/bs;
3799 #endif
3800             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
3801               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
3802               col =  in[j];
3803             }
3804           }
3805           else col = in[j];
3806           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
3807         }
3808       }
3809     } else {
3810       if (!baij->donotstash) {
3811         if (roworiented) {
3812           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
3813         } else {
3814           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
3815         }
3816       }
3817     }
3818   }
3819 
3820   /* task normally handled by MatSetValuesBlocked() */
3821   ierr = PetscLogEventEnd(MAT_SetValues,mat,0,0,0);CHKERRQ(ierr);
3822   PetscFunctionReturn(0);
3823 }
3824