xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 72ce74bd30d5d09d1371a3eb387704f4af395d1a)
1 
2 #include "src/mat/impls/baij/mpi/mpibaij.h"   /*I  "petscmat.h"  I*/
3 
4 EXTERN PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat);
5 EXTERN PetscErrorCode DisAssemble_MPIBAIJ(Mat);
6 EXTERN PetscErrorCode MatIncreaseOverlap_MPIBAIJ(Mat,int,IS[],int);
7 EXTERN PetscErrorCode MatGetSubMatrices_MPIBAIJ(Mat,int,const IS[],const IS[],MatReuse,Mat *[]);
8 EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,int,const int[],int,const int [],PetscScalar []);
9 EXTERN PetscErrorCode MatSetValues_SeqBAIJ(Mat,int,const int[],int,const int [],const PetscScalar [],InsertMode);
10 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,int,const int[],int,const int[],const PetscScalar[],InsertMode);
11 EXTERN PetscErrorCode MatGetRow_SeqBAIJ(Mat,int,int*,int*[],PetscScalar*[]);
12 EXTERN PetscErrorCode MatRestoreRow_SeqBAIJ(Mat,int,int*,int*[],PetscScalar*[]);
13 EXTERN PetscErrorCode MatPrintHelp_SeqBAIJ(Mat);
14 EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,const PetscScalar*);
15 
16 /*  UGLY, ugly, ugly
17    When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does
18    not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and
19    inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ()
20    converts the entries into single precision and then calls ..._MatScalar() to put them
21    into the single precision data structures.
22 */
23 #if defined(PETSC_USE_MAT_SINGLE)
24 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,const int*,int,const int*,const MatScalar*,InsertMode);
25 EXTERN PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat,int,const int*,int,const int*,const MatScalar*,InsertMode);
26 EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat,int,const int*,int,const int*,const MatScalar*,InsertMode);
27 EXTERN PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat,int,const int*,int,const int*,const MatScalar*,InsertMode);
28 EXTERN PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat,int,const int*,int,const int*,const MatScalar*,InsertMode);
29 #else
30 #define MatSetValuesBlocked_SeqBAIJ_MatScalar      MatSetValuesBlocked_SeqBAIJ
31 #define MatSetValues_MPIBAIJ_MatScalar             MatSetValues_MPIBAIJ
32 #define MatSetValuesBlocked_MPIBAIJ_MatScalar      MatSetValuesBlocked_MPIBAIJ
33 #define MatSetValues_MPIBAIJ_HT_MatScalar          MatSetValues_MPIBAIJ_HT
34 #define MatSetValuesBlocked_MPIBAIJ_HT_MatScalar   MatSetValuesBlocked_MPIBAIJ_HT
35 #endif
36 
37 #undef __FUNCT__
38 #define __FUNCT__ "MatGetRowMax_MPIBAIJ"
39 PetscErrorCode MatGetRowMax_MPIBAIJ(Mat A,Vec v)
40 {
41   Mat_MPIBAIJ  *a = (Mat_MPIBAIJ*)A->data;
42   PetscErrorCode ierr;
43   int i;
44   PetscScalar  *va,*vb;
45   Vec          vtmp;
46 
47   PetscFunctionBegin;
48 
49   ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr);
50   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
51 
52   ierr = VecCreateSeq(PETSC_COMM_SELF,A->m,&vtmp);CHKERRQ(ierr);
53   ierr = MatGetRowMax(a->B,vtmp);CHKERRQ(ierr);
54   ierr = VecGetArray(vtmp,&vb);CHKERRQ(ierr);
55 
56   for (i=0; i<A->m; i++){
57     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) va[i] = vb[i];
58   }
59 
60   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
61   ierr = VecRestoreArray(vtmp,&vb);CHKERRQ(ierr);
62   ierr = VecDestroy(vtmp);CHKERRQ(ierr);
63 
64   PetscFunctionReturn(0);
65 }
66 
67 EXTERN_C_BEGIN
68 #undef __FUNCT__
69 #define __FUNCT__ "MatStoreValues_MPIBAIJ"
70 PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat)
71 {
72   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
73   PetscErrorCode ierr;
74 
75   PetscFunctionBegin;
76   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
77   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
78   PetscFunctionReturn(0);
79 }
80 EXTERN_C_END
81 
82 EXTERN_C_BEGIN
83 #undef __FUNCT__
84 #define __FUNCT__ "MatRetrieveValues_MPIBAIJ"
85 PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat)
86 {
87   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
88   PetscErrorCode ierr;
89 
90   PetscFunctionBegin;
91   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
92   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
93   PetscFunctionReturn(0);
94 }
95 EXTERN_C_END
96 
97 /*
98      Local utility routine that creates a mapping from the global column
99    number to the local number in the off-diagonal part of the local
100    storage of the matrix.  This is done in a non scable way since the
101    length of colmap equals the global matrix length.
102 */
103 #undef __FUNCT__
104 #define __FUNCT__ "CreateColmap_MPIBAIJ_Private"
105 PetscErrorCode CreateColmap_MPIBAIJ_Private(Mat mat)
106 {
107   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
108   Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data;
109   int         nbs = B->nbs,i,bs=B->bs,ierr;
110 
111   PetscFunctionBegin;
112 #if defined (PETSC_USE_CTABLE)
113   ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr);
114   for (i=0; i<nbs; i++){
115     ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
116   }
117 #else
118   ierr = PetscMalloc((baij->Nbs+1)*sizeof(int),&baij->colmap);CHKERRQ(ierr);
119   PetscLogObjectMemory(mat,baij->Nbs*sizeof(int));
120   ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));CHKERRQ(ierr);
121   for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
122 #endif
123   PetscFunctionReturn(0);
124 }
125 
126 #define CHUNKSIZE  10
127 
128 #define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
129 { \
130  \
131     brow = row/bs;  \
132     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
133     rmax = aimax[brow]; nrow = ailen[brow]; \
134       bcol = col/bs; \
135       ridx = row % bs; cidx = col % bs; \
136       low = 0; high = nrow; \
137       while (high-low > 3) { \
138         t = (low+high)/2; \
139         if (rp[t] > bcol) high = t; \
140         else              low  = t; \
141       } \
142       for (_i=low; _i<high; _i++) { \
143         if (rp[_i] > bcol) break; \
144         if (rp[_i] == bcol) { \
145           bap  = ap +  bs2*_i + bs*cidx + ridx; \
146           if (addv == ADD_VALUES) *bap += value;  \
147           else                    *bap  = value;  \
148           goto a_noinsert; \
149         } \
150       } \
151       if (a->nonew == 1) goto a_noinsert; \
152       else if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \
153       if (nrow >= rmax) { \
154         /* there is no extra room in row, therefore enlarge */ \
155         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
156         MatScalar *new_a; \
157  \
158         if (a->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \
159  \
160         /* malloc new storage space */ \
161         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \
162         ierr = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
163         new_j   = (int*)(new_a + bs2*new_nz); \
164         new_i   = new_j + new_nz; \
165  \
166         /* copy over old data into new slots */ \
167         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \
168         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
169         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
170         len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
171         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
172         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
173         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(PetscScalar));CHKERRQ(ierr); \
174         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
175                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
176         /* free up old matrix storage */ \
177         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
178         if (!a->singlemalloc) { \
179           ierr = PetscFree(a->i);CHKERRQ(ierr); \
180           ierr = PetscFree(a->j);CHKERRQ(ierr);\
181         } \
182         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
183         a->singlemalloc = PETSC_TRUE; \
184  \
185         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
186         rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
187         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
188         a->maxnz += bs2*CHUNKSIZE; \
189         a->reallocs++; \
190         a->nz++; \
191       } \
192       N = nrow++ - 1;  \
193       /* shift up all the later entries in this row */ \
194       for (ii=N; ii>=_i; ii--) { \
195         rp[ii+1] = rp[ii]; \
196         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
197       } \
198       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
199       rp[_i]                      = bcol;  \
200       ap[bs2*_i + bs*cidx + ridx] = value;  \
201       a_noinsert:; \
202     ailen[brow] = nrow; \
203 }
204 
205 #define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
206 { \
207     brow = row/bs;  \
208     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
209     rmax = bimax[brow]; nrow = bilen[brow]; \
210       bcol = col/bs; \
211       ridx = row % bs; cidx = col % bs; \
212       low = 0; high = nrow; \
213       while (high-low > 3) { \
214         t = (low+high)/2; \
215         if (rp[t] > bcol) high = t; \
216         else              low  = t; \
217       } \
218       for (_i=low; _i<high; _i++) { \
219         if (rp[_i] > bcol) break; \
220         if (rp[_i] == bcol) { \
221           bap  = ap +  bs2*_i + bs*cidx + ridx; \
222           if (addv == ADD_VALUES) *bap += value;  \
223           else                    *bap  = value;  \
224           goto b_noinsert; \
225         } \
226       } \
227       if (b->nonew == 1) goto b_noinsert; \
228       else if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \
229       if (nrow >= rmax) { \
230         /* there is no extra room in row, therefore enlarge */ \
231         int       new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
232         MatScalar *new_a; \
233  \
234         if (b->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \
235  \
236         /* malloc new storage space */ \
237         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \
238         ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
239         new_j   = (int*)(new_a + bs2*new_nz); \
240         new_i   = new_j + new_nz; \
241  \
242         /* copy over old data into new slots */ \
243         for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \
244         for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
245         ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
246         len  = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
247         ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
248         ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
249         ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
250         ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
251                     ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
252         /* free up old matrix storage */ \
253         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
254         if (!b->singlemalloc) { \
255           ierr = PetscFree(b->i);CHKERRQ(ierr); \
256           ierr = PetscFree(b->j);CHKERRQ(ierr); \
257         } \
258         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
259         b->singlemalloc = PETSC_TRUE; \
260  \
261         rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
262         rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
263         PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
264         b->maxnz += bs2*CHUNKSIZE; \
265         b->reallocs++; \
266         b->nz++; \
267       } \
268       N = nrow++ - 1;  \
269       /* shift up all the later entries in this row */ \
270       for (ii=N; ii>=_i; ii--) { \
271         rp[ii+1] = rp[ii]; \
272         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
273       } \
274       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
275       rp[_i]                      = bcol;  \
276       ap[bs2*_i + bs*cidx + ridx] = value;  \
277       b_noinsert:; \
278     bilen[brow] = nrow; \
279 }
280 
281 #if defined(PETSC_USE_MAT_SINGLE)
282 #undef __FUNCT__
283 #define __FUNCT__ "MatSetValues_MPIBAIJ"
284 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
285 {
286   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
287   PetscErrorCode ierr;
288   int  i,N = m*n;
289   MatScalar   *vsingle;
290 
291   PetscFunctionBegin;
292   if (N > b->setvalueslen) {
293     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
294     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
295     b->setvalueslen  = N;
296   }
297   vsingle = b->setvaluescopy;
298 
299   for (i=0; i<N; i++) {
300     vsingle[i] = v[i];
301   }
302   ierr = MatSetValues_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
303   PetscFunctionReturn(0);
304 }
305 
306 #undef __FUNCT__
307 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ"
308 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
309 {
310   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
311   PetscErrorCode ierr;
312   int i,N = m*n*b->bs2;
313   MatScalar   *vsingle;
314 
315   PetscFunctionBegin;
316   if (N > b->setvalueslen) {
317     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
318     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
319     b->setvalueslen  = N;
320   }
321   vsingle = b->setvaluescopy;
322   for (i=0; i<N; i++) {
323     vsingle[i] = v[i];
324   }
325   ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
326   PetscFunctionReturn(0);
327 }
328 
329 #undef __FUNCT__
330 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT"
331 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
332 {
333   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
334   PetscErrorCode ierr;
335   int i,N = m*n;
336   MatScalar   *vsingle;
337 
338   PetscFunctionBegin;
339   if (N > b->setvalueslen) {
340     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
341     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
342     b->setvalueslen  = N;
343   }
344   vsingle = b->setvaluescopy;
345   for (i=0; i<N; i++) {
346     vsingle[i] = v[i];
347   }
348   ierr = MatSetValues_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
349   PetscFunctionReturn(0);
350 }
351 
352 #undef __FUNCT__
353 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT"
354 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
355 {
356   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
357   PetscErrorCode ierr;
358   int i,N = m*n*b->bs2;
359   MatScalar   *vsingle;
360 
361   PetscFunctionBegin;
362   if (N > b->setvalueslen) {
363     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
364     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
365     b->setvalueslen  = N;
366   }
367   vsingle = b->setvaluescopy;
368   for (i=0; i<N; i++) {
369     vsingle[i] = v[i];
370   }
371   ierr = MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
372   PetscFunctionReturn(0);
373 }
374 #endif
375 
376 #undef __FUNCT__
377 #define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar"
378 PetscErrorCode MatSetValues_MPIBAIJ_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
379 {
380   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
381   MatScalar   value;
382   PetscTruth  roworiented = baij->roworiented;
383   PetscErrorCode ierr;
384   int i,j,row,col;
385   int         rstart_orig=baij->rstart_bs;
386   int         rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
387   int         cend_orig=baij->cend_bs,bs=baij->bs;
388 
389   /* Some Variables required in the macro */
390   Mat         A = baij->A;
391   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data;
392   int         *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
393   MatScalar   *aa=a->a;
394 
395   Mat         B = baij->B;
396   Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data;
397   int         *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
398   MatScalar   *ba=b->a;
399 
400   int         *rp,ii,nrow,_i,rmax,N,brow,bcol;
401   int         low,high,t,ridx,cidx,bs2=a->bs2;
402   MatScalar   *ap,*bap;
403 
404   PetscFunctionBegin;
405   for (i=0; i<m; i++) {
406     if (im[i] < 0) continue;
407 #if defined(PETSC_USE_BOPT_g)
408     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1);
409 #endif
410     if (im[i] >= rstart_orig && im[i] < rend_orig) {
411       row = im[i] - rstart_orig;
412       for (j=0; j<n; j++) {
413         if (in[j] >= cstart_orig && in[j] < cend_orig){
414           col = in[j] - cstart_orig;
415           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
416           MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
417           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
418         } else if (in[j] < 0) continue;
419 #if defined(PETSC_USE_BOPT_g)
420         else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[i],mat->N-1);}
421 #endif
422         else {
423           if (mat->was_assembled) {
424             if (!baij->colmap) {
425               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
426             }
427 #if defined (PETSC_USE_CTABLE)
428             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
429             col  = col - 1;
430 #else
431             col = baij->colmap[in[j]/bs] - 1;
432 #endif
433             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
434               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
435               col =  in[j];
436               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
437               B = baij->B;
438               b = (Mat_SeqBAIJ*)(B)->data;
439               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
440               ba=b->a;
441             } else col += in[j]%bs;
442           } else col = in[j];
443           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
444           MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
445           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
446         }
447       }
448     } else {
449       if (!baij->donotstash) {
450         if (roworiented) {
451           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
452         } else {
453           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
454         }
455       }
456     }
457   }
458   PetscFunctionReturn(0);
459 }
460 
461 #undef __FUNCT__
462 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_MatScalar"
463 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
464 {
465   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
466   const MatScalar *value;
467   MatScalar       *barray=baij->barray;
468   PetscTruth      roworiented = baij->roworiented;
469   PetscErrorCode ierr;
470   int i,j,ii,jj,row,col,rstart=baij->rstart;
471   int             rend=baij->rend,cstart=baij->cstart,stepval;
472   int             cend=baij->cend,bs=baij->bs,bs2=baij->bs2;
473 
474   PetscFunctionBegin;
475   if(!barray) {
476     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
477     baij->barray = barray;
478   }
479 
480   if (roworiented) {
481     stepval = (n-1)*bs;
482   } else {
483     stepval = (m-1)*bs;
484   }
485   for (i=0; i<m; i++) {
486     if (im[i] < 0) continue;
487 #if defined(PETSC_USE_BOPT_g)
488     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %d max %d",im[i],baij->Mbs-1);
489 #endif
490     if (im[i] >= rstart && im[i] < rend) {
491       row = im[i] - rstart;
492       for (j=0; j<n; j++) {
493         /* If NumCol = 1 then a copy is not required */
494         if ((roworiented) && (n == 1)) {
495           barray = (MatScalar*)v + i*bs2;
496         } else if((!roworiented) && (m == 1)) {
497           barray = (MatScalar*)v + j*bs2;
498         } else { /* Here a copy is required */
499           if (roworiented) {
500             value = v + i*(stepval+bs)*bs + j*bs;
501           } else {
502             value = v + j*(stepval+bs)*bs + i*bs;
503           }
504           for (ii=0; ii<bs; ii++,value+=stepval) {
505             for (jj=0; jj<bs; jj++) {
506               *barray++  = *value++;
507             }
508           }
509           barray -=bs2;
510         }
511 
512         if (in[j] >= cstart && in[j] < cend){
513           col  = in[j] - cstart;
514           ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
515         }
516         else if (in[j] < 0) continue;
517 #if defined(PETSC_USE_BOPT_g)
518         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %d max %d",in[j],baij->Nbs-1);}
519 #endif
520         else {
521           if (mat->was_assembled) {
522             if (!baij->colmap) {
523               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
524             }
525 
526 #if defined(PETSC_USE_BOPT_g)
527 #if defined (PETSC_USE_CTABLE)
528             { int data;
529               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
530               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
531             }
532 #else
533             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
534 #endif
535 #endif
536 #if defined (PETSC_USE_CTABLE)
537 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
538             col  = (col - 1)/bs;
539 #else
540             col = (baij->colmap[in[j]] - 1)/bs;
541 #endif
542             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
543               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
544               col =  in[j];
545             }
546           }
547           else col = in[j];
548           ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
549         }
550       }
551     } else {
552       if (!baij->donotstash) {
553         if (roworiented) {
554           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
555         } else {
556           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
557         }
558       }
559     }
560   }
561   PetscFunctionReturn(0);
562 }
563 
564 #define HASH_KEY 0.6180339887
565 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
566 /* #define HASH(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
567 /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
568 #undef __FUNCT__
569 #define __FUNCT__ "MatSetValues_MPIBAIJ_HT_MatScalar"
570 PetscErrorCode MatSetValues_MPIBAIJ_HT_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
571 {
572   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
573   PetscTruth  roworiented = baij->roworiented;
574   PetscErrorCode ierr;
575   int i,j,row,col;
576   int         rstart_orig=baij->rstart_bs;
577   int         rend_orig=baij->rend_bs,Nbs=baij->Nbs;
578   int         h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx;
579   PetscReal   tmp;
580   MatScalar   **HD = baij->hd,value;
581 #if defined(PETSC_USE_BOPT_g)
582   int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
583 #endif
584 
585   PetscFunctionBegin;
586 
587   for (i=0; i<m; i++) {
588 #if defined(PETSC_USE_BOPT_g)
589     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
590     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1);
591 #endif
592       row = im[i];
593     if (row >= rstart_orig && row < rend_orig) {
594       for (j=0; j<n; j++) {
595         col = in[j];
596         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
597         /* Look up into the Hash Table */
598         key = (row/bs)*Nbs+(col/bs)+1;
599         h1  = HASH(size,key,tmp);
600 
601 
602         idx = h1;
603 #if defined(PETSC_USE_BOPT_g)
604         insert_ct++;
605         total_ct++;
606         if (HT[idx] != key) {
607           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
608           if (idx == size) {
609             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
610             if (idx == h1) {
611               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%d,%d) has no entry in the hash table", row, col);
612             }
613           }
614         }
615 #else
616         if (HT[idx] != key) {
617           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
618           if (idx == size) {
619             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
620             if (idx == h1) {
621               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%d,%d) has no entry in the hash table", row, col);
622             }
623           }
624         }
625 #endif
626         /* A HASH table entry is found, so insert the values at the correct address */
627         if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
628         else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
629       }
630     } else {
631       if (!baij->donotstash) {
632         if (roworiented) {
633           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
634         } else {
635           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
636         }
637       }
638     }
639   }
640 #if defined(PETSC_USE_BOPT_g)
641   baij->ht_total_ct = total_ct;
642   baij->ht_insert_ct = insert_ct;
643 #endif
644   PetscFunctionReturn(0);
645 }
646 
647 #undef __FUNCT__
648 #define __FUNCT__ "MatSetValuesBlocked_MPIBAIJ_HT_MatScalar"
649 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
650 {
651   Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ*)mat->data;
652   PetscTruth      roworiented = baij->roworiented;
653   PetscErrorCode ierr;
654   int i,j,ii,jj,row,col;
655   int             rstart=baij->rstart ;
656   int             rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2;
657   int             h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
658   PetscReal       tmp;
659   MatScalar       **HD = baij->hd,*baij_a;
660   const MatScalar *v_t,*value;
661 #if defined(PETSC_USE_BOPT_g)
662   int             total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
663 #endif
664 
665   PetscFunctionBegin;
666 
667   if (roworiented) {
668     stepval = (n-1)*bs;
669   } else {
670     stepval = (m-1)*bs;
671   }
672   for (i=0; i<m; i++) {
673 #if defined(PETSC_USE_BOPT_g)
674     if (im[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",im[i]);
675     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],baij->Mbs-1);
676 #endif
677     row   = im[i];
678     v_t   = v + i*bs2;
679     if (row >= rstart && row < rend) {
680       for (j=0; j<n; j++) {
681         col = in[j];
682 
683         /* Look up into the Hash Table */
684         key = row*Nbs+col+1;
685         h1  = HASH(size,key,tmp);
686 
687         idx = h1;
688 #if defined(PETSC_USE_BOPT_g)
689         total_ct++;
690         insert_ct++;
691        if (HT[idx] != key) {
692           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
693           if (idx == size) {
694             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
695             if (idx == h1) {
696               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%d,%d) has no entry in the hash table", row, col);
697             }
698           }
699         }
700 #else
701         if (HT[idx] != key) {
702           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
703           if (idx == size) {
704             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
705             if (idx == h1) {
706               SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"(%d,%d) has no entry in the hash table", row, col);
707             }
708           }
709         }
710 #endif
711         baij_a = HD[idx];
712         if (roworiented) {
713           /*value = v + i*(stepval+bs)*bs + j*bs;*/
714           /* value = v + (i*(stepval+bs)+j)*bs; */
715           value = v_t;
716           v_t  += bs;
717           if (addv == ADD_VALUES) {
718             for (ii=0; ii<bs; ii++,value+=stepval) {
719               for (jj=ii; jj<bs2; jj+=bs) {
720                 baij_a[jj]  += *value++;
721               }
722             }
723           } else {
724             for (ii=0; ii<bs; ii++,value+=stepval) {
725               for (jj=ii; jj<bs2; jj+=bs) {
726                 baij_a[jj]  = *value++;
727               }
728             }
729           }
730         } else {
731           value = v + j*(stepval+bs)*bs + i*bs;
732           if (addv == ADD_VALUES) {
733             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
734               for (jj=0; jj<bs; jj++) {
735                 baij_a[jj]  += *value++;
736               }
737             }
738           } else {
739             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
740               for (jj=0; jj<bs; jj++) {
741                 baij_a[jj]  = *value++;
742               }
743             }
744           }
745         }
746       }
747     } else {
748       if (!baij->donotstash) {
749         if (roworiented) {
750           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
751         } else {
752           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
753         }
754       }
755     }
756   }
757 #if defined(PETSC_USE_BOPT_g)
758   baij->ht_total_ct = total_ct;
759   baij->ht_insert_ct = insert_ct;
760 #endif
761   PetscFunctionReturn(0);
762 }
763 
764 #undef __FUNCT__
765 #define __FUNCT__ "MatGetValues_MPIBAIJ"
766 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[])
767 {
768   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
769   int        bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
770   int        bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;
771 
772   PetscFunctionBegin;
773   for (i=0; i<m; i++) {
774     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",idxm[i]);
775     if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",idxm[i],mat->M-1);
776     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
777       row = idxm[i] - bsrstart;
778       for (j=0; j<n; j++) {
779         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %d",idxn[j]);
780         if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",idxn[j],mat->N-1);
781         if (idxn[j] >= bscstart && idxn[j] < bscend){
782           col = idxn[j] - bscstart;
783           ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
784         } else {
785           if (!baij->colmap) {
786             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
787           }
788 #if defined (PETSC_USE_CTABLE)
789           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
790           data --;
791 #else
792           data = baij->colmap[idxn[j]/bs]-1;
793 #endif
794           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
795           else {
796             col  = data + idxn[j]%bs;
797             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
798           }
799         }
800       }
801     } else {
802       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
803     }
804   }
805  PetscFunctionReturn(0);
806 }
807 
808 #undef __FUNCT__
809 #define __FUNCT__ "MatNorm_MPIBAIJ"
810 PetscErrorCode MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *nrm)
811 {
812   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
813   Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
814   PetscErrorCode ierr;
815   int i,bs2=baij->bs2;
816   PetscReal  sum = 0.0;
817   MatScalar  *v;
818 
819   PetscFunctionBegin;
820   if (baij->size == 1) {
821     ierr =  MatNorm(baij->A,type,nrm);CHKERRQ(ierr);
822   } else {
823     if (type == NORM_FROBENIUS) {
824       v = amat->a;
825       for (i=0; i<amat->nz*bs2; i++) {
826 #if defined(PETSC_USE_COMPLEX)
827         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
828 #else
829         sum += (*v)*(*v); v++;
830 #endif
831       }
832       v = bmat->a;
833       for (i=0; i<bmat->nz*bs2; i++) {
834 #if defined(PETSC_USE_COMPLEX)
835         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
836 #else
837         sum += (*v)*(*v); v++;
838 #endif
839       }
840       ierr = MPI_Allreduce(&sum,nrm,1,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
841       *nrm = sqrt(*nrm);
842     } else {
843       SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
844     }
845   }
846   PetscFunctionReturn(0);
847 }
848 
849 
850 /*
851   Creates the hash table, and sets the table
852   This table is created only once.
853   If new entried need to be added to the matrix
854   then the hash table has to be destroyed and
855   recreated.
856 */
857 #undef __FUNCT__
858 #define __FUNCT__ "MatCreateHashTable_MPIBAIJ_Private"
859 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
860 {
861   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
862   Mat         A = baij->A,B=baij->B;
863   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
864   int         i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
865   int         size,bs2=baij->bs2,rstart=baij->rstart,ierr;
866   int         cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs;
867   int         *HT,key;
868   MatScalar   **HD;
869   PetscReal   tmp;
870 #if defined(PETSC_USE_BOPT_g)
871   int         ct=0,max=0;
872 #endif
873 
874   PetscFunctionBegin;
875   baij->ht_size=(int)(factor*nz);
876   size = baij->ht_size;
877 
878   if (baij->ht) {
879     PetscFunctionReturn(0);
880   }
881 
882   /* Allocate Memory for Hash Table */
883   ierr     = PetscMalloc((size)*(sizeof(int)+sizeof(MatScalar*))+1,&baij->hd);CHKERRQ(ierr);
884   baij->ht = (int*)(baij->hd + size);
885   HD       = baij->hd;
886   HT       = baij->ht;
887 
888 
889   ierr = PetscMemzero(HD,size*(sizeof(int)+sizeof(PetscScalar*)));CHKERRQ(ierr);
890 
891 
892   /* Loop Over A */
893   for (i=0; i<a->mbs; i++) {
894     for (j=ai[i]; j<ai[i+1]; j++) {
895       row = i+rstart;
896       col = aj[j]+cstart;
897 
898       key = row*Nbs + col + 1;
899       h1  = HASH(size,key,tmp);
900       for (k=0; k<size; k++){
901         if (!HT[(h1+k)%size]) {
902           HT[(h1+k)%size] = key;
903           HD[(h1+k)%size] = a->a + j*bs2;
904           break;
905 #if defined(PETSC_USE_BOPT_g)
906         } else {
907           ct++;
908 #endif
909         }
910       }
911 #if defined(PETSC_USE_BOPT_g)
912       if (k> max) max = k;
913 #endif
914     }
915   }
916   /* Loop Over B */
917   for (i=0; i<b->mbs; i++) {
918     for (j=bi[i]; j<bi[i+1]; j++) {
919       row = i+rstart;
920       col = garray[bj[j]];
921       key = row*Nbs + col + 1;
922       h1  = HASH(size,key,tmp);
923       for (k=0; k<size; k++){
924         if (!HT[(h1+k)%size]) {
925           HT[(h1+k)%size] = key;
926           HD[(h1+k)%size] = b->a + j*bs2;
927           break;
928 #if defined(PETSC_USE_BOPT_g)
929         } else {
930           ct++;
931 #endif
932         }
933       }
934 #if defined(PETSC_USE_BOPT_g)
935       if (k> max) max = k;
936 #endif
937     }
938   }
939 
940   /* Print Summary */
941 #if defined(PETSC_USE_BOPT_g)
942   for (i=0,j=0; i<size; i++) {
943     if (HT[i]) {j++;}
944   }
945   PetscLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n",(!j)? 0.0:((PetscReal)(ct+j))/j,max);
946 #endif
947   PetscFunctionReturn(0);
948 }
949 
950 #undef __FUNCT__
951 #define __FUNCT__ "MatAssemblyBegin_MPIBAIJ"
952 PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
953 {
954   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
955   PetscErrorCode ierr;
956   int nstash,reallocs;
957   InsertMode  addv;
958 
959   PetscFunctionBegin;
960   if (baij->donotstash) {
961     PetscFunctionReturn(0);
962   }
963 
964   /* make sure all processors are either in INSERTMODE or ADDMODE */
965   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
966   if (addv == (ADD_VALUES|INSERT_VALUES)) {
967     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
968   }
969   mat->insertmode = addv; /* in case this processor had no cache */
970 
971   ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
972   ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
973   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
974   PetscLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Stash has %d entries,uses %d mallocs.\n",nstash,reallocs);
975   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
976   PetscLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
977   PetscFunctionReturn(0);
978 }
979 
980 #undef __FUNCT__
981 #define __FUNCT__ "MatAssemblyEnd_MPIBAIJ"
982 PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
983 {
984   Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data;
985   Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data;
986   int         i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2;
987   int         *row,*col,other_disassembled;
988   PetscTruth  r1,r2,r3;
989   MatScalar   *val;
990   InsertMode  addv = mat->insertmode;
991 
992   PetscFunctionBegin;
993   if (!baij->donotstash) {
994     while (1) {
995       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
996       if (!flg) break;
997 
998       for (i=0; i<n;) {
999         /* Now identify the consecutive vals belonging to the same row */
1000         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
1001         if (j < n) ncols = j-i;
1002         else       ncols = n-i;
1003         /* Now assemble all these values with a single function call */
1004         ierr = MatSetValues_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
1005         i = j;
1006       }
1007     }
1008     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
1009     /* Now process the block-stash. Since the values are stashed column-oriented,
1010        set the roworiented flag to column oriented, and after MatSetValues()
1011        restore the original flags */
1012     r1 = baij->roworiented;
1013     r2 = a->roworiented;
1014     r3 = b->roworiented;
1015     baij->roworiented = PETSC_FALSE;
1016     a->roworiented    = PETSC_FALSE;
1017     b->roworiented    = PETSC_FALSE;
1018     while (1) {
1019       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
1020       if (!flg) break;
1021 
1022       for (i=0; i<n;) {
1023         /* Now identify the consecutive vals belonging to the same row */
1024         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
1025         if (j < n) ncols = j-i;
1026         else       ncols = n-i;
1027         ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
1028         i = j;
1029       }
1030     }
1031     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
1032     baij->roworiented = r1;
1033     a->roworiented    = r2;
1034     b->roworiented    = r3;
1035   }
1036 
1037   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
1038   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
1039 
1040   /* determine if any processor has disassembled, if so we must
1041      also disassemble ourselfs, in order that we may reassemble. */
1042   /*
1043      if nonzero structure of submatrix B cannot change then we know that
1044      no processor disassembled thus we can skip this stuff
1045   */
1046   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
1047     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
1048     if (mat->was_assembled && !other_disassembled) {
1049       ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
1050     }
1051   }
1052 
1053   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
1054     ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
1055   }
1056   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
1057   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
1058 
1059 #if defined(PETSC_USE_BOPT_g)
1060   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
1061     PetscLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);
1062     baij->ht_total_ct  = 0;
1063     baij->ht_insert_ct = 0;
1064   }
1065 #endif
1066   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
1067     ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
1068     mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
1069     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
1070   }
1071 
1072   if (baij->rowvalues) {
1073     ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
1074     baij->rowvalues = 0;
1075   }
1076   PetscFunctionReturn(0);
1077 }
1078 
1079 #undef __FUNCT__
1080 #define __FUNCT__ "MatView_MPIBAIJ_ASCIIorDraworSocket"
1081 static int MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1082 {
1083   Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ*)mat->data;
1084   PetscErrorCode ierr;
1085   int bs = baij->bs,size = baij->size,rank = baij->rank;
1086   PetscTruth        iascii,isdraw;
1087   PetscViewer       sviewer;
1088   PetscViewerFormat format;
1089 
1090   PetscFunctionBegin;
1091   /* printf(" MatView_MPIBAIJ_ASCIIorDraworSocket is called ...\n"); */
1092   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1093   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1094   if (iascii) {
1095     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1096     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1097       MatInfo info;
1098       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
1099       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1100       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
1101               rank,mat->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
1102               baij->bs,(int)info.memory);CHKERRQ(ierr);
1103       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1104       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
1105       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1106       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
1107       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1108       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
1109       PetscFunctionReturn(0);
1110     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1111       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
1112       PetscFunctionReturn(0);
1113     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1114       PetscFunctionReturn(0);
1115     }
1116   }
1117 
1118   if (isdraw) {
1119     PetscDraw       draw;
1120     PetscTruth isnull;
1121     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1122     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1123   }
1124 
1125   if (size == 1) {
1126     ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr);
1127     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
1128   } else {
1129     /* assemble the entire matrix onto first processor. */
1130     Mat         A;
1131     Mat_SeqBAIJ *Aloc;
1132     int         M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
1133     MatScalar   *a;
1134 
1135     /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
1136     /* Perhaps this should be the type of mat? */
1137     if (!rank) {
1138       ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr);
1139     } else {
1140       ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr);
1141     }
1142     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
1143     ierr = MatMPIBAIJSetPreallocation(A,baij->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1144     PetscLogObjectParent(mat,A);
1145 
1146     /* copy over the A part */
1147     Aloc = (Mat_SeqBAIJ*)baij->A->data;
1148     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1149     ierr = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr);
1150 
1151     for (i=0; i<mbs; i++) {
1152       rvals[0] = bs*(baij->rstart + i);
1153       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1154       for (j=ai[i]; j<ai[i+1]; j++) {
1155         col = (baij->cstart+aj[j])*bs;
1156         for (k=0; k<bs; k++) {
1157           ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1158           col++; a += bs;
1159         }
1160       }
1161     }
1162     /* copy over the B part */
1163     Aloc = (Mat_SeqBAIJ*)baij->B->data;
1164     ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1165     for (i=0; i<mbs; i++) {
1166       rvals[0] = bs*(baij->rstart + i);
1167       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1168       for (j=ai[i]; j<ai[i+1]; j++) {
1169         col = baij->garray[aj[j]]*bs;
1170         for (k=0; k<bs; k++) {
1171           ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1172           col++; a += bs;
1173         }
1174       }
1175     }
1176     ierr = PetscFree(rvals);CHKERRQ(ierr);
1177     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1178     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1179     /*
1180        Everyone has to call to draw the matrix since the graphics waits are
1181        synchronized across all processors that share the PetscDraw object
1182     */
1183     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1184     if (!rank) {
1185       ierr = PetscObjectSetName((PetscObject)((Mat_MPIBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
1186       ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1187     }
1188     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1189     ierr = MatDestroy(A);CHKERRQ(ierr);
1190   }
1191   PetscFunctionReturn(0);
1192 }
1193 
1194 #undef __FUNCT__
1195 #define __FUNCT__ "MatView_MPIBAIJ"
1196 PetscErrorCode MatView_MPIBAIJ(Mat mat,PetscViewer viewer)
1197 {
1198   PetscErrorCode ierr;
1199   PetscTruth iascii,isdraw,issocket,isbinary;
1200 
1201   PetscFunctionBegin;
1202   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1203   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1204   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
1205   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1206   if (iascii || isdraw || issocket || isbinary) {
1207     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1208   } else {
1209     SETERRQ1(1,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
1210   }
1211   PetscFunctionReturn(0);
1212 }
1213 
1214 #undef __FUNCT__
1215 #define __FUNCT__ "MatDestroy_MPIBAIJ"
1216 PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
1217 {
1218   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1219   PetscErrorCode ierr;
1220 
1221   PetscFunctionBegin;
1222 #if defined(PETSC_USE_LOG)
1223   PetscLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",mat->M,mat->N);
1224 #endif
1225   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1226   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
1227   ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
1228   ierr = MatDestroy(baij->A);CHKERRQ(ierr);
1229   ierr = MatDestroy(baij->B);CHKERRQ(ierr);
1230 #if defined (PETSC_USE_CTABLE)
1231   if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
1232 #else
1233   if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
1234 #endif
1235   if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
1236   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
1237   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
1238   if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
1239   if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
1240   if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
1241 #if defined(PETSC_USE_MAT_SINGLE)
1242   if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);}
1243 #endif
1244   ierr = PetscFree(baij);CHKERRQ(ierr);
1245   PetscFunctionReturn(0);
1246 }
1247 
1248 #undef __FUNCT__
1249 #define __FUNCT__ "MatMult_MPIBAIJ"
1250 PetscErrorCode MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1251 {
1252   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1253   PetscErrorCode ierr;
1254   int nt;
1255 
1256   PetscFunctionBegin;
1257   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1258   if (nt != A->n) {
1259     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1260   }
1261   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1262   if (nt != A->m) {
1263     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
1264   }
1265   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1266   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1267   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1268   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1269   ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1270   PetscFunctionReturn(0);
1271 }
1272 
1273 #undef __FUNCT__
1274 #define __FUNCT__ "MatMultAdd_MPIBAIJ"
1275 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1276 {
1277   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1278   PetscErrorCode ierr;
1279 
1280   PetscFunctionBegin;
1281   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1282   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1283   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1284   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1285   PetscFunctionReturn(0);
1286 }
1287 
1288 #undef __FUNCT__
1289 #define __FUNCT__ "MatMultTranspose_MPIBAIJ"
1290 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1291 {
1292   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1293   PetscErrorCode ierr;
1294 
1295   PetscFunctionBegin;
1296   /* do nondiagonal part */
1297   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1298   /* send it on its way */
1299   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1300   /* do local part */
1301   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1302   /* receive remote parts: note this assumes the values are not actually */
1303   /* inserted in yy until the next line, which is true for my implementation*/
1304   /* but is not perhaps always true. */
1305   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1306   PetscFunctionReturn(0);
1307 }
1308 
1309 #undef __FUNCT__
1310 #define __FUNCT__ "MatMultTransposeAdd_MPIBAIJ"
1311 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1312 {
1313   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1314   PetscErrorCode ierr;
1315 
1316   PetscFunctionBegin;
1317   /* do nondiagonal part */
1318   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1319   /* send it on its way */
1320   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1321   /* do local part */
1322   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1323   /* receive remote parts: note this assumes the values are not actually */
1324   /* inserted in yy until the next line, which is true for my implementation*/
1325   /* but is not perhaps always true. */
1326   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1327   PetscFunctionReturn(0);
1328 }
1329 
1330 /*
1331   This only works correctly for square matrices where the subblock A->A is the
1332    diagonal block
1333 */
1334 #undef __FUNCT__
1335 #define __FUNCT__ "MatGetDiagonal_MPIBAIJ"
1336 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1337 {
1338   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1339   PetscErrorCode ierr;
1340 
1341   PetscFunctionBegin;
1342   if (A->M != A->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1343   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1344   PetscFunctionReturn(0);
1345 }
1346 
1347 #undef __FUNCT__
1348 #define __FUNCT__ "MatScale_MPIBAIJ"
1349 PetscErrorCode MatScale_MPIBAIJ(const PetscScalar *aa,Mat A)
1350 {
1351   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1352   PetscErrorCode ierr;
1353 
1354   PetscFunctionBegin;
1355   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
1356   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
1357   PetscFunctionReturn(0);
1358 }
1359 
1360 #undef __FUNCT__
1361 #define __FUNCT__ "MatGetRow_MPIBAIJ"
1362 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
1363 {
1364   Mat_MPIBAIJ  *mat = (Mat_MPIBAIJ*)matin->data;
1365   PetscScalar  *vworkA,*vworkB,**pvA,**pvB,*v_p;
1366   int          bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB;
1367   int          nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs;
1368   int          *cmap,*idx_p,cstart = mat->cstart;
1369 
1370   PetscFunctionBegin;
1371   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1372   mat->getrowactive = PETSC_TRUE;
1373 
1374   if (!mat->rowvalues && (idx || v)) {
1375     /*
1376         allocate enough space to hold information from the longest row.
1377     */
1378     Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
1379     int     max = 1,mbs = mat->mbs,tmp;
1380     for (i=0; i<mbs; i++) {
1381       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1382       if (max < tmp) { max = tmp; }
1383     }
1384     ierr = PetscMalloc(max*bs2*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1385     mat->rowindices = (int*)(mat->rowvalues + max*bs2);
1386   }
1387 
1388   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows")
1389   lrow = row - brstart;
1390 
1391   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1392   if (!v)   {pvA = 0; pvB = 0;}
1393   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1394   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1395   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1396   nztot = nzA + nzB;
1397 
1398   cmap  = mat->garray;
1399   if (v  || idx) {
1400     if (nztot) {
1401       /* Sort by increasing column numbers, assuming A and B already sorted */
1402       int imark = -1;
1403       if (v) {
1404         *v = v_p = mat->rowvalues;
1405         for (i=0; i<nzB; i++) {
1406           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1407           else break;
1408         }
1409         imark = i;
1410         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1411         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1412       }
1413       if (idx) {
1414         *idx = idx_p = mat->rowindices;
1415         if (imark > -1) {
1416           for (i=0; i<imark; i++) {
1417             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1418           }
1419         } else {
1420           for (i=0; i<nzB; i++) {
1421             if (cmap[cworkB[i]/bs] < cstart)
1422               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1423             else break;
1424           }
1425           imark = i;
1426         }
1427         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1428         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1429       }
1430     } else {
1431       if (idx) *idx = 0;
1432       if (v)   *v   = 0;
1433     }
1434   }
1435   *nz = nztot;
1436   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1437   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1438   PetscFunctionReturn(0);
1439 }
1440 
1441 #undef __FUNCT__
1442 #define __FUNCT__ "MatRestoreRow_MPIBAIJ"
1443 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
1444 {
1445   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1446 
1447   PetscFunctionBegin;
1448   if (baij->getrowactive == PETSC_FALSE) {
1449     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1450   }
1451   baij->getrowactive = PETSC_FALSE;
1452   PetscFunctionReturn(0);
1453 }
1454 
1455 #undef __FUNCT__
1456 #define __FUNCT__ "MatGetBlockSize_MPIBAIJ"
1457 PetscErrorCode MatGetBlockSize_MPIBAIJ(Mat mat,int *bs)
1458 {
1459   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1460 
1461   PetscFunctionBegin;
1462   *bs = baij->bs;
1463   PetscFunctionReturn(0);
1464 }
1465 
1466 #undef __FUNCT__
1467 #define __FUNCT__ "MatZeroEntries_MPIBAIJ"
1468 PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
1469 {
1470   Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data;
1471   PetscErrorCode ierr;
1472 
1473   PetscFunctionBegin;
1474   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1475   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1476   PetscFunctionReturn(0);
1477 }
1478 
1479 #undef __FUNCT__
1480 #define __FUNCT__ "MatGetInfo_MPIBAIJ"
1481 PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1482 {
1483   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data;
1484   Mat         A = a->A,B = a->B;
1485   PetscErrorCode ierr;
1486   PetscReal   isend[5],irecv[5];
1487 
1488   PetscFunctionBegin;
1489   info->block_size     = (PetscReal)a->bs;
1490   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1491   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1492   isend[3] = info->memory;  isend[4] = info->mallocs;
1493   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1494   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1495   isend[3] += info->memory;  isend[4] += info->mallocs;
1496   if (flag == MAT_LOCAL) {
1497     info->nz_used      = isend[0];
1498     info->nz_allocated = isend[1];
1499     info->nz_unneeded  = isend[2];
1500     info->memory       = isend[3];
1501     info->mallocs      = isend[4];
1502   } else if (flag == MAT_GLOBAL_MAX) {
1503     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1504     info->nz_used      = irecv[0];
1505     info->nz_allocated = irecv[1];
1506     info->nz_unneeded  = irecv[2];
1507     info->memory       = irecv[3];
1508     info->mallocs      = irecv[4];
1509   } else if (flag == MAT_GLOBAL_SUM) {
1510     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1511     info->nz_used      = irecv[0];
1512     info->nz_allocated = irecv[1];
1513     info->nz_unneeded  = irecv[2];
1514     info->memory       = irecv[3];
1515     info->mallocs      = irecv[4];
1516   } else {
1517     SETERRQ1(1,"Unknown MatInfoType argument %d",flag);
1518   }
1519   info->rows_global       = (PetscReal)A->M;
1520   info->columns_global    = (PetscReal)A->N;
1521   info->rows_local        = (PetscReal)A->m;
1522   info->columns_local     = (PetscReal)A->N;
1523   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1524   info->fill_ratio_needed = 0;
1525   info->factor_mallocs    = 0;
1526   PetscFunctionReturn(0);
1527 }
1528 
1529 #undef __FUNCT__
1530 #define __FUNCT__ "MatSetOption_MPIBAIJ"
1531 PetscErrorCode MatSetOption_MPIBAIJ(Mat A,MatOption op)
1532 {
1533   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1534   PetscErrorCode ierr;
1535 
1536   PetscFunctionBegin;
1537   switch (op) {
1538   case MAT_NO_NEW_NONZERO_LOCATIONS:
1539   case MAT_YES_NEW_NONZERO_LOCATIONS:
1540   case MAT_COLUMNS_UNSORTED:
1541   case MAT_COLUMNS_SORTED:
1542   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1543   case MAT_KEEP_ZEROED_ROWS:
1544   case MAT_NEW_NONZERO_LOCATION_ERR:
1545     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1546     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1547     break;
1548   case MAT_ROW_ORIENTED:
1549     a->roworiented = PETSC_TRUE;
1550     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1551     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1552     break;
1553   case MAT_ROWS_SORTED:
1554   case MAT_ROWS_UNSORTED:
1555   case MAT_YES_NEW_DIAGONALS:
1556     PetscLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
1557     break;
1558   case MAT_COLUMN_ORIENTED:
1559     a->roworiented = PETSC_FALSE;
1560     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1561     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1562     break;
1563   case MAT_IGNORE_OFF_PROC_ENTRIES:
1564     a->donotstash = PETSC_TRUE;
1565     break;
1566   case MAT_NO_NEW_DIAGONALS:
1567     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1568   case MAT_USE_HASH_TABLE:
1569     a->ht_flag = PETSC_TRUE;
1570     break;
1571   case MAT_SYMMETRIC:
1572   case MAT_STRUCTURALLY_SYMMETRIC:
1573   case MAT_HERMITIAN:
1574   case MAT_SYMMETRY_ETERNAL:
1575     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1576     break;
1577   case MAT_NOT_SYMMETRIC:
1578   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1579   case MAT_NOT_HERMITIAN:
1580   case MAT_NOT_SYMMETRY_ETERNAL:
1581     break;
1582   default:
1583     SETERRQ(PETSC_ERR_SUP,"unknown option");
1584   }
1585   PetscFunctionReturn(0);
1586 }
1587 
1588 #undef __FUNCT__
1589 #define __FUNCT__ "MatTranspose_MPIBAIJ("
1590 PetscErrorCode MatTranspose_MPIBAIJ(Mat A,Mat *matout)
1591 {
1592   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data;
1593   Mat_SeqBAIJ *Aloc;
1594   Mat         B;
1595   PetscErrorCode ierr;
1596   int M=A->M,N=A->N,*ai,*aj,i,*rvals,j,k,col;
1597   int         bs=baij->bs,mbs=baij->mbs;
1598   MatScalar   *a;
1599 
1600   PetscFunctionBegin;
1601   if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1602   ierr = MatCreate(A->comm,A->n,A->m,N,M,&B);CHKERRQ(ierr);
1603   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
1604   ierr = MatMPIBAIJSetPreallocation(B,baij->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
1605 
1606   /* copy over the A part */
1607   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1608   ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
1609   ierr = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr);
1610 
1611   for (i=0; i<mbs; i++) {
1612     rvals[0] = bs*(baij->rstart + i);
1613     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1614     for (j=ai[i]; j<ai[i+1]; j++) {
1615       col = (baij->cstart+aj[j])*bs;
1616       for (k=0; k<bs; k++) {
1617         ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1618         col++; a += bs;
1619       }
1620     }
1621   }
1622   /* copy over the B part */
1623   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1624   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1625   for (i=0; i<mbs; i++) {
1626     rvals[0] = bs*(baij->rstart + i);
1627     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1628     for (j=ai[i]; j<ai[i+1]; j++) {
1629       col = baij->garray[aj[j]]*bs;
1630       for (k=0; k<bs; k++) {
1631         ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1632         col++; a += bs;
1633       }
1634     }
1635   }
1636   ierr = PetscFree(rvals);CHKERRQ(ierr);
1637   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1638   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1639 
1640   if (matout) {
1641     *matout = B;
1642   } else {
1643     ierr = MatHeaderCopy(A,B);CHKERRQ(ierr);
1644   }
1645   PetscFunctionReturn(0);
1646 }
1647 
1648 #undef __FUNCT__
1649 #define __FUNCT__ "MatDiagonalScale_MPIBAIJ"
1650 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1651 {
1652   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1653   Mat         a = baij->A,b = baij->B;
1654   PetscErrorCode ierr;
1655   int s1,s2,s3;
1656 
1657   PetscFunctionBegin;
1658   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1659   if (rr) {
1660     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1661     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1662     /* Overlap communication with computation. */
1663     ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1664   }
1665   if (ll) {
1666     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1667     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1668     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1669   }
1670   /* scale  the diagonal block */
1671   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1672 
1673   if (rr) {
1674     /* Do a scatter end and then right scale the off-diagonal block */
1675     ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1676     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1677   }
1678 
1679   PetscFunctionReturn(0);
1680 }
1681 
1682 #undef __FUNCT__
1683 #define __FUNCT__ "MatZeroRows_MPIBAIJ"
1684 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A,IS is,const PetscScalar *diag)
1685 {
1686   Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
1687   int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
1688   int            *nprocs,j,idx,nsends,row;
1689   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
1690   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
1691   int            *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs;
1692   MPI_Comm       comm = A->comm;
1693   MPI_Request    *send_waits,*recv_waits;
1694   MPI_Status     recv_status,*send_status;
1695   IS             istmp;
1696   PetscTruth     found;
1697 
1698   PetscFunctionBegin;
1699   ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
1700   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
1701 
1702   /*  first count number of contributors to each processor */
1703   ierr  = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
1704   ierr  = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
1705   ierr  = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/
1706   for (i=0; i<N; i++) {
1707     idx   = rows[i];
1708     found = PETSC_FALSE;
1709     for (j=0; j<size; j++) {
1710       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
1711         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; found = PETSC_TRUE; break;
1712       }
1713     }
1714     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
1715   }
1716   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
1717 
1718   /* inform other processors of number of messages and max length*/
1719   ierr = PetscMaxSum(comm,nprocs,&nmax,&nrecvs);CHKERRQ(ierr);
1720 
1721   /* post receives:   */
1722   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
1723   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
1724   for (i=0; i<nrecvs; i++) {
1725     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
1726   }
1727 
1728   /* do sends:
1729      1) starts[i] gives the starting index in svalues for stuff going to
1730      the ith processor
1731   */
1732   ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
1733   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
1734   ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
1735   starts[0]  = 0;
1736   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1737   for (i=0; i<N; i++) {
1738     svalues[starts[owner[i]]++] = rows[i];
1739   }
1740   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
1741 
1742   starts[0] = 0;
1743   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[2*i-2];}
1744   count = 0;
1745   for (i=0; i<size; i++) {
1746     if (nprocs[2*i+1]) {
1747       ierr = MPI_Isend(svalues+starts[i],nprocs[2*i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
1748     }
1749   }
1750   ierr = PetscFree(starts);CHKERRQ(ierr);
1751 
1752   base = owners[rank]*bs;
1753 
1754   /*  wait on receives */
1755   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
1756   source = lens + nrecvs;
1757   count  = nrecvs; slen = 0;
1758   while (count) {
1759     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
1760     /* unpack receives into our local space */
1761     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
1762     source[imdex]  = recv_status.MPI_SOURCE;
1763     lens[imdex]    = n;
1764     slen          += n;
1765     count--;
1766   }
1767   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1768 
1769   /* move the data into the send scatter */
1770   ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
1771   count = 0;
1772   for (i=0; i<nrecvs; i++) {
1773     values = rvalues + i*nmax;
1774     for (j=0; j<lens[i]; j++) {
1775       lrows[count++] = values[j] - base;
1776     }
1777   }
1778   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1779   ierr = PetscFree(lens);CHKERRQ(ierr);
1780   ierr = PetscFree(owner);CHKERRQ(ierr);
1781   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1782 
1783   /* actually zap the local rows */
1784   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
1785   PetscLogObjectParent(A,istmp);
1786 
1787   /*
1788         Zero the required rows. If the "diagonal block" of the matrix
1789      is square and the user wishes to set the diagonal we use seperate
1790      code so that MatSetValues() is not called for each diagonal allocating
1791      new memory, thus calling lots of mallocs and slowing things down.
1792 
1793        Contributed by: Mathew Knepley
1794   */
1795   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1796   ierr = MatZeroRows_SeqBAIJ(l->B,istmp,0);CHKERRQ(ierr);
1797   if (diag && (l->A->M == l->A->N)) {
1798     ierr = MatZeroRows_SeqBAIJ(l->A,istmp,diag);CHKERRQ(ierr);
1799   } else if (diag) {
1800     ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr);
1801     if (((Mat_SeqBAIJ*)l->A->data)->nonew) {
1802       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1803 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1804     }
1805     for (i=0; i<slen; i++) {
1806       row  = lrows[i] + rstart_bs;
1807       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
1808     }
1809     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1810     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1811   } else {
1812     ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr);
1813   }
1814 
1815   ierr = ISDestroy(istmp);CHKERRQ(ierr);
1816   ierr = PetscFree(lrows);CHKERRQ(ierr);
1817 
1818   /* wait on sends */
1819   if (nsends) {
1820     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1821     ierr = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1822     ierr = PetscFree(send_status);CHKERRQ(ierr);
1823   }
1824   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1825   ierr = PetscFree(svalues);CHKERRQ(ierr);
1826 
1827   PetscFunctionReturn(0);
1828 }
1829 
1830 #undef __FUNCT__
1831 #define __FUNCT__ "MatPrintHelp_MPIBAIJ"
1832 PetscErrorCode MatPrintHelp_MPIBAIJ(Mat A)
1833 {
1834   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*)A->data;
1835   MPI_Comm    comm = A->comm;
1836   static int  called = 0;
1837   PetscErrorCode ierr;
1838 
1839   PetscFunctionBegin;
1840   if (!a->rank) {
1841     ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr);
1842   }
1843   if (called) {PetscFunctionReturn(0);} else called = 1;
1844   ierr = (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
1845   ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
1846   PetscFunctionReturn(0);
1847 }
1848 
1849 #undef __FUNCT__
1850 #define __FUNCT__ "MatSetUnfactored_MPIBAIJ"
1851 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
1852 {
1853   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*)A->data;
1854   PetscErrorCode ierr;
1855 
1856   PetscFunctionBegin;
1857   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1858   PetscFunctionReturn(0);
1859 }
1860 
1861 static int MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
1862 
1863 #undef __FUNCT__
1864 #define __FUNCT__ "MatEqual_MPIBAIJ"
1865 PetscErrorCode MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag)
1866 {
1867   Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1868   Mat         a,b,c,d;
1869   PetscTruth  flg;
1870   PetscErrorCode ierr;
1871 
1872   PetscFunctionBegin;
1873   a = matA->A; b = matA->B;
1874   c = matB->A; d = matB->B;
1875 
1876   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1877   if (flg == PETSC_TRUE) {
1878     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1879   }
1880   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1881   PetscFunctionReturn(0);
1882 }
1883 
1884 
1885 #undef __FUNCT__
1886 #define __FUNCT__ "MatSetUpPreallocation_MPIBAIJ"
1887 PetscErrorCode MatSetUpPreallocation_MPIBAIJ(Mat A)
1888 {
1889   PetscErrorCode ierr;
1890 
1891   PetscFunctionBegin;
1892   ierr =  MatMPIBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1893   PetscFunctionReturn(0);
1894 }
1895 
1896 /* -------------------------------------------------------------------*/
1897 static struct _MatOps MatOps_Values = {
1898        MatSetValues_MPIBAIJ,
1899        MatGetRow_MPIBAIJ,
1900        MatRestoreRow_MPIBAIJ,
1901        MatMult_MPIBAIJ,
1902 /* 4*/ MatMultAdd_MPIBAIJ,
1903        MatMultTranspose_MPIBAIJ,
1904        MatMultTransposeAdd_MPIBAIJ,
1905        0,
1906        0,
1907        0,
1908 /*10*/ 0,
1909        0,
1910        0,
1911        0,
1912        MatTranspose_MPIBAIJ,
1913 /*15*/ MatGetInfo_MPIBAIJ,
1914        MatEqual_MPIBAIJ,
1915        MatGetDiagonal_MPIBAIJ,
1916        MatDiagonalScale_MPIBAIJ,
1917        MatNorm_MPIBAIJ,
1918 /*20*/ MatAssemblyBegin_MPIBAIJ,
1919        MatAssemblyEnd_MPIBAIJ,
1920        0,
1921        MatSetOption_MPIBAIJ,
1922        MatZeroEntries_MPIBAIJ,
1923 /*25*/ MatZeroRows_MPIBAIJ,
1924        0,
1925        0,
1926        0,
1927        0,
1928 /*30*/ MatSetUpPreallocation_MPIBAIJ,
1929        0,
1930        0,
1931        0,
1932        0,
1933 /*35*/ MatDuplicate_MPIBAIJ,
1934        0,
1935        0,
1936        0,
1937        0,
1938 /*40*/ 0,
1939        MatGetSubMatrices_MPIBAIJ,
1940        MatIncreaseOverlap_MPIBAIJ,
1941        MatGetValues_MPIBAIJ,
1942        0,
1943 /*45*/ MatPrintHelp_MPIBAIJ,
1944        MatScale_MPIBAIJ,
1945        0,
1946        0,
1947        0,
1948 /*50*/ MatGetBlockSize_MPIBAIJ,
1949        0,
1950        0,
1951        0,
1952        0,
1953 /*55*/ 0,
1954        0,
1955        MatSetUnfactored_MPIBAIJ,
1956        0,
1957        MatSetValuesBlocked_MPIBAIJ,
1958 /*60*/ 0,
1959        MatDestroy_MPIBAIJ,
1960        MatView_MPIBAIJ,
1961        MatGetPetscMaps_Petsc,
1962        0,
1963 /*65*/ 0,
1964        0,
1965        0,
1966        0,
1967        0,
1968 /*70*/ MatGetRowMax_MPIBAIJ,
1969        0,
1970        0,
1971        0,
1972        0,
1973 /*75*/ 0,
1974        0,
1975        0,
1976        0,
1977        0,
1978 /*80*/ 0,
1979        0,
1980        0,
1981        0,
1982 /*85*/ MatLoad_MPIBAIJ
1983 };
1984 
1985 
1986 EXTERN_C_BEGIN
1987 #undef __FUNCT__
1988 #define __FUNCT__ "MatGetDiagonalBlock_MPIBAIJ"
1989 PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1990 {
1991   PetscFunctionBegin;
1992   *a      = ((Mat_MPIBAIJ *)A->data)->A;
1993   *iscopy = PETSC_FALSE;
1994   PetscFunctionReturn(0);
1995 }
1996 EXTERN_C_END
1997 
1998 EXTERN_C_BEGIN
1999 extern int MatConvert_MPIBAIJ_MPISBAIJ(Mat,const MatType,Mat*);
2000 EXTERN_C_END
2001 
2002 EXTERN_C_BEGIN
2003 #undef __FUNCT__
2004 #define __FUNCT__ "MatMPIBAIJSetPreallocation_MPIBAIJ"
2005 PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B,int bs,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
2006 {
2007   Mat_MPIBAIJ  *b;
2008   PetscErrorCode ierr;
2009   int  i;
2010 
2011   PetscFunctionBegin;
2012   B->preallocated = PETSC_TRUE;
2013   ierr = PetscOptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2014 
2015   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
2016   if (d_nz == PETSC_DEFAULT || d_nz == PETSC_DECIDE) d_nz = 5;
2017   if (o_nz == PETSC_DEFAULT || o_nz == PETSC_DECIDE) o_nz = 2;
2018   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
2019   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
2020   if (d_nnz) {
2021   for (i=0; i<B->m/bs; i++) {
2022       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %d value %d",i,d_nnz[i]);
2023     }
2024   }
2025   if (o_nnz) {
2026     for (i=0; i<B->m/bs; i++) {
2027       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %d value %d",i,o_nnz[i]);
2028     }
2029   }
2030 
2031   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr);
2032   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr);
2033   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
2034   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
2035 
2036   b = (Mat_MPIBAIJ*)B->data;
2037   b->bs  = bs;
2038   b->bs2 = bs*bs;
2039   b->mbs = B->m/bs;
2040   b->nbs = B->n/bs;
2041   b->Mbs = B->M/bs;
2042   b->Nbs = B->N/bs;
2043 
2044   ierr = MPI_Allgather(&b->mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
2045   b->rowners[0]    = 0;
2046   for (i=2; i<=b->size; i++) {
2047     b->rowners[i] += b->rowners[i-1];
2048   }
2049   b->rstart    = b->rowners[b->rank];
2050   b->rend      = b->rowners[b->rank+1];
2051 
2052   ierr = MPI_Allgather(&b->nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
2053   b->cowners[0] = 0;
2054   for (i=2; i<=b->size; i++) {
2055     b->cowners[i] += b->cowners[i-1];
2056   }
2057   b->cstart    = b->cowners[b->rank];
2058   b->cend      = b->cowners[b->rank+1];
2059 
2060   for (i=0; i<=b->size; i++) {
2061     b->rowners_bs[i] = b->rowners[i]*bs;
2062   }
2063   b->rstart_bs = b->rstart*bs;
2064   b->rend_bs   = b->rend*bs;
2065   b->cstart_bs = b->cstart*bs;
2066   b->cend_bs   = b->cend*bs;
2067 
2068   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->n,B->m,B->n,&b->A);CHKERRQ(ierr);
2069   ierr = MatSetType(b->A,MATSEQBAIJ);CHKERRQ(ierr);
2070   ierr = MatSeqBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2071   PetscLogObjectParent(B,b->A);
2072   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->N,B->m,B->N,&b->B);CHKERRQ(ierr);
2073   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
2074   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
2075   PetscLogObjectParent(B,b->B);
2076 
2077   ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);
2078 
2079   PetscFunctionReturn(0);
2080 }
2081 EXTERN_C_END
2082 
2083 EXTERN_C_BEGIN
2084 EXTERN PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat,Vec);
2085 EXTERN PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat,PetscReal);
2086 EXTERN_C_END
2087 
2088 /*MC
2089    MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.
2090 
2091    Options Database Keys:
2092 . -mat_type mpibaij - sets the matrix type to "mpibaij" during a call to MatSetFromOptions()
2093 
2094   Level: beginner
2095 
2096 .seealso: MatCreateMPIBAIJ
2097 M*/
2098 
2099 EXTERN_C_BEGIN
2100 #undef __FUNCT__
2101 #define __FUNCT__ "MatCreate_MPIBAIJ"
2102 PetscErrorCode MatCreate_MPIBAIJ(Mat B)
2103 {
2104   Mat_MPIBAIJ  *b;
2105   PetscErrorCode ierr;
2106   PetscTruth   flg;
2107 
2108   PetscFunctionBegin;
2109   ierr = PetscNew(Mat_MPIBAIJ,&b);CHKERRQ(ierr);
2110   B->data = (void*)b;
2111 
2112   ierr    = PetscMemzero(b,sizeof(Mat_MPIBAIJ));CHKERRQ(ierr);
2113   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2114   B->mapping    = 0;
2115   B->factor     = 0;
2116   B->assembled  = PETSC_FALSE;
2117 
2118   B->insertmode = NOT_SET_VALUES;
2119   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
2120   ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr);
2121 
2122   /* build local table of row and column ownerships */
2123   ierr          = PetscMalloc(3*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
2124   PetscLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
2125   b->cowners    = b->rowners + b->size + 2;
2126   b->rowners_bs = b->cowners + b->size + 2;
2127 
2128   /* build cache for off array entries formed */
2129   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
2130   b->donotstash  = PETSC_FALSE;
2131   b->colmap      = PETSC_NULL;
2132   b->garray      = PETSC_NULL;
2133   b->roworiented = PETSC_TRUE;
2134 
2135 #if defined(PETSC_USE_MAT_SINGLE)
2136   /* stuff for MatSetValues_XXX in single precision */
2137   b->setvalueslen     = 0;
2138   b->setvaluescopy    = PETSC_NULL;
2139 #endif
2140 
2141   /* stuff used in block assembly */
2142   b->barray       = 0;
2143 
2144   /* stuff used for matrix vector multiply */
2145   b->lvec         = 0;
2146   b->Mvctx        = 0;
2147 
2148   /* stuff for MatGetRow() */
2149   b->rowindices   = 0;
2150   b->rowvalues    = 0;
2151   b->getrowactive = PETSC_FALSE;
2152 
2153   /* hash table stuff */
2154   b->ht           = 0;
2155   b->hd           = 0;
2156   b->ht_size      = 0;
2157   b->ht_flag      = PETSC_FALSE;
2158   b->ht_fact      = 0;
2159   b->ht_total_ct  = 0;
2160   b->ht_insert_ct = 0;
2161 
2162   ierr = PetscOptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
2163   if (flg) {
2164     PetscReal fact = 1.39;
2165     ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
2166     ierr = PetscOptionsGetReal(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
2167     if (fact <= 1.0) fact = 1.39;
2168     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
2169     PetscLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact);
2170   }
2171   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2172                                      "MatStoreValues_MPIBAIJ",
2173                                      MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
2174   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2175                                      "MatRetrieveValues_MPIBAIJ",
2176                                      MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
2177   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
2178                                      "MatGetDiagonalBlock_MPIBAIJ",
2179                                      MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
2180   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPIBAIJSetPreallocation_C",
2181                                      "MatMPIBAIJSetPreallocation_MPIBAIJ",
2182                                      MatMPIBAIJSetPreallocation_MPIBAIJ);CHKERRQ(ierr);
2183   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatDiagonalScaleLocal_C",
2184                                      "MatDiagonalScaleLocal_MPIBAIJ",
2185                                      MatDiagonalScaleLocal_MPIBAIJ);CHKERRQ(ierr);
2186   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSetHashTableFactor_C",
2187                                      "MatSetHashTableFactor_MPIBAIJ",
2188                                      MatSetHashTableFactor_MPIBAIJ);CHKERRQ(ierr);
2189   PetscFunctionReturn(0);
2190 }
2191 EXTERN_C_END
2192 
2193 /*MC
2194    MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.
2195 
2196    This matrix type is identical to MATSEQBAIJ when constructed with a single process communicator,
2197    and MATMPIBAIJ otherwise.
2198 
2199    Options Database Keys:
2200 . -mat_type baij - sets the matrix type to "baij" during a call to MatSetFromOptions()
2201 
2202   Level: beginner
2203 
2204 .seealso: MatCreateMPIBAIJ,MATSEQBAIJ,MATMPIBAIJ
2205 M*/
2206 
2207 EXTERN_C_BEGIN
2208 #undef __FUNCT__
2209 #define __FUNCT__ "MatCreate_BAIJ"
2210 PetscErrorCode MatCreate_BAIJ(Mat A)
2211 {
2212   PetscErrorCode ierr,size;
2213 
2214   PetscFunctionBegin;
2215   ierr = PetscObjectChangeTypeName((PetscObject)A,MATBAIJ);CHKERRQ(ierr);
2216   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
2217   if (size == 1) {
2218     ierr = MatSetType(A,MATSEQBAIJ);CHKERRQ(ierr);
2219   } else {
2220     ierr = MatSetType(A,MATMPIBAIJ);CHKERRQ(ierr);
2221   }
2222   PetscFunctionReturn(0);
2223 }
2224 EXTERN_C_END
2225 
2226 #undef __FUNCT__
2227 #define __FUNCT__ "MatMPIBAIJSetPreallocation"
2228 /*@C
2229    MatMPIBAIJSetPreallocation - Creates a sparse parallel matrix in block AIJ format
2230    (block compressed row).  For good matrix assembly performance
2231    the user should preallocate the matrix storage by setting the parameters
2232    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2233    performance can be increased by more than a factor of 50.
2234 
2235    Collective on Mat
2236 
2237    Input Parameters:
2238 +  A - the matrix
2239 .  bs   - size of blockk
2240 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2241            submatrix  (same for all local rows)
2242 .  d_nnz - array containing the number of block nonzeros in the various block rows
2243            of the in diagonal portion of the local (possibly different for each block
2244            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
2245 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2246            submatrix (same for all local rows).
2247 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2248            off-diagonal portion of the local submatrix (possibly different for
2249            each block row) or PETSC_NULL.
2250 
2251    Output Parameter:
2252 
2253 
2254    Options Database Keys:
2255 .   -mat_no_unroll - uses code that does not unroll the loops in the
2256                      block calculations (much slower)
2257 .   -mat_block_size - size of the blocks to use
2258 
2259    Notes:
2260    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
2261    than it must be used on all processors that share the object for that argument.
2262 
2263    Storage Information:
2264    For a square global matrix we define each processor's diagonal portion
2265    to be its local rows and the corresponding columns (a square submatrix);
2266    each processor's off-diagonal portion encompasses the remainder of the
2267    local matrix (a rectangular submatrix).
2268 
2269    The user can specify preallocated storage for the diagonal part of
2270    the local submatrix with either d_nz or d_nnz (not both).  Set
2271    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2272    memory allocation.  Likewise, specify preallocated storage for the
2273    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2274 
2275    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2276    the figure below we depict these three local rows and all columns (0-11).
2277 
2278 .vb
2279            0 1 2 3 4 5 6 7 8 9 10 11
2280           -------------------
2281    row 3  |  o o o d d d o o o o o o
2282    row 4  |  o o o d d d o o o o o o
2283    row 5  |  o o o d d d o o o o o o
2284           -------------------
2285 .ve
2286 
2287    Thus, any entries in the d locations are stored in the d (diagonal)
2288    submatrix, and any entries in the o locations are stored in the
2289    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
2290    stored simply in the MATSEQBAIJ format for compressed row storage.
2291 
2292    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2293    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2294    In general, for PDE problems in which most nonzeros are near the diagonal,
2295    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2296    or you will get TERRIBLE performance; see the users' manual chapter on
2297    matrices.
2298 
2299    Level: intermediate
2300 
2301 .keywords: matrix, block, aij, compressed row, sparse, parallel
2302 
2303 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2304 @*/
2305 PetscErrorCode MatMPIBAIJSetPreallocation(Mat B,int bs,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[])
2306 {
2307   PetscErrorCode ierr,(*f)(Mat,int,int,const int[],int,const int[]);
2308 
2309   PetscFunctionBegin;
2310   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPIBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2311   if (f) {
2312     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2313   }
2314   PetscFunctionReturn(0);
2315 }
2316 
2317 #undef __FUNCT__
2318 #define __FUNCT__ "MatCreateMPIBAIJ"
2319 /*@C
2320    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
2321    (block compressed row).  For good matrix assembly performance
2322    the user should preallocate the matrix storage by setting the parameters
2323    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2324    performance can be increased by more than a factor of 50.
2325 
2326    Collective on MPI_Comm
2327 
2328    Input Parameters:
2329 +  comm - MPI communicator
2330 .  bs   - size of blockk
2331 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2332            This value should be the same as the local size used in creating the
2333            y vector for the matrix-vector product y = Ax.
2334 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2335            This value should be the same as the local size used in creating the
2336            x vector for the matrix-vector product y = Ax.
2337 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2338 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2339 .  d_nz  - number of nonzero blocks per block row in diagonal portion of local
2340            submatrix  (same for all local rows)
2341 .  d_nnz - array containing the number of nonzero blocks in the various block rows
2342            of the in diagonal portion of the local (possibly different for each block
2343            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
2344 .  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
2345            submatrix (same for all local rows).
2346 -  o_nnz - array containing the number of nonzero blocks in the various block rows of the
2347            off-diagonal portion of the local submatrix (possibly different for
2348            each block row) or PETSC_NULL.
2349 
2350    Output Parameter:
2351 .  A - the matrix
2352 
2353    Options Database Keys:
2354 .   -mat_no_unroll - uses code that does not unroll the loops in the
2355                      block calculations (much slower)
2356 .   -mat_block_size - size of the blocks to use
2357 
2358    Notes:
2359    A nonzero block is any block that as 1 or more nonzeros in it
2360 
2361    The user MUST specify either the local or global matrix dimensions
2362    (possibly both).
2363 
2364    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
2365    than it must be used on all processors that share the object for that argument.
2366 
2367    Storage Information:
2368    For a square global matrix we define each processor's diagonal portion
2369    to be its local rows and the corresponding columns (a square submatrix);
2370    each processor's off-diagonal portion encompasses the remainder of the
2371    local matrix (a rectangular submatrix).
2372 
2373    The user can specify preallocated storage for the diagonal part of
2374    the local submatrix with either d_nz or d_nnz (not both).  Set
2375    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2376    memory allocation.  Likewise, specify preallocated storage for the
2377    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2378 
2379    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2380    the figure below we depict these three local rows and all columns (0-11).
2381 
2382 .vb
2383            0 1 2 3 4 5 6 7 8 9 10 11
2384           -------------------
2385    row 3  |  o o o d d d o o o o o o
2386    row 4  |  o o o d d d o o o o o o
2387    row 5  |  o o o d d d o o o o o o
2388           -------------------
2389 .ve
2390 
2391    Thus, any entries in the d locations are stored in the d (diagonal)
2392    submatrix, and any entries in the o locations are stored in the
2393    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
2394    stored simply in the MATSEQBAIJ format for compressed row storage.
2395 
2396    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2397    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2398    In general, for PDE problems in which most nonzeros are near the diagonal,
2399    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2400    or you will get TERRIBLE performance; see the users' manual chapter on
2401    matrices.
2402 
2403    Level: intermediate
2404 
2405 .keywords: matrix, block, aij, compressed row, sparse, parallel
2406 
2407 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2408 @*/
2409 PetscErrorCode MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[],Mat *A)
2410 {
2411   PetscErrorCode ierr,size;
2412 
2413   PetscFunctionBegin;
2414   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2415   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2416   if (size > 1) {
2417     ierr = MatSetType(*A,MATMPIBAIJ);CHKERRQ(ierr);
2418     ierr = MatMPIBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2419   } else {
2420     ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
2421     ierr = MatSeqBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2422   }
2423   PetscFunctionReturn(0);
2424 }
2425 
2426 #undef __FUNCT__
2427 #define __FUNCT__ "MatDuplicate_MPIBAIJ"
2428 static int MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2429 {
2430   Mat         mat;
2431   Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
2432   PetscErrorCode ierr;
2433   int len=0;
2434 
2435   PetscFunctionBegin;
2436   *newmat       = 0;
2437   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
2438   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
2439   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2440 
2441   mat->factor       = matin->factor;
2442   mat->preallocated = PETSC_TRUE;
2443   mat->assembled    = PETSC_TRUE;
2444   mat->insertmode   = NOT_SET_VALUES;
2445 
2446   a      = (Mat_MPIBAIJ*)mat->data;
2447   a->bs  = oldmat->bs;
2448   a->bs2 = oldmat->bs2;
2449   a->mbs = oldmat->mbs;
2450   a->nbs = oldmat->nbs;
2451   a->Mbs = oldmat->Mbs;
2452   a->Nbs = oldmat->Nbs;
2453 
2454   a->rstart       = oldmat->rstart;
2455   a->rend         = oldmat->rend;
2456   a->cstart       = oldmat->cstart;
2457   a->cend         = oldmat->cend;
2458   a->size         = oldmat->size;
2459   a->rank         = oldmat->rank;
2460   a->donotstash   = oldmat->donotstash;
2461   a->roworiented  = oldmat->roworiented;
2462   a->rowindices   = 0;
2463   a->rowvalues    = 0;
2464   a->getrowactive = PETSC_FALSE;
2465   a->barray       = 0;
2466   a->rstart_bs    = oldmat->rstart_bs;
2467   a->rend_bs      = oldmat->rend_bs;
2468   a->cstart_bs    = oldmat->cstart_bs;
2469   a->cend_bs      = oldmat->cend_bs;
2470 
2471   /* hash table stuff */
2472   a->ht           = 0;
2473   a->hd           = 0;
2474   a->ht_size      = 0;
2475   a->ht_flag      = oldmat->ht_flag;
2476   a->ht_fact      = oldmat->ht_fact;
2477   a->ht_total_ct  = 0;
2478   a->ht_insert_ct = 0;
2479 
2480   ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr);
2481   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
2482   ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr);
2483   if (oldmat->colmap) {
2484 #if defined (PETSC_USE_CTABLE)
2485   ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2486 #else
2487   ierr = PetscMalloc((a->Nbs)*sizeof(int),&a->colmap);CHKERRQ(ierr);
2488   PetscLogObjectMemory(mat,(a->Nbs)*sizeof(int));
2489   ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr);
2490 #endif
2491   } else a->colmap = 0;
2492 
2493   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2494     ierr = PetscMalloc(len*sizeof(int),&a->garray);CHKERRQ(ierr);
2495     PetscLogObjectMemory(mat,len*sizeof(int));
2496     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr);
2497   } else a->garray = 0;
2498 
2499   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2500   PetscLogObjectParent(mat,a->lvec);
2501   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2502   PetscLogObjectParent(mat,a->Mvctx);
2503 
2504   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2505   PetscLogObjectParent(mat,a->A);
2506   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2507   PetscLogObjectParent(mat,a->B);
2508   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
2509   *newmat = mat;
2510 
2511   PetscFunctionReturn(0);
2512 }
2513 
2514 #include "petscsys.h"
2515 
2516 #undef __FUNCT__
2517 #define __FUNCT__ "MatLoad_MPIBAIJ"
2518 PetscErrorCode MatLoad_MPIBAIJ(PetscViewer viewer,const MatType type,Mat *newmat)
2519 {
2520   Mat          A;
2521   int          i,nz,ierr,j,rstart,rend,fd;
2522   PetscScalar  *vals,*buf;
2523   MPI_Comm     comm = ((PetscObject)viewer)->comm;
2524   MPI_Status   status;
2525   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
2526   int          *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf;
2527   int          tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows;
2528   int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2529   int          dcount,kmax,k,nzcount,tmp;
2530 
2531   PetscFunctionBegin;
2532   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2533 
2534   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2535   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2536   if (!rank) {
2537     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2538     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2539     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2540     if (header[3] < 0) {
2541       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPIBAIJ");
2542     }
2543   }
2544 
2545   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
2546   M = header[1]; N = header[2];
2547 
2548   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2549 
2550   /*
2551      This code adds extra rows to make sure the number of rows is
2552      divisible by the blocksize
2553   */
2554   Mbs        = M/bs;
2555   extra_rows = bs - M + bs*(Mbs);
2556   if (extra_rows == bs) extra_rows = 0;
2557   else                  Mbs++;
2558   if (extra_rows &&!rank) {
2559     PetscLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n");
2560   }
2561 
2562   /* determine ownership of all rows */
2563   mbs        = Mbs/size + ((Mbs % size) > rank);
2564   m          = mbs*bs;
2565   ierr       = PetscMalloc(2*(size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
2566   browners   = rowners + size + 1;
2567   ierr       = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2568   rowners[0] = 0;
2569   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2570   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2571   rstart = rowners[rank];
2572   rend   = rowners[rank+1];
2573 
2574   /* distribute row lengths to all processors */
2575   ierr = PetscMalloc((rend-rstart)*bs*sizeof(int),&locrowlens);CHKERRQ(ierr);
2576   if (!rank) {
2577     ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr);
2578     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2579     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2580     ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
2581     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2582     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2583     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2584   } else {
2585     ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2586   }
2587 
2588   if (!rank) {
2589     /* calculate the number of nonzeros on each processor */
2590     ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
2591     ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
2592     for (i=0; i<size; i++) {
2593       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2594         procsnz[i] += rowlengths[j];
2595       }
2596     }
2597     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2598 
2599     /* determine max buffer needed and allocate it */
2600     maxnz = 0;
2601     for (i=0; i<size; i++) {
2602       maxnz = PetscMax(maxnz,procsnz[i]);
2603     }
2604     ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);
2605 
2606     /* read in my part of the matrix column indices  */
2607     nz     = procsnz[0];
2608     ierr   = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr);
2609     mycols = ibuf;
2610     if (size == 1)  nz -= extra_rows;
2611     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2612     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2613 
2614     /* read in every ones (except the last) and ship off */
2615     for (i=1; i<size-1; i++) {
2616       nz   = procsnz[i];
2617       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2618       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
2619     }
2620     /* read in the stuff for the last proc */
2621     if (size != 1) {
2622       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2623       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2624       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2625       ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
2626     }
2627     ierr = PetscFree(cols);CHKERRQ(ierr);
2628   } else {
2629     /* determine buffer space needed for message */
2630     nz = 0;
2631     for (i=0; i<m; i++) {
2632       nz += locrowlens[i];
2633     }
2634     ierr   = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr);
2635     mycols = ibuf;
2636     /* receive message of column indices*/
2637     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2638     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2639     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2640   }
2641 
2642   /* loop over local rows, determining number of off diagonal entries */
2643   ierr     = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&dlens);CHKERRQ(ierr);
2644   odlens   = dlens + (rend-rstart);
2645   ierr     = PetscMalloc(3*Mbs*sizeof(int),&mask);CHKERRQ(ierr);
2646   ierr     = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr);
2647   masked1  = mask    + Mbs;
2648   masked2  = masked1 + Mbs;
2649   rowcount = 0; nzcount = 0;
2650   for (i=0; i<mbs; i++) {
2651     dcount  = 0;
2652     odcount = 0;
2653     for (j=0; j<bs; j++) {
2654       kmax = locrowlens[rowcount];
2655       for (k=0; k<kmax; k++) {
2656         tmp = mycols[nzcount++]/bs;
2657         if (!mask[tmp]) {
2658           mask[tmp] = 1;
2659           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
2660           else masked1[dcount++] = tmp;
2661         }
2662       }
2663       rowcount++;
2664     }
2665 
2666     dlens[i]  = dcount;
2667     odlens[i] = odcount;
2668 
2669     /* zero out the mask elements we set */
2670     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2671     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2672   }
2673 
2674   /* create our matrix */
2675   ierr = MatCreate(comm,m,m,M+extra_rows,N+extra_rows,&A);CHKERRQ(ierr);
2676   ierr = MatSetType(A,type);CHKERRQ(ierr)
2677   ierr = MatMPIBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2678 
2679   /* Why doesn't this called using ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr); */
2680   MatSetOption(A,MAT_COLUMNS_SORTED);
2681 
2682   if (!rank) {
2683     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2684     /* read in my part of the matrix numerical values  */
2685     nz = procsnz[0];
2686     vals = buf;
2687     mycols = ibuf;
2688     if (size == 1)  nz -= extra_rows;
2689     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2690     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2691 
2692     /* insert into matrix */
2693     jj      = rstart*bs;
2694     for (i=0; i<m; i++) {
2695       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2696       mycols += locrowlens[i];
2697       vals   += locrowlens[i];
2698       jj++;
2699     }
2700     /* read in other processors (except the last one) and ship out */
2701     for (i=1; i<size-1; i++) {
2702       nz   = procsnz[i];
2703       vals = buf;
2704       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2705       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2706     }
2707     /* the last proc */
2708     if (size != 1){
2709       nz   = procsnz[i] - extra_rows;
2710       vals = buf;
2711       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2712       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2713       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
2714     }
2715     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2716   } else {
2717     /* receive numeric values */
2718     ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2719 
2720     /* receive message of values*/
2721     vals   = buf;
2722     mycols = ibuf;
2723     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2724     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2725     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2726 
2727     /* insert into matrix */
2728     jj      = rstart*bs;
2729     for (i=0; i<m; i++) {
2730       ierr    = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2731       mycols += locrowlens[i];
2732       vals   += locrowlens[i];
2733       jj++;
2734     }
2735   }
2736   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2737   ierr = PetscFree(buf);CHKERRQ(ierr);
2738   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2739   ierr = PetscFree(rowners);CHKERRQ(ierr);
2740   ierr = PetscFree(dlens);CHKERRQ(ierr);
2741   ierr = PetscFree(mask);CHKERRQ(ierr);
2742   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2743   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2744 
2745   *newmat = A;
2746   PetscFunctionReturn(0);
2747 }
2748 
2749 #undef __FUNCT__
2750 #define __FUNCT__ "MatMPIBAIJSetHashTableFactor"
2751 /*@
2752    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2753 
2754    Input Parameters:
2755 .  mat  - the matrix
2756 .  fact - factor
2757 
2758    Collective on Mat
2759 
2760    Level: advanced
2761 
2762   Notes:
2763    This can also be set by the command line option: -mat_use_hash_table fact
2764 
2765 .keywords: matrix, hashtable, factor, HT
2766 
2767 .seealso: MatSetOption()
2768 @*/
2769 PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
2770 {
2771   PetscErrorCode ierr,(*f)(Mat,PetscReal);
2772 
2773   PetscFunctionBegin;
2774   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSetHashTableFactor_C",(void (**)(void))&f);CHKERRQ(ierr);
2775   if (f) {
2776     ierr = (*f)(mat,fact);CHKERRQ(ierr);
2777   }
2778   PetscFunctionReturn(0);
2779 }
2780 
2781 #undef __FUNCT__
2782 #define __FUNCT__ "MatSetHashTableFactor_MPIBAIJ"
2783 PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat,PetscReal fact)
2784 {
2785   Mat_MPIBAIJ *baij;
2786 
2787   PetscFunctionBegin;
2788   baij = (Mat_MPIBAIJ*)mat->data;
2789   baij->ht_fact = fact;
2790   PetscFunctionReturn(0);
2791 }
2792 
2793 #undef __FUNCT__
2794 #define __FUNCT__ "MatMPIBAIJGetSeqBAIJ"
2795 PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A,Mat *Ad,Mat *Ao,int *colmap[])
2796 {
2797   Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2798   PetscFunctionBegin;
2799   *Ad     = a->A;
2800   *Ao     = a->B;
2801   *colmap = a->garray;
2802   PetscFunctionReturn(0);
2803 }
2804