xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision 95d5f7c29374efcfd2ca44c2fe93981fbc2b4454)
1 /*$Id: mpibaij.c,v 1.191 2000/04/12 01:47:05 bsmith Exp bsmith $*/
2 
3 #include "src/mat/impls/baij/mpi/mpibaij.h"   /*I  "mat.h"  I*/
4 #include "src/vec/vecimpl.h"
5 
6 extern int MatSetUpMultiply_MPIBAIJ(Mat);
7 extern int DisAssemble_MPIBAIJ(Mat);
8 extern int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int);
9 extern int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatReuse,Mat **);
10 extern int MatGetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *);
11 extern int MatSetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *,InsertMode);
12 extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
13 extern int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
14 extern int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
15 extern int MatPrintHelp_SeqBAIJ(Mat);
16 extern int MatZeroRows_SeqAIJ(Mat,IS,Scalar*);
17 
18 /*  UGLY, ugly, ugly
19    When MatScalar == Scalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does
20    not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and
21    inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ()
22    converts the entries into single precision and then calls ..._MatScalar() to put them
23    into the single precision data structures.
24 */
25 #if defined(PETSC_USE_MAT_SINGLE)
26 extern int MatSetValues_MPIBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
27 extern int MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
28 extern int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
29 #else
30 #define MatSetValues_MPIBAIJ_MatScalar             MatSetValues_MPIBAIJ
31 #define MatSetValuesBlocked_MPIBAIJ_MatScalar      MatSetValuesBlocked_MPIBAIJ
32 #define MatSetValuesBlocked_SeqBAIJ_MatScalar      MatSetValuesBlocked_SeqBAIJ
33 #define MatStashValuesRow_Private_MatScalar        MatStashValuesRow_Private
34 #define MatStashValuesCol_Private_MatScalar        MatStashValuesCol_Private
35 #define MatStashValuesRowBlocked_Private_MatScalar MatStashValuesRowBlocked_Private
36 #define MatStashValuesColBlocked_Private_MatScalar MatStashValuesColBlocked_Private
37 #endif
38 
39 EXTERN_C_BEGIN
40 #undef __FUNC__
41 #define __FUNC__ /*<a name=""></a>*/"MatStoreValues_MPIBAIJ"
42 int MatStoreValues_MPIBAIJ(Mat mat)
43 {
44   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
45   int         ierr;
46 
47   PetscFunctionBegin;
48   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
49   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
50   PetscFunctionReturn(0);
51 }
52 EXTERN_C_END
53 
54 EXTERN_C_BEGIN
55 #undef __FUNC__
56 #define __FUNC__ /*<a name=""></a>*/"MatRetrieveValues_MPIBAIJ"
57 int MatRetrieveValues_MPIBAIJ(Mat mat)
58 {
59   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
60   int         ierr;
61 
62   PetscFunctionBegin;
63   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
64   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
65   PetscFunctionReturn(0);
66 }
67 EXTERN_C_END
68 
69 /*
70      Local utility routine that creates a mapping from the global column
71    number to the local number in the off-diagonal part of the local
   storage of the matrix.  This is done in a non-scalable way since the
73    length of colmap equals the global matrix length.
74 */
#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"CreateColmap_MPIBAIJ_Private"
/*
   Builds baij->colmap: for each global block column present in the
   off-diagonal part B (listed in baij->garray), records the local block
   index scaled to point scale.  Entries are stored as i*bs+1 so that a
   lookup result of 0 (array version) means "column not present".
*/
static int CreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data;
  int         nbs = B->nbs,i,bs=B->bs,ierr;

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  /* hash-table version: memory proportional to the number of local
     off-diagonal block columns; keys are shifted by +1 so zero is never
     used as a key (presumably PetscTable reserves 0 — confirm) */
  ierr = PetscTableCreate(baij->nbs/5,&baij->colmap);CHKERRQ(ierr);
  for (i=0; i<nbs; i++){
    ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
  }
#else
  /* dense-array version: one int per global block column (non-scalable) */
  baij->colmap = (int*)PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap);
  PLogObjectMemory(mat,baij->Nbs*sizeof(int));
  ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));CHKERRQ(ierr);
  for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}
97 
98 #define CHUNKSIZE  10
99 
/*
   Inserts a single point value at (row,col) — indices local to the diagonal
   block — into the SeqBAIJ matrix A.  Does a short binary search followed by
   a linear scan over the block row; grows the row storage by CHUNKSIZE block
   entries when full.  Expanded as a macro for speed; relies on many locals
   (rp,ap,aa,ai,aj,aimax,ailen,bs,bs2,ierr,...) declared by the caller.

   Fix: the PetscMemzero of the newly added chunk previously used
   sizeof(Scalar); the buffer holds MatScalar entries, and when
   PETSC_USE_MAT_SINGLE is defined sizeof(Scalar) > sizeof(MatScalar), so the
   zeroing overran into the column-index array new_j (the parallel B macro
   below already used sizeof(MatScalar)).
*/
#define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto a_noinsert; \
        } \
      } \
      if (a->nonew == 1) goto a_noinsert; \
      else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
      if (nrow >= rmax) { \
        /* there is no extra room in row, therefore enlarge */ \
        int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
        MatScalar *new_a; \
 \
        if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
 \
        /* malloc new storage space */ \
        len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \
        new_a   = (MatScalar*)PetscMalloc(len);CHKPTRQ(new_a); \
        new_j   = (int*)(new_a + bs2*new_nz); \
        new_i   = new_j + new_nz; \
 \
        /* copy over old data into new slots */ \
        for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \
        for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
        ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
        len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
        ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
        ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
                    aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
        /* free up old matrix storage */ \
        ierr = PetscFree(a->a);CHKERRQ(ierr);  \
        if (!a->singlemalloc) { \
          ierr = PetscFree(a->i);CHKERRQ(ierr); \
          ierr = PetscFree(a->j);CHKERRQ(ierr);\
        } \
        aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
        a->singlemalloc = PETSC_TRUE; \
 \
        rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
        rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
        PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
        a->maxnz += bs2*CHUNKSIZE; \
        a->reallocs++; \
        a->nz++; \
      } \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      a_noinsert:; \
    ailen[brow] = nrow; \
}
176 
/*
   Off-diagonal twin of MatSetValues_SeqBAIJ_A_Private(): inserts a single
   point value at (row,col) into the SeqBAIJ matrix B, growing the row
   storage by CHUNKSIZE block entries when full.  Expanded as a macro for
   speed; relies on many locals (rp,ap,ba,bi,bj,bimax,bilen,bs,bs2,ierr,...)
   declared by the caller.
*/
#define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto b_noinsert; \
        } \
      } \
      if (b->nonew == 1) goto b_noinsert; \
      else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
      if (nrow >= rmax) { \
        /* there is no extra room in row, therefore enlarge */ \
        int       new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
        MatScalar *new_a; \
 \
        if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
 \
        /* malloc new storage space */ \
        len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \
        new_a   = (MatScalar*)PetscMalloc(len);CHKPTRQ(new_a); \
        new_j   = (int*)(new_a + bs2*new_nz); \
        new_i   = new_j + new_nz; \
 \
        /* copy over old data into new slots */ \
        for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \
        for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
        ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
        len  = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
        ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
        ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
                    ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
        /* free up old matrix storage */ \
        ierr = PetscFree(b->a);CHKERRQ(ierr);  \
        if (!b->singlemalloc) { \
          ierr = PetscFree(b->i);CHKERRQ(ierr); \
          ierr = PetscFree(b->j);CHKERRQ(ierr); \
        } \
        ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
        b->singlemalloc = PETSC_TRUE; \
 \
        rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
        rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
        PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
        b->maxnz += bs2*CHUNKSIZE; \
        b->reallocs++; \
        b->nz++; \
      } \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      b_noinsert:; \
    bilen[brow] = nrow; \
}
252 
253 #if defined(PETSC_USE_MAT_SINGLE)
254 #undef __FUNC__
255 #define __FUNC__ /*<a name=""></a>*/"MatSetValues_MPIBAIJ"
256 int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
257 {
258   int       ierr,i,N = m*n;
259   MatScalar *vsingle = (MatScalar*)v;
260 
261   PetscFunctionBegin;
262   for (i=0; i<N; i++) {
263     vsingle[i] = v[i];
264   }
265   ierr = MatSetValues_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
266   PetscFunctionReturn(0);
267 }
268 
269 #undef __FUNC__
270 #define __FUNC__ /*<a name=""></a>*/"MatSetValuesBlocked_MPIBAIJ"
271 int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
272 {
273   int       ierr,i,N = m*n*((Mat_MPIBAIJ*)mat->data)->bs2;
274   MatScalar *vsingle = (MatScalar*)v;
275 
276   PetscFunctionBegin;
277   for (i=0; i<N; i++) {
278     vsingle[i] = v[i];
279   }
280   ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
281   PetscFunctionReturn(0);
282 }
283 #endif
284 
#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatSetValues_MPIBAIJ"
/*
   Point-wise MatSetValues for the parallel BAIJ format.  Rows owned locally
   are inserted directly into the diagonal block A or the off-diagonal block
   B via the fast macros above; rows owned by other processes are stashed
   for communication during assembly (unless donotstash is set).
*/
int MatSetValues_MPIBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  MatScalar   value;
  int         ierr,i,j,row,col;
  int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
  int         rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
  int         cend_orig=baij->cend_bs,bs=baij->bs;

  /* Some Variables required in the macro */
  Mat         A = baij->A;
  Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data;
  int         *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar   *aa=a->a;

  Mat         B = baij->B;
  Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data;
  int         *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar   *ba=b->a;

  int         *rp,ii,nrow,_i,rmax,N,brow,bcol;
  int         low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar   *ap,*bap;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;  /* negative row indices are silently ignored */
#if defined(PETSC_USE_BOPT_g)
    if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      /* row is owned by this process */
      row = im[i] - rstart_orig;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig){
          /* column falls in the diagonal block A */
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;  /* negative column indices ignored */
#if defined(PETSC_USE_BOPT_g)
        else if (in[j] >= baij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Col too large");}
#endif
        else {
          /* column falls in the off-diagonal block B */
          if (mat->was_assembled) {
            /* after first assembly B uses compressed column numbering, so
               translate the global column through colmap */
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1 + in[j]%bs;
#else
            col = baij->colmap[in[j]/bs] - 1 + in[j]%bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              /* column not in B's current pattern: fall back to global
                 numbering by disassembling, which replaces baij->B */
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B = baij->B;
              b = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba=b->a;
            }
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      /* off-process row: stash for AssemblyBegin/End communication */
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private_MatScalar(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private_MatScalar(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
367 
#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatSetValuesBlocked_MPIBAIJ"
/*
   Block-wise MatSetValues for the parallel BAIJ format: im/in are block
   indices and v holds m*n dense bs x bs blocks.  Each block is copied (when
   necessary) into the scratch buffer baij->barray in column-major block
   layout before being handed to the sequential SetValuesBlocked routines;
   off-process rows are stashed in mat->bstash.
*/
int MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  MatScalar   *value,*barray=baij->barray;
  int         ierr,i,j,ii,jj,row,col;
  int         roworiented = baij->roworiented,rstart=baij->rstart ;
  int         rend=baij->rend,cstart=baij->cstart,stepval;
  int         cend=baij->cend,bs=baij->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  /* lazily allocate the one-block scratch buffer, reused across calls */
  if(!barray) {
    baij->barray = barray = (MatScalar*)PetscMalloc(bs2*sizeof(MatScalar));CHKPTRQ(barray);
  }

  /* distance between consecutive rows of one block inside v */
  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;  /* negative block-row indices ignored */
#if defined(PETSC_USE_BOPT_g)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large, row %d max %d",im[i],baij->Mbs);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = v + i*bs2;
        } else if((!roworiented) && (m == 1)) {
          barray = v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          /* gather the bs x bs block into contiguous storage */
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++  = *value++;
            }
          }
          barray -=bs2;  /* rewind to the start of the copied block */
        }

        if (in[j] >= cstart && in[j] < cend){
          /* block column in the diagonal part A */
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
        else if (in[j] < 0) continue;  /* negative block-column ignored */
#if defined(PETSC_USE_BOPT_g)
        else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large, col %d max %d",in[j],baij->Nbs);}
#endif
        else {
          /* block column in the off-diagonal part B */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_BOPT_g)
            /* debug check: colmap entries must be point indices aligned to bs */
#if defined (PETSC_USE_CTABLE)
            { int data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");
#endif
#endif
            /* colmap stores point columns (+1); convert back to a block column */
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              /* new off-diagonal column: revert B to global numbering */
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
            }
          }
          else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      /* off-process block row: stash for assembly-time communication */
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private_MatScalar(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private_MatScalar(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
/* Multiplicative hashing constant (fractional part of the golden ratio) */
#define HASH_KEY 0.6180339887
/* #define HASH1(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
/* Maps key into [0,size): tmp must be a PetscReal lvalue provided by the
   caller; (tmp-(int)tmp) extracts the fractional part of key*HASH_KEY */
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
/* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatSetValues_MPIBAIJ_HT"
/*
   Point-wise MatSetValues using the precomputed hash table (see
   MatCreateHashTable_MPIBAIJ_Private): each existing block is located by
   hashing its (block-row,block-col) key and the value is written directly
   through the stored block pointer.  Entries not present in the hash table
   are an error; off-process rows are stashed as usual.
*/
int MatSetValues_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  int         ierr,i,j,row,col;
  int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
  int         rend_orig=baij->rend_bs,Nbs=baij->Nbs;
  int         h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx;
  PetscReal   tmp;
  MatScalar   ** HD = baij->hd,value;
#if defined(PETSC_USE_BOPT_g)
  int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  for (i=0; i<m; i++) {
#if defined(PETSC_USE_BOPT_g)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
      row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j=0; j<n; j++) {
        col = in[j];
        if (roworiented) value = (MatScalar)v[i*n+j]; else value = (MatScalar)v[i+j*m];
        /* Look up into the Hash Table */
        key = (row/bs)*Nbs+(col/bs)+1;  /* +1 so 0 means "empty slot" */
        h1  = HASH(size,key,tmp);


        idx = h1;
        /* linear probing from h1, wrapping around once; the BOPT_g variant
           additionally counts probes for the assembly-time statistics */
#if defined(PETSC_USE_BOPT_g)
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#endif
        /* A HASH table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else {
      /* off-process row: stash for assembly-time communication */
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_BOPT_g)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}
547 
#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatSetValuesBlocked_MPIBAIJ_HT"
/*
   Block-wise MatSetValues using the precomputed hash table: locates each
   existing bs x bs block by hashing its (block-row,block-col) key and writes
   the values directly through the stored block pointer.  Blocks not present
   in the hash table are an error; off-process rows are stashed.
*/
int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  int         ierr,i,j,ii,jj,row,col;
  int         roworiented = baij->roworiented,rstart=baij->rstart ;
  int         rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2;
  int         h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  PetscReal   tmp;
  MatScalar   **HD = baij->hd,*baij_a;
  Scalar      *v_t,*value;
#if defined(PETSC_USE_BOPT_g)
  int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  /* distance between consecutive rows of one block inside v */
  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_BOPT_g)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
    row   = im[i];
    v_t   = v + i*bs2;
    if (row >= rstart && row < rend) {
      for (j=0; j<n; j++) {
        col = in[j];

        /* Look up into the Hash Table */
        key = row*Nbs+col+1;  /* +1 so 0 means "empty slot" */
        h1  = HASH(size,key,tmp);

        idx = h1;
        /* linear probing from h1, wrapping around once; the BOPT_g variant
           additionally counts probes for the assembly-time statistics */
#if defined(PETSC_USE_BOPT_g)
        total_ct++;
        insert_ct++;
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#else
        if (HT[idx] != key) {
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#endif
        baij_a = HD[idx];
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t  += bs;
          /* copy the block, transposing from row-major input to the
             column-within-block layout used by the stored blocks */
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj]  += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj]  = *value++;
              }
            }
          }
        } else {
          value = v + j*(stepval+bs)*bs + i*bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj]  += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj]  = *value++;
              }
            }
          }
        }
      }
    } else {
      /* off-process block row: stash for assembly-time communication */
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_BOPT_g)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}
662 
#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatGetValues_MPIBAIJ"
/*
   Retrieves entries (idxm[i],idxn[j]) into v (row-major, v[i*n+j]).  Only
   locally owned rows are supported; columns outside the diagonal block are
   translated through colmap into the off-diagonal block B, and entries whose
   block is not present in B are returned as 0.0.
*/
int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  int        bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
  int        bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (idxm[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
        if (idxn[j] >= baij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
        if (idxn[j] >= bscstart && idxn[j] < bscend){
          /* column in the diagonal block A */
          col = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          /* column in the off-diagonal block B: translate through colmap */
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data --;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          /* block absent from B (or stale colmap entry): value is zero */
          if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
    }
  }
 PetscFunctionReturn(0);
}
706 
707 #undef __FUNC__
708 #define __FUNC__ /*<a name=""></a>*/"MatNorm_MPIBAIJ"
709 int MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *norm)
710 {
711   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
712   Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
713   int        ierr,i,bs2=baij->bs2;
714   PetscReal  sum = 0.0;
715   MatScalar  *v;
716 
717   PetscFunctionBegin;
718   if (baij->size == 1) {
719     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
720   } else {
721     if (type == NORM_FROBENIUS) {
722       v = amat->a;
723       for (i=0; i<amat->nz*bs2; i++) {
724 #if defined(PETSC_USE_COMPLEX)
725         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
726 #else
727         sum += (*v)*(*v); v++;
728 #endif
729       }
730       v = bmat->a;
731       for (i=0; i<bmat->nz*bs2; i++) {
732 #if defined(PETSC_USE_COMPLEX)
733         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
734 #else
735         sum += (*v)*(*v); v++;
736 #endif
737       }
738       ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
739       *norm = sqrt(*norm);
740     } else {
741       SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet");
742     }
743   }
744   PetscFunctionReturn(0);
745 }
746 
747 
748 /*
749   Creates the hash table, and sets the table
750   This table is created only once.
  If new entries need to be added to the matrix
752   then the hash table has to be destroyed and
753   recreated.
754 */
755 #undef __FUNC__
756 #define __FUNC__ /*<a name=""></a>*/"MatCreateHashTable_MPIBAIJ_Private"
757 int MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
758 {
759   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
760   Mat         A = baij->A,B=baij->B;
761   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
762   int         i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
763   int         size,bs2=baij->bs2,rstart=baij->rstart,ierr;
764   int         cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs;
765   int         *HT,key;
766   MatScalar   **HD;
767   PetscReal   tmp;
768 #if defined(PETSC_USE_BOPT_g)
769   int         ct=0,max=0;
770 #endif
771 
772   PetscFunctionBegin;
773   baij->ht_size=(int)(factor*nz);
774   size = baij->ht_size;
775 
776   if (baij->ht) {
777     PetscFunctionReturn(0);
778   }
779 
780   /* Allocate Memory for Hash Table */
781   baij->hd = (MatScalar**)PetscMalloc((size)*(sizeof(int)+sizeof(MatScalar*))+1);CHKPTRQ(baij->hd);
782   baij->ht = (int*)(baij->hd + size);
783   HD = baij->hd;
784   HT = baij->ht;
785 
786 
787   ierr = PetscMemzero(HD,size*(sizeof(int)+sizeof(Scalar*)));CHKERRQ(ierr);
788 
789 
790   /* Loop Over A */
791   for (i=0; i<a->mbs; i++) {
792     for (j=ai[i]; j<ai[i+1]; j++) {
793       row = i+rstart;
794       col = aj[j]+cstart;
795 
796       key = row*Nbs + col + 1;
797       h1  = HASH(size,key,tmp);
798       for (k=0; k<size; k++){
799         if (HT[(h1+k)%size] == 0.0) {
800           HT[(h1+k)%size] = key;
801           HD[(h1+k)%size] = a->a + j*bs2;
802           break;
803 #if defined(PETSC_USE_BOPT_g)
804         } else {
805           ct++;
806 #endif
807         }
808       }
809 #if defined(PETSC_USE_BOPT_g)
810       if (k> max) max = k;
811 #endif
812     }
813   }
814   /* Loop Over B */
815   for (i=0; i<b->mbs; i++) {
816     for (j=bi[i]; j<bi[i+1]; j++) {
817       row = i+rstart;
818       col = garray[bj[j]];
819       key = row*Nbs + col + 1;
820       h1  = HASH(size,key,tmp);
821       for (k=0; k<size; k++){
822         if (HT[(h1+k)%size] == 0.0) {
823           HT[(h1+k)%size] = key;
824           HD[(h1+k)%size] = b->a + j*bs2;
825           break;
826 #if defined(PETSC_USE_BOPT_g)
827         } else {
828           ct++;
829 #endif
830         }
831       }
832 #if defined(PETSC_USE_BOPT_g)
833       if (k> max) max = k;
834 #endif
835     }
836   }
837 
838   /* Print Summary */
839 #if defined(PETSC_USE_BOPT_g)
840   for (i=0,j=0; i<size; i++) {
841     if (HT[i]) {j++;}
842   }
843   PLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n",(j== 0)? 0.0:((PetscReal)(ct+j))/j,max);
844 #endif
845   PetscFunctionReturn(0);
846 }
847 
848 #undef __FUNC__
849 #define __FUNC__ /*<a name=""></a>*/"MatAssemblyBegin_MPIBAIJ"
850 int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
851 {
852   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
853   int         ierr,nstash,reallocs;
854   InsertMode  addv;
855 
856   PetscFunctionBegin;
857   if (baij->donotstash) {
858     PetscFunctionReturn(0);
859   }
860 
861   /* make sure all processors are either in INSERTMODE or ADDMODE */
862   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
863   if (addv == (ADD_VALUES|INSERT_VALUES)) {
864     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added");
865   }
866   mat->insertmode = addv; /* in case this processor had no cache */
867 
868   ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
869   ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
870   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
871   PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Stash has %d entries,uses %d mallocs.\n",nstash,reallocs);
872   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
873   PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
874   PetscFunctionReturn(0);
875 }
876 
#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatAssemblyEnd_MPIBAIJ"
/*
   MatAssemblyEnd_MPIBAIJ - Completes parallel assembly: drains the point
   stash and the block stash into the local A (diagonal) and B
   (off-diagonal) submatrices, assembles both, handles any disassembly
   coordination across processors, and installs the hash-table
   MatSetValues path if it was requested via ht_flag.
*/
int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data;
  int         i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2;
  int         *row,*col,other_disassembled;
  PetscTruth  r1,r2,r3;
  MatScalar   *val;
  InsertMode  addv = mat->insertmode;

  PetscFunctionBegin;
  if (!baij->donotstash) {
    /* drain every incoming point-stash message */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = b->roworiented;
    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;
    b->roworiented    = PETSC_FALSE;
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* each stashed block holds bs2 values, hence the i*bs2 offset */
        ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    /* restore the row/column orientation flags saved above */
    baij->roworiented = r1;
    a->roworiented    = r2;
    b->roworiented    = r3;
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselfs, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_BOPT_g)
  if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
    PLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((double)baij->ht_total_ct)/baij->ht_insert_ct);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  /* build the hash table once, at final assembly, if the user requested it */
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  /* release the MatGetRow() work buffer; it is rebuilt on demand */
  if (baij->rowvalues) {
    ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
    baij->rowvalues = 0;
  }
  PetscFunctionReturn(0);
}
975 
#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatView_MPIBAIJ_ASCIIorDraworSocket"
/*
   MatView_MPIBAIJ_ASCIIorDraworSocket - Views the parallel matrix.  The
   ASCII "info" formats print per-processor statistics and return early;
   otherwise, for more than one processor, the entire matrix is gathered
   onto processor 0 and viewed there through a singleton viewer.  This is
   a collective operation: all processors participate in the gather even
   though only rank 0 receives the data.
*/
static int MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer)
{
  Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ*)mat->data;
  int          ierr,format,bs = baij->bs,size = baij->size,rank = baij->rank;
  PetscTruth   isascii,isdraw;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
  if (isascii) {
    ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
      /* print per-processor nonzero/memory statistics and return */
      MatInfo info;
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
              rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
              baij->bs,(int)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
      ierr = ViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == VIEWER_FORMAT_ASCII_INFO) {
      ierr = ViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    /* a null draw context means graphics were disabled; nothing to do */
    Draw       draw;
    PetscTruth isnull;
    ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    /* uniprocessor: the diagonal block is the whole matrix */
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    int         M = baij->M,N = baij->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar   *a;

    /* rank 0 owns all M rows of the gathered copy; everyone else owns none */
    if (!rank) {
      ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    }
    PLogObjectParent(mat,A);

    /* copy over the A part */
    Aloc  = (Mat_SeqBAIJ*)baij->A->data;
    ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
    /* rvals holds the bs consecutive global point rows of one block row */
    rvals = (int*)PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstart + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstart+aj[j])*bs;
        for (k=0; k<bs; k++) {
          /* insert one bs-long column of the block; a advances through it */
          ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ*)baij->B->data;
    ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstart + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        /* garray maps local block columns of B to global block columns */
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the Draw object
    */
    if (!rank) {
      Viewer sviewer;
      ierr = ViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
      ierr = ViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    }
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1079 
1080 #undef __FUNC__
1081 #define __FUNC__ /*<a name=""></a>*/"MatView_MPIBAIJ"
1082 int MatView_MPIBAIJ(Mat mat,Viewer viewer)
1083 {
1084   int        ierr;
1085   PetscTruth isascii,isdraw,issocket,isbinary;
1086 
1087   PetscFunctionBegin;
1088   ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
1089   ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
1090   ierr = PetscTypeCompare((PetscObject)viewer,SOCKET_VIEWER,&issocket);CHKERRQ(ierr);
1091   ierr = PetscTypeCompare((PetscObject)viewer,BINARY_VIEWER,&isbinary);CHKERRQ(ierr);
1092   if (isascii || isdraw || issocket || isbinary) {
1093     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1094   } else {
1095     SETERRQ1(1,1,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
1096   }
1097   PetscFunctionReturn(0);
1098 }
1099 
#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatDestroy_MPIBAIJ"
/*
   MatDestroy_MPIBAIJ - Frees everything owned by an MPIBAIJ matrix:
   mappings, maps, both stashes, the local A and B submatrices, the
   column map, ghost arrays, scatter context, work buffers, the hash
   table, and finally the matrix header itself.
*/
int MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  int         ierr;

  PetscFunctionBegin;

  if (mat->mapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr);
  }
  if (mat->bmapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr);
  }
  if (mat->rmap) {
    ierr = MapDestroy(mat->rmap);CHKERRQ(ierr);
  }
  if (mat->cmap) {
    ierr = MapDestroy(mat->cmap);CHKERRQ(ierr);
  }
#if defined(PETSC_USE_LOG)
  PLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",baij->M,baij->N);
#endif

  /* point stash and block stash */
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);

  ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(baij->B);CHKERRQ(ierr);
  /* colmap is a PetscTable or a plain array depending on configuration */
#if defined (PETSC_USE_CTABLE)
  if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
#else
  if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
#endif
  if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
  if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
  if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
  if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
  if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
  /* hd and ht share one allocation; freeing hd releases both */
  if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
  ierr = PetscFree(baij);CHKERRQ(ierr);
  PLogObjectDestroy(mat);
  PetscHeaderDestroy(mat);
  PetscFunctionReturn(0);
}
1147 
1148 #undef __FUNC__
1149 #define __FUNC__ /*<a name=""></a>*/"MatMult_MPIBAIJ"
1150 int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1151 {
1152   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1153   int         ierr,nt;
1154 
1155   PetscFunctionBegin;
1156   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1157   if (nt != a->n) {
1158     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx");
1159   }
1160   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1161   if (nt != a->m) {
1162     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible parition of A and yy");
1163   }
1164   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1165   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1166   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1167   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1168   ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1169   PetscFunctionReturn(0);
1170 }
1171 
1172 #undef __FUNC__
1173 #define __FUNC__ /*<a name=""></a>*/"MatMultAdd_MPIBAIJ"
1174 int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1175 {
1176   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1177   int        ierr;
1178 
1179   PetscFunctionBegin;
1180   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1181   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1182   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1183   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1184   PetscFunctionReturn(0);
1185 }
1186 
1187 #undef __FUNC__
1188 #define __FUNC__ /*<a name=""></a>*/"MatMultTranspose_MPIBAIJ"
1189 int MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1190 {
1191   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1192   int         ierr;
1193 
1194   PetscFunctionBegin;
1195   /* do nondiagonal part */
1196   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1197   /* send it on its way */
1198   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1199   /* do local part */
1200   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1201   /* receive remote parts: note this assumes the values are not actually */
1202   /* inserted in yy until the next line, which is true for my implementation*/
1203   /* but is not perhaps always true. */
1204   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1205   PetscFunctionReturn(0);
1206 }
1207 
#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatMultTransposeAdd_MPIBAIJ"
/*
   MatMultTransposeAdd_MPIBAIJ - Computes zz = A^T * xx + yy, overlapping
   the reverse scatter of the off-diagonal contribution with the local
   transpose product.
*/
int MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
  int         ierr;

  PetscFunctionBegin;
  /* do nondiagonal part: lvec = B^T * xx */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  /* send it on its way (reverse scatter adds lvec into remote parts of zz) */
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  /* do local part: zz = A_diag^T * xx + yy */
  ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* receive remote parts: note this assumes the values are not actually */
  /* inserted in zz until the next line, which is true for my implementation*/
  /* but is not perhaps always true. */
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1228 
1229 /*
1230   This only works correctly for square matrices where the subblock A->A is the
1231    diagonal block
1232 */
1233 #undef __FUNC__
1234 #define __FUNC__ /*<a name=""></a>*/"MatGetDiagonal_MPIBAIJ"
1235 int MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1236 {
1237   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1238   int         ierr;
1239 
1240   PetscFunctionBegin;
1241   if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block");
1242   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1243   PetscFunctionReturn(0);
1244 }
1245 
1246 #undef __FUNC__
1247 #define __FUNC__ /*<a name=""></a>*/"MatScale_MPIBAIJ"
1248 int MatScale_MPIBAIJ(Scalar *aa,Mat A)
1249 {
1250   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1251   int         ierr;
1252 
1253   PetscFunctionBegin;
1254   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
1255   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
1256   PetscFunctionReturn(0);
1257 }
1258 
1259 #undef __FUNC__
1260 #define __FUNC__ /*<a name=""></a>*/"MatGetSize_MPIBAIJ"
1261 int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n)
1262 {
1263   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data;
1264 
1265   PetscFunctionBegin;
1266   if (m) *m = mat->M;
1267   if (n) *n = mat->N;
1268   PetscFunctionReturn(0);
1269 }
1270 
1271 #undef __FUNC__
1272 #define __FUNC__ /*<a name=""></a>*/"MatGetLocalSize_MPIBAIJ"
1273 int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n)
1274 {
1275   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data;
1276 
1277   PetscFunctionBegin;
1278   *m = mat->m; *n = mat->n;
1279   PetscFunctionReturn(0);
1280 }
1281 
1282 #undef __FUNC__
1283 #define __FUNC__ /*<a name=""></a>*/"MatGetOwnershipRange_MPIBAIJ"
1284 int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n)
1285 {
1286   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data;
1287 
1288   PetscFunctionBegin;
1289   if (m) *m = mat->rstart*mat->bs;
1290   if (n) *n = mat->rend*mat->bs;
1291   PetscFunctionReturn(0);
1292 }
1293 
#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatGetRow_MPIBAIJ"
/*
   MatGetRow_MPIBAIJ - Returns one locally-owned global row as column
   indices and/or values, merged in increasing column order from the
   local A (diagonal) and B (off-diagonal) submatrices.  Must be paired
   with MatRestoreRow_MPIBAIJ(); only one row may be active at a time.
   Results are copied into internal buffers (rowvalues/rowindices) that
   remain valid until the next call.
*/
int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data;
  Scalar     *vworkA,*vworkB,**pvA,**pvB,*v_p;
  int        bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB;
  int        nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs;
  int        *cmap,*idx_p,cstart = mat->cstart;

  PetscFunctionBegin;
  /* only one outstanding MatGetRow() is supported */
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
    int     max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    /* one allocation holds both the value and the index buffers */
    mat->rowvalues = (Scalar*)PetscMalloc(max*bs2*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues);
    mat->rowindices = (int*)(mat->rowvalues + max*bs2);
  }

  /* the requested row must lie in this processor's point-row range */
  if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,0,"Only local rows")
  lrow = row - brstart;

  /* request only the pieces (values and/or indices) the caller asked for */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap  = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      /* imark splits B's entries into those left of the diagonal block
         (global column < cstart) and those to its right */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* imark already found while copying values */
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart)
              idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  /* release the submatrix rows; the data has been copied out above */
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1374 
1375 #undef __FUNC__
1376 #define __FUNC__ /*<a name=""></a>*/"MatRestoreRow_MPIBAIJ"
1377 int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1378 {
1379   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1380 
1381   PetscFunctionBegin;
1382   if (baij->getrowactive == PETSC_FALSE) {
1383     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called");
1384   }
1385   baij->getrowactive = PETSC_FALSE;
1386   PetscFunctionReturn(0);
1387 }
1388 
1389 #undef __FUNC__
1390 #define __FUNC__ /*<a name=""></a>*/"MatGetBlockSize_MPIBAIJ"
1391 int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs)
1392 {
1393   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1394 
1395   PetscFunctionBegin;
1396   *bs = baij->bs;
1397   PetscFunctionReturn(0);
1398 }
1399 
1400 #undef __FUNC__
1401 #define __FUNC__ /*<a name=""></a>*/"MatZeroEntries_MPIBAIJ"
1402 int MatZeroEntries_MPIBAIJ(Mat A)
1403 {
1404   Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data;
1405   int         ierr;
1406 
1407   PetscFunctionBegin;
1408   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1409   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1410   PetscFunctionReturn(0);
1411 }
1412 
1413 #undef __FUNC__
1414 #define __FUNC__ /*<a name=""></a>*/"MatGetInfo_MPIBAIJ"
1415 int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1416 {
1417   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data;
1418   Mat         A = a->A,B = a->B;
1419   int         ierr;
1420   PetscReal   isend[5],irecv[5];
1421 
1422   PetscFunctionBegin;
1423   info->block_size     = (double)a->bs;
1424   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1425   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1426   isend[3] = info->memory;  isend[4] = info->mallocs;
1427   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1428   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1429   isend[3] += info->memory;  isend[4] += info->mallocs;
1430   if (flag == MAT_LOCAL) {
1431     info->nz_used      = isend[0];
1432     info->nz_allocated = isend[1];
1433     info->nz_unneeded  = isend[2];
1434     info->memory       = isend[3];
1435     info->mallocs      = isend[4];
1436   } else if (flag == MAT_GLOBAL_MAX) {
1437     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr);
1438     info->nz_used      = irecv[0];
1439     info->nz_allocated = irecv[1];
1440     info->nz_unneeded  = irecv[2];
1441     info->memory       = irecv[3];
1442     info->mallocs      = irecv[4];
1443   } else if (flag == MAT_GLOBAL_SUM) {
1444     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr);
1445     info->nz_used      = irecv[0];
1446     info->nz_allocated = irecv[1];
1447     info->nz_unneeded  = irecv[2];
1448     info->memory       = irecv[3];
1449     info->mallocs      = irecv[4];
1450   } else {
1451     SETERRQ1(1,1,"Unknown MatInfoType argument %d",flag);
1452   }
1453   info->rows_global       = (double)a->M;
1454   info->columns_global    = (double)a->N;
1455   info->rows_local        = (double)a->m;
1456   info->columns_local     = (double)a->N;
1457   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1458   info->fill_ratio_needed = 0;
1459   info->factor_mallocs    = 0;
1460   PetscFunctionReturn(0);
1461 }
1462 
1463 #undef __FUNC__
1464 #define __FUNC__ /*<a name=""></a>*/"MatSetOption_MPIBAIJ"
1465 int MatSetOption_MPIBAIJ(Mat A,MatOption op)
1466 {
1467   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1468   int         ierr;
1469 
1470   PetscFunctionBegin;
1471   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
1472       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
1473       op == MAT_COLUMNS_UNSORTED ||
1474       op == MAT_COLUMNS_SORTED ||
1475       op == MAT_NEW_NONZERO_ALLOCATION_ERR ||
1476       op == MAT_KEEP_ZEROED_ROWS ||
1477       op == MAT_NEW_NONZERO_LOCATION_ERR) {
1478         ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1479         ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1480   } else if (op == MAT_ROW_ORIENTED) {
1481         a->roworiented = PETSC_TRUE;
1482         ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1483         ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1484   } else if (op == MAT_ROWS_SORTED ||
1485              op == MAT_ROWS_UNSORTED ||
1486              op == MAT_SYMMETRIC ||
1487              op == MAT_STRUCTURALLY_SYMMETRIC ||
1488              op == MAT_YES_NEW_DIAGONALS ||
1489              op == MAT_USE_HASH_TABLE) {
1490     PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
1491   } else if (op == MAT_COLUMN_ORIENTED) {
1492     a->roworiented = PETSC_FALSE;
1493     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1494     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1495   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
1496     a->donotstash = PETSC_TRUE;
1497   } else if (op == MAT_NO_NEW_DIAGONALS) {
1498     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
1499   } else if (op == MAT_USE_HASH_TABLE) {
1500     a->ht_flag = PETSC_TRUE;
1501   } else {
1502     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
1503   }
1504   PetscFunctionReturn(0);
1505 }
1506 
1507 #undef __FUNC__
1508 #define __FUNC__ /*<a name=""></a>*/"MatTranspose_MPIBAIJ("
1509 int MatTranspose_MPIBAIJ(Mat A,Mat *matout)
1510 {
1511   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data;
1512   Mat_SeqBAIJ *Aloc;
1513   Mat         B;
1514   int         ierr,M=baij->M,N=baij->N,*ai,*aj,i,*rvals,j,k,col;
1515   int         bs=baij->bs,mbs=baij->mbs;
1516   MatScalar   *a;
1517 
1518   PetscFunctionBegin;
1519   if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place");
1520   ierr = MatCreateMPIBAIJ(A->comm,baij->bs,baij->n,baij->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);
1521 
1522   /* copy over the A part */
1523   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1524   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1525   rvals = (int*)PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals);
1526 
1527   for (i=0; i<mbs; i++) {
1528     rvals[0] = bs*(baij->rstart + i);
1529     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1530     for (j=ai[i]; j<ai[i+1]; j++) {
1531       col = (baij->cstart+aj[j])*bs;
1532       for (k=0; k<bs; k++) {
1533         ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1534         col++; a += bs;
1535       }
1536     }
1537   }
1538   /* copy over the B part */
1539   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1540   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1541   for (i=0; i<mbs; i++) {
1542     rvals[0] = bs*(baij->rstart + i);
1543     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1544     for (j=ai[i]; j<ai[i+1]; j++) {
1545       col = baij->garray[aj[j]]*bs;
1546       for (k=0; k<bs; k++) {
1547         ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1548         col++; a += bs;
1549       }
1550     }
1551   }
1552   ierr = PetscFree(rvals);CHKERRQ(ierr);
1553   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1554   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1555 
1556   if (matout) {
1557     *matout = B;
1558   } else {
1559     PetscOps *Abops;
1560     MatOps   Aops;
1561 
1562     /* This isn't really an in-place transpose .... but free data structures from baij */
1563     ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
1564     ierr = MatDestroy(baij->A);CHKERRQ(ierr);
1565     ierr = MatDestroy(baij->B);CHKERRQ(ierr);
1566 #if defined (PETSC_USE_CTABLE)
1567     if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
1568 #else
1569     if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
1570 #endif
1571     if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
1572     if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
1573     if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
1574     ierr = PetscFree(baij);CHKERRQ(ierr);
1575 
1576     /*
1577        This is horrible, horrible code. We need to keep the
1578       A pointers for the bops and ops but copy everything
1579       else from C.
1580     */
1581     Abops   = A->bops;
1582     Aops    = A->ops;
1583     ierr    = PetscMemcpy(A,B,sizeof(struct _p_Mat));CHKERRQ(ierr);
1584     A->bops = Abops;
1585     A->ops  = Aops;
1586 
1587     PetscHeaderDestroy(B);
1588   }
1589   PetscFunctionReturn(0);
1590 }
1591 
#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatDiagonalScale_MPIBAIJ"
/*
   MatDiagonalScale_MPIBAIJ - Computes mat = diag(ll) * mat * diag(rr).
   Either vector may be PETSC_NULL.  The scatter of rr's ghost values is
   overlapped with the left scaling of B and the scaling of the diagonal
   block.
*/
int MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  Mat         a = baij->A,b = baij->B;
  int         ierr,s1,s2,s3;

  PetscFunctionBegin;
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
  }
  if (ll) {
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"left vector non-conforming local size");
    /* left-scale the off-diagonal block while the scatter is in flight */
    ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
  }
  /* scale  the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}
1624 
#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatZeroRows_MPIBAIJ"
/*
   MatZeroRows_MPIBAIJ - Zeros the (point, not block) rows of A listed in
   the index set is; if diag is non-null the diagonal entry of each zeroed
   row is set to *diag.

   The rows in is are given with GLOBAL point indices and may refer to rows
   owned by other processes.  The routine therefore first routes every row
   index to its owning process (counting, sizing via an Allreduce, then
   Irecv/Isend of the indices), collects the locally owned indices into
   lrows (shifted to local numbering), and finally zeros those local rows.
*/
int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag)
{
  Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
  int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
  int            *procs,*nprocs,j,found,idx,nsends,*work,row;
  int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int            *rvalues,tag = A->tag,count,base,slen,n,*source;
  int            *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;

  PetscFunctionBegin;
  ierr = ISGetSize(is,&N);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);

  /*  first count number of contributors to each processor */
  /* nprocs[j] = how many of my indices go to process j; procs[j] = 0/1 flag */
  nprocs = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(nprocs);
  ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
  procs  = nprocs + size;
  owner  = (int*)PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note*/
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = 0;
    /* owners[] stores block-row boundaries, hence the *bs to get point rows */
    for (j=0; j<size; j++) {
      if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work   = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(work);
  /* PetscMaxSum_Op: work[rank] = max message length, work[size+rank] = number of senders */
  ierr   = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr);
  nmax   = work[rank];
  nrecvs = work[size+rank];
  ierr = PetscFree(work);CHKERRQ(ierr);

  /* post receives:   */
  rvalues = (int*)PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
  recv_waits = (MPI_Request*)PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
     the ith processor
  */
  svalues    = (int*)PetscMalloc((N+1)*sizeof(int));CHKPTRQ(svalues);
  send_waits = (MPI_Request*)PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
  starts     = (int*)PetscMalloc((size+1)*sizeof(int));CHKPTRQ(starts);
  starts[0]  = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
  /* pack indices grouped by destination; starts[] is consumed while packing */
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  /* rebuild starts[] (it was advanced by the packing loop above) */
  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for (i=0; i<size; i++) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  /* first locally owned point row; used to convert global to local indices */
  base = owners[rank]*bs;

  /*  wait on receives */
  lens   = (int*)PetscMalloc(2*(nrecvs+1)*sizeof(int));CHKPTRQ(lens);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex]  = recv_status.MPI_SOURCE;
    lens[imdex]    = n;
    slen          += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  /* lrows[] = all locally owned rows to zero, in LOCAL point numbering */
  lrows = (int*)PetscMalloc((slen+1)*sizeof(int));CHKPTRQ(lrows);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PLogObjectParent(A,istmp);

  /*
        Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

       Contributed by: Mathew Knepley

     NOTE(review): l->A and l->B are SeqBAIJ matrices yet MatZeroRows_SeqAIJ
     is called on them directly -- confirm this routine is valid for the
     BAIJ data structure with point-row indices.
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
  ierr = MatZeroRows_SeqAIJ(l->B,istmp,0);CHKERRQ(ierr);
  if (diag && (l->A->M == l->A->N)) {
    ierr      = MatZeroRows_SeqAIJ(l->A,istmp,diag);CHKERRQ(ierr);
  } else if (diag) {
    /* rectangular diagonal block: zero first, then insert diagonal entries
       one at a time (requires that new nonzero locations are allowed) */
    ierr = MatZeroRows_SeqAIJ(l->A,istmp,0);CHKERRQ(ierr);
    if (((Mat_SeqBAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,0,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i = 0; i < slen; i++) {
      row  = lrows[i] + rstart_bs;
      ierr = MatSetValues_MPIBAIJ(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows_SeqAIJ(l->A,istmp,0);CHKERRQ(ierr);
  }

  ierr = ISDestroy(istmp);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status*)PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
    ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr        = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
1776 
1777 #undef __FUNC__
1778 #define __FUNC__ /*<a name=""></a>*/"MatPrintHelp_MPIBAIJ"
1779 int MatPrintHelp_MPIBAIJ(Mat A)
1780 {
1781   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*)A->data;
1782   MPI_Comm    comm = A->comm;
1783   static int  called = 0;
1784   int         ierr;
1785 
1786   PetscFunctionBegin;
1787   if (!a->rank) {
1788     ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr);
1789   }
1790   if (called) {PetscFunctionReturn(0);} else called = 1;
1791   ierr = (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
1792   ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
1793   PetscFunctionReturn(0);
1794 }
1795 
1796 #undef __FUNC__
1797 #define __FUNC__ /*<a name=""></a>*/"MatSetUnfactored_MPIBAIJ"
1798 int MatSetUnfactored_MPIBAIJ(Mat A)
1799 {
1800   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*)A->data;
1801   int         ierr;
1802 
1803   PetscFunctionBegin;
1804   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1805   PetscFunctionReturn(0);
1806 }
1807 
1808 static int MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
1809 
1810 #undef __FUNC__
1811 #define __FUNC__ /*<a name=""></a>*/"MatEqual_MPIBAIJ"
1812 int MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag)
1813 {
1814   Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1815   Mat         a,b,c,d;
1816   PetscTruth  flg;
1817   int         ierr;
1818 
1819   PetscFunctionBegin;
1820   if (B->type != MATMPIBAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type");
1821   a = matA->A; b = matA->B;
1822   c = matB->A; d = matB->B;
1823 
1824   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1825   if (flg == PETSC_TRUE) {
1826     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1827   }
1828   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1829   PetscFunctionReturn(0);
1830 }
1831 
1832 /* -------------------------------------------------------------------*/
1833 static struct _MatOps MatOps_Values = {
1834   MatSetValues_MPIBAIJ,
1835   MatGetRow_MPIBAIJ,
1836   MatRestoreRow_MPIBAIJ,
1837   MatMult_MPIBAIJ,
1838   MatMultAdd_MPIBAIJ,
1839   MatMultTranspose_MPIBAIJ,
1840   MatMultTransposeAdd_MPIBAIJ,
1841   0,
1842   0,
1843   0,
1844   0,
1845   0,
1846   0,
1847   0,
1848   MatTranspose_MPIBAIJ,
1849   MatGetInfo_MPIBAIJ,
1850   MatEqual_MPIBAIJ,
1851   MatGetDiagonal_MPIBAIJ,
1852   MatDiagonalScale_MPIBAIJ,
1853   MatNorm_MPIBAIJ,
1854   MatAssemblyBegin_MPIBAIJ,
1855   MatAssemblyEnd_MPIBAIJ,
1856   0,
1857   MatSetOption_MPIBAIJ,
1858   MatZeroEntries_MPIBAIJ,
1859   MatZeroRows_MPIBAIJ,
1860   0,
1861   0,
1862   0,
1863   0,
1864   MatGetSize_MPIBAIJ,
1865   MatGetLocalSize_MPIBAIJ,
1866   MatGetOwnershipRange_MPIBAIJ,
1867   0,
1868   0,
1869   0,
1870   0,
1871   MatDuplicate_MPIBAIJ,
1872   0,
1873   0,
1874   0,
1875   0,
1876   0,
1877   MatGetSubMatrices_MPIBAIJ,
1878   MatIncreaseOverlap_MPIBAIJ,
1879   MatGetValues_MPIBAIJ,
1880   0,
1881   MatPrintHelp_MPIBAIJ,
1882   MatScale_MPIBAIJ,
1883   0,
1884   0,
1885   0,
1886   MatGetBlockSize_MPIBAIJ,
1887   0,
1888   0,
1889   0,
1890   0,
1891   0,
1892   0,
1893   MatSetUnfactored_MPIBAIJ,
1894   0,
1895   MatSetValuesBlocked_MPIBAIJ,
1896   0,
1897   0,
1898   0,
1899   MatGetMaps_Petsc};
1900 
1901 
1902 EXTERN_C_BEGIN
1903 #undef __FUNC__
1904 #define __FUNC__ /*<a name=""></a>*/"MatGetDiagonalBlock_MPIBAIJ"
1905 int MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1906 {
1907   PetscFunctionBegin;
1908   *a      = ((Mat_MPIBAIJ *)A->data)->A;
1909   *iscopy = PETSC_FALSE;
1910   PetscFunctionReturn(0);
1911 }
1912 EXTERN_C_END
1913 
#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatCreateMPIBAIJ"
/*@C
   MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
   (block compressed row).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs   - size of block
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
           This value should be the same as the local size used in creating the
           y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
           This value should be the same as the local size used in creating the
           x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  d_nz  - number of block nonzeros per block row in diagonal portion of local
           submatrix  (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           in the diagonal portion of the local (possibly different for each block
           row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
.  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
           submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or PETSC_NULL.

   Output Parameter:
.  A - the matrix

   Options Database Keys:
.   -mat_no_unroll - uses code that does not unroll the loops in the
                     block calculations (much slower)
.   -mat_block_size - size of the blocks to use
.   -mat_mpi - use the parallel matrix data structures even on one processor
               (defaults to using SeqBAIJ format on one processor)

   Notes:
   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both).  Set
   d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
   memory allocation.  Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          -------------------
   row 3  |  o o o d d d o o o o o o
   row 4  |  o o o d d d o o o o o o
   row 5  |  o o o d d d o o o o o o
          -------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix.  Note that the d and the o submatrices are
   stored simply in the MATSEQBAIJ format for compressed row storage.

   Now d_nz should indicate the number of block nonzeros per row in the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.

   Level: intermediate

.keywords: matrix, block, aij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
@*/
int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
{
  Mat          B;
  Mat_MPIBAIJ  *b;
  int          ierr,i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size;
  PetscTruth   flag1,flag2,flg;

  PetscFunctionBegin;
  /* the command-line option overrides the bs argument */
  ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);

  /* NOTE(review): the checks below accept -2 (PETSC_DEFAULT) although the
     messages say "less than -1" -- confirm the intended lower bound */
  if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Invalid block size specified, must be positive");
  if (d_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nz cannot be less than -1: value %d",d_nz);
  if (o_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nz cannot be less than -1: value %d",o_nz);
  if (d_nnz) {
    for (i=0; i<m/bs; i++) {
      if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nnz cannot be less than -1: local row %d value %d",i,d_nnz[i]);
    }
  }
  if (o_nnz) {
    for (i=0; i<m/bs; i++) {
      if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nnz cannot be less than -1: local row %d value %d",i,o_nnz[i]);
    }
  }

  /* on a single process (unless overridden by options) fall back to SeqBAIJ */
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = OptionsHasName(PETSC_NULL,"-mat_mpibaij",&flag1);CHKERRQ(ierr);
  ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2);CHKERRQ(ierr);
  if (!flag1 && !flag2 && size == 1) {
    if (M == PETSC_DECIDE) M = m;
    if (N == PETSC_DECIDE) N = n;
    /* NOTE(review): o_nz/o_nnz are ignored here; on one process the whole
       matrix is "diagonal", so preallocation may be low -- confirm intended */
    ierr = MatCreateSeqBAIJ(comm,bs,M,N,d_nz,d_nnz,A);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  *A = 0;
  PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",comm,MatDestroy,MatView);
  PLogObjectCreate(B);
  B->data = (void*)(b = PetscNew(Mat_MPIBAIJ));CHKPTRQ(b);
  ierr    = PetscMemzero(b,sizeof(Mat_MPIBAIJ));CHKERRQ(ierr);
  ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);

  B->ops->destroy    = MatDestroy_MPIBAIJ;
  B->ops->view       = MatView_MPIBAIJ;
  B->mapping    = 0;
  B->factor     = 0;
  B->assembled  = PETSC_FALSE;

  B->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(comm,&b->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&b->size);CHKERRQ(ierr);

  /* validate the combinations of local/global sizes the caller supplied */
  if (m == PETSC_DECIDE && (d_nnz || o_nnz)) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
  }
  if (M == PETSC_DECIDE && m == PETSC_DECIDE) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either M or m should be specified");
  }
  if (N == PETSC_DECIDE && n == PETSC_DECIDE) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either N or n should be specified");
  }
  /* when both are given, the local sizes win and the global ones are recomputed */
  if (M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE;
  if (N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE;

  /* determine any missing global sizes by summing the local ones */
  if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
    work[0] = m; work[1] = n;
    mbs = m/bs; nbs = n/bs;
    ierr = MPI_Allreduce(work,sum,2,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
    if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;}
    if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;}
  }
  /* determine any missing local sizes by dividing the global block count */
  if (m == PETSC_DECIDE) {
    Mbs = M/bs;
    if (Mbs*bs != M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global rows must be divisible by blocksize");
    mbs = Mbs/b->size + ((Mbs % b->size) > b->rank);
    m   = mbs*bs;
  }
  if (n == PETSC_DECIDE) {
    Nbs = N/bs;
    if (Nbs*bs != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global cols must be divisible by blocksize");
    nbs = Nbs/b->size + ((Nbs % b->size) > b->rank);
    n   = nbs*bs;
  }
  if (mbs*bs != m || nbs*bs != n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of local rows, cols must be divisible by blocksize");
  }

  b->m = m; B->m = m;
  b->n = n; B->n = n;
  b->N = N; B->N = N;
  b->M = M; B->M = M;
  b->bs  = bs;
  b->bs2 = bs*bs;
  b->mbs = mbs;
  b->nbs = nbs;
  b->Mbs = Mbs;
  b->Nbs = Nbs;

  /* the information in the maps duplicates the information computed below, eventually
     we should remove the duplicate information that is not contained in the maps */
  ierr = MapCreateMPI(comm,m,M,&B->rmap);CHKERRQ(ierr);
  ierr = MapCreateMPI(comm,n,N,&B->cmap);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  /* one allocation holds three tables: rowners (block rows), cowners (block
     cols) and rowners_bs (point rows) */
  b->rowners = (int*)PetscMalloc(3*(b->size+2)*sizeof(int));CHKPTRQ(b->rowners);
  PLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
  b->cowners    = b->rowners + b->size + 2;
  b->rowners_bs = b->cowners + b->size + 2;
  /* gather every process's block-row count, then prefix-sum to get ranges */
  ierr = MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  b->rowners[0]    = 0;
  for (i=2; i<=b->size; i++) {
    b->rowners[i] += b->rowners[i-1];
  }
  for (i=0; i<=b->size; i++) {
    b->rowners_bs[i] = b->rowners[i]*bs;
  }
  b->rstart    = b->rowners[b->rank];
  b->rend      = b->rowners[b->rank+1];
  b->rstart_bs = b->rstart * bs;
  b->rend_bs   = b->rend * bs;

  /* same construction for the column ownership */
  ierr = MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  b->cowners[0] = 0;
  for (i=2; i<=b->size; i++) {
    b->cowners[i] += b->cowners[i-1];
  }
  b->cstart    = b->cowners[b->rank];
  b->cend      = b->cowners[b->rank+1];
  b->cstart_bs = b->cstart * bs;
  b->cend_bs   = b->cend * bs;


  /* create the two sequential pieces: diagonal block A (m x n) and
     off-diagonal block B (m x N, compressed later by MatSetUpMultiply) */
  if (d_nz == PETSC_DEFAULT) d_nz = 5;
  ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
  PLogObjectParent(B,b->A);
  if (o_nz == PETSC_DEFAULT) o_nz = 0;
  ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
  PLogObjectParent(B,b->B);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(comm,1,&B->stash);CHKERRQ(ierr);
  ierr = MatStashCreate_Private(comm,bs,&B->bstash);CHKERRQ(ierr);
  b->donotstash  = PETSC_FALSE;
  b->colmap      = PETSC_NULL;
  b->garray      = PETSC_NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used in block assembly */
  b->barray       = 0;

  /* stuff used for matrix vector multiply */
  b->lvec         = 0;
  b->Mvctx        = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = 0;
  b->hd           = 0;
  b->ht_size      = 0;
  b->ht_flag      = PETSC_FALSE;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  *A = B;
  ierr = OptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
  if (flg) {
    double fact = 1.39;
    ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
    ierr = OptionsGetDouble(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
    if (fact <= 1.0) fact = 1.39;
    ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
    PLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact);
  }
  /* register the composed operations looked up by name elsewhere */
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
                                     "MatStoreValues_MPIBAIJ",
                                     (void*)MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
                                     "MatRetrieveValues_MPIBAIJ",
                                     (void*)MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
                                     "MatGetDiagonalBlock_MPIBAIJ",
                                     (void*)MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2194 
2195 #undef __FUNC__
2196 #define __FUNC__ /*<a name=""></a>*/"MatDuplicate_MPIBAIJ"
2197 static int MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2198 {
2199   Mat         mat;
2200   Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
2201   int         ierr,len=0;
2202   PetscTruth  flg;
2203 
2204   PetscFunctionBegin;
2205   *newmat       = 0;
2206   PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",matin->comm,MatDestroy,MatView);
2207   PLogObjectCreate(mat);
2208   mat->data         = (void*)(a = PetscNew(Mat_MPIBAIJ));CHKPTRQ(a);
2209   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2210   mat->ops->destroy = MatDestroy_MPIBAIJ;
2211   mat->ops->view    = MatView_MPIBAIJ;
2212   mat->factor       = matin->factor;
2213   mat->assembled    = PETSC_TRUE;
2214   mat->insertmode   = NOT_SET_VALUES;
2215 
2216   a->m = mat->m   = oldmat->m;
2217   a->n = mat->n   = oldmat->n;
2218   a->M = mat->M   = oldmat->M;
2219   a->N = mat->N   = oldmat->N;
2220 
2221   a->bs  = oldmat->bs;
2222   a->bs2 = oldmat->bs2;
2223   a->mbs = oldmat->mbs;
2224   a->nbs = oldmat->nbs;
2225   a->Mbs = oldmat->Mbs;
2226   a->Nbs = oldmat->Nbs;
2227 
2228   a->rstart       = oldmat->rstart;
2229   a->rend         = oldmat->rend;
2230   a->cstart       = oldmat->cstart;
2231   a->cend         = oldmat->cend;
2232   a->size         = oldmat->size;
2233   a->rank         = oldmat->rank;
2234   a->donotstash   = oldmat->donotstash;
2235   a->roworiented  = oldmat->roworiented;
2236   a->rowindices   = 0;
2237   a->rowvalues    = 0;
2238   a->getrowactive = PETSC_FALSE;
2239   a->barray       = 0;
2240   a->rstart_bs    = oldmat->rstart_bs;
2241   a->rend_bs      = oldmat->rend_bs;
2242   a->cstart_bs    = oldmat->cstart_bs;
2243   a->cend_bs      = oldmat->cend_bs;
2244 
2245   /* hash table stuff */
2246   a->ht           = 0;
2247   a->hd           = 0;
2248   a->ht_size      = 0;
2249   a->ht_flag      = oldmat->ht_flag;
2250   a->ht_fact      = oldmat->ht_fact;
2251   a->ht_total_ct  = 0;
2252   a->ht_insert_ct = 0;
2253 
2254 
2255   a->rowners = (int*)PetscMalloc(3*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners);
2256   PLogObjectMemory(mat,3*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
2257   a->cowners    = a->rowners + a->size + 2;
2258   a->rowners_bs = a->cowners + a->size + 2;
2259   ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr);
2260   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
2261   ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr);
2262   if (oldmat->colmap) {
2263 #if defined (PETSC_USE_CTABLE)
2264   ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2265 #else
2266     a->colmap = (int*)PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap);
2267     PLogObjectMemory(mat,(a->Nbs)*sizeof(int));
2268     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr);
2269 #endif
2270   } else a->colmap = 0;
2271   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2272     a->garray = (int*)PetscMalloc(len*sizeof(int));CHKPTRQ(a->garray);
2273     PLogObjectMemory(mat,len*sizeof(int));
2274     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr);
2275   } else a->garray = 0;
2276 
2277   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2278   PLogObjectParent(mat,a->lvec);
2279   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2280 
2281   PLogObjectParent(mat,a->Mvctx);
2282   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2283   PLogObjectParent(mat,a->A);
2284   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2285   PLogObjectParent(mat,a->B);
2286   ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr);
2287   ierr = FListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr);
2288   if (flg) {
2289     ierr = MatPrintHelp(mat);CHKERRQ(ierr);
2290   }
2291   *newmat = mat;
2292   PetscFunctionReturn(0);
2293 }
2294 
2295 #include "sys.h"
2296 
2297 #undef __FUNC__
2298 #define __FUNC__ /*<a name=""></a>*/"MatLoad_MPIBAIJ"
2299 int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat)
2300 {
2301   Mat          A;
2302   int          i,nz,ierr,j,rstart,rend,fd;
2303   Scalar       *vals,*buf;
2304   MPI_Comm     comm = ((PetscObject)viewer)->comm;
2305   MPI_Status   status;
2306   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
2307   int          *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf;
2308   int          tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows;
2309   int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2310   int          dcount,kmax,k,nzcount,tmp;
2311 
2312   PetscFunctionBegin;
2313   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2314 
2315   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2316   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2317   if (!rank) {
2318     ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2319     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2320     if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
2321     if (header[3] < 0) {
2322       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as MPIBAIJ");
2323     }
2324   }
2325 
2326   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
2327   M = header[1]; N = header[2];
2328 
2329   if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices");
2330 
2331   /*
2332      This code adds extra rows to make sure the number of rows is
2333      divisible by the blocksize
2334   */
2335   Mbs        = M/bs;
2336   extra_rows = bs - M + bs*(Mbs);
2337   if (extra_rows == bs) extra_rows = 0;
2338   else                  Mbs++;
2339   if (extra_rows &&!rank) {
2340     PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n");
2341   }
2342 
2343   /* determine ownership of all rows */
2344   mbs = Mbs/size + ((Mbs % size) > rank);
2345   m   = mbs * bs;
2346   rowners = (int*)PetscMalloc(2*(size+2)*sizeof(int));CHKPTRQ(rowners);
2347   browners = rowners + size + 1;
2348   ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2349   rowners[0] = 0;
2350   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2351   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2352   rstart = rowners[rank];
2353   rend   = rowners[rank+1];
2354 
2355   /* distribute row lengths to all processors */
2356   locrowlens = (int*)PetscMalloc((rend-rstart)*bs*sizeof(int));CHKPTRQ(locrowlens);
2357   if (!rank) {
2358     rowlengths = (int*)PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths);
2359     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2360     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2361     sndcounts = (int*)PetscMalloc(size*sizeof(int));CHKPTRQ(sndcounts);
2362     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2363     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2364     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2365   } else {
2366     ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2367   }
2368 
2369   if (!rank) {
2370     /* calculate the number of nonzeros on each processor */
2371     procsnz = (int*)PetscMalloc(size*sizeof(int));CHKPTRQ(procsnz);
2372     ierr    = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
2373     for (i=0; i<size; i++) {
2374       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2375         procsnz[i] += rowlengths[j];
2376       }
2377     }
2378     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2379 
2380     /* determine max buffer needed and allocate it */
2381     maxnz = 0;
2382     for (i=0; i<size; i++) {
2383       maxnz = PetscMax(maxnz,procsnz[i]);
2384     }
2385     cols = (int*)PetscMalloc(maxnz*sizeof(int));CHKPTRQ(cols);
2386 
2387     /* read in my part of the matrix column indices  */
2388     nz = procsnz[0];
2389     ibuf = (int*)PetscMalloc(nz*sizeof(int));CHKPTRQ(ibuf);
2390     mycols = ibuf;
2391     if (size == 1)  nz -= extra_rows;
2392     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2393     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2394 
2395     /* read in every ones (except the last) and ship off */
2396     for (i=1; i<size-1; i++) {
2397       nz   = procsnz[i];
2398       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2399       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
2400     }
2401     /* read in the stuff for the last proc */
2402     if (size != 1) {
2403       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2404       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2405       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2406       ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
2407     }
2408     ierr = PetscFree(cols);CHKERRQ(ierr);
2409   } else {
2410     /* determine buffer space needed for message */
2411     nz = 0;
2412     for (i=0; i<m; i++) {
2413       nz += locrowlens[i];
2414     }
2415     ibuf   = (int*)PetscMalloc(nz*sizeof(int));CHKPTRQ(ibuf);
2416     mycols = ibuf;
2417     /* receive message of column indices*/
2418     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2419     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2420     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
2421   }
2422 
2423   /* loop over local rows, determining number of off diagonal entries */
2424   dlens  = (int*)PetscMalloc(2*(rend-rstart+1)*sizeof(int));CHKPTRQ(dlens);
2425   odlens = dlens + (rend-rstart);
2426   mask   = (int*)PetscMalloc(3*Mbs*sizeof(int));CHKPTRQ(mask);
2427   ierr   = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr);
2428   masked1 = mask    + Mbs;
2429   masked2 = masked1 + Mbs;
2430   rowcount = 0; nzcount = 0;
2431   for (i=0; i<mbs; i++) {
2432     dcount  = 0;
2433     odcount = 0;
2434     for (j=0; j<bs; j++) {
2435       kmax = locrowlens[rowcount];
2436       for (k=0; k<kmax; k++) {
2437         tmp = mycols[nzcount++]/bs;
2438         if (!mask[tmp]) {
2439           mask[tmp] = 1;
2440           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
2441           else masked1[dcount++] = tmp;
2442         }
2443       }
2444       rowcount++;
2445     }
2446 
2447     dlens[i]  = dcount;
2448     odlens[i] = odcount;
2449 
2450     /* zero out the mask elements we set */
2451     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2452     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2453   }
2454 
2455   /* create our matrix */
2456   ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);CHKERRQ(ierr);
2457   A = *newmat;
2458   MatSetOption(A,MAT_COLUMNS_SORTED);
2459 
2460   if (!rank) {
2461     buf = (Scalar*)PetscMalloc(maxnz*sizeof(Scalar));CHKPTRQ(buf);
2462     /* read in my part of the matrix numerical values  */
2463     nz = procsnz[0];
2464     vals = buf;
2465     mycols = ibuf;
2466     if (size == 1)  nz -= extra_rows;
2467     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2468     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2469 
2470     /* insert into matrix */
2471     jj      = rstart*bs;
2472     for (i=0; i<m; i++) {
2473       ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2474       mycols += locrowlens[i];
2475       vals   += locrowlens[i];
2476       jj++;
2477     }
2478     /* read in other processors (except the last one) and ship out */
2479     for (i=1; i<size-1; i++) {
2480       nz   = procsnz[i];
2481       vals = buf;
2482       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2483       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2484     }
2485     /* the last proc */
2486     if (size != 1){
2487       nz   = procsnz[i] - extra_rows;
2488       vals = buf;
2489       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2490       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2491       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
2492     }
2493     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2494   } else {
2495     /* receive numeric values */
2496     buf = (Scalar*)PetscMalloc(nz*sizeof(Scalar));CHKPTRQ(buf);
2497 
2498     /* receive message of values*/
2499     vals   = buf;
2500     mycols = ibuf;
2501     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2502     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2503     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
2504 
2505     /* insert into matrix */
2506     jj      = rstart*bs;
2507     for (i=0; i<m; i++) {
2508       ierr    = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2509       mycols += locrowlens[i];
2510       vals   += locrowlens[i];
2511       jj++;
2512     }
2513   }
2514   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2515   ierr = PetscFree(buf);CHKERRQ(ierr);
2516   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2517   ierr = PetscFree(rowners);CHKERRQ(ierr);
2518   ierr = PetscFree(dlens);CHKERRQ(ierr);
2519   ierr = PetscFree(mask);CHKERRQ(ierr);
2520   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2521   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2522   PetscFunctionReturn(0);
2523 }
2524 
2525 #undef __FUNC__
2526 #define __FUNC__ /*<a name=""></a>*/"MatMPIBAIJSetHashTableFactor"
2527 /*@
2528    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2529 
2530    Input Parameters:
2531 .  mat  - the matrix
.  fact - factor used in computing the size of the hash table
2533 
2534    Collective on Mat
2535 
2536    Level: advanced
2537 
2538   Notes:
2539    This can also be set by the command line option: -mat_use_hash_table fact
2540 
2541 .keywords: matrix, hashtable, factor, HT
2542 
2543 .seealso: MatSetOption()
2544 @*/
2545 int MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
2546 {
2547   Mat_MPIBAIJ *baij;
2548 
2549   PetscFunctionBegin;
2550   PetscValidHeaderSpecific(mat,MAT_COOKIE);
2551   if (mat->type != MATMPIBAIJ) {
2552     SETERRQ(PETSC_ERR_ARG_WRONG,1,"Incorrect matrix type. Use MPIBAIJ only.");
2553   }
2554   baij = (Mat_MPIBAIJ*)mat->data;
2555   baij->ht_fact = fact;
2556   PetscFunctionReturn(0);
2557 }
2558