xref: /petsc/src/mat/impls/baij/mpi/mpibaij.c (revision f7cf7585ec4ab5bbe01f2b7df7e992ca325859cc)
1 /*$Id: mpibaij.c,v 1.200 2000/06/30 18:55:36 bsmith Exp bsmith $*/
2 
3 #include "src/mat/impls/baij/mpi/mpibaij.h"   /*I  "petscmat.h"  I*/
4 #include "src/vec/vecimpl.h"
5 
6 EXTERN int MatSetUpMultiply_MPIBAIJ(Mat);
7 EXTERN int DisAssemble_MPIBAIJ(Mat);
8 EXTERN int MatIncreaseOverlap_MPIBAIJ(Mat,int,IS *,int);
9 EXTERN int MatGetSubMatrices_MPIBAIJ(Mat,int,IS *,IS *,MatReuse,Mat **);
10 EXTERN int MatGetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *);
11 EXTERN int MatSetValues_SeqBAIJ(Mat,int,int *,int,int *,Scalar *,InsertMode);
12 EXTERN int MatSetValuesBlocked_SeqBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
13 EXTERN int MatGetRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
14 EXTERN int MatRestoreRow_SeqBAIJ(Mat,int,int*,int**,Scalar**);
15 EXTERN int MatPrintHelp_SeqBAIJ(Mat);
16 EXTERN int MatZeroRows_SeqBAIJ(Mat,IS,Scalar*);
17 
18 /*  UGLY, ugly, ugly
19    When MatScalar == Scalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does
20    not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and
21    inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ()
22    converts the entries into single precision and then calls ..._MatScalar() to put them
23    into the single precision data structures.
24 */
25 #if defined(PETSC_USE_MAT_SINGLE)
26 EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
27 EXTERN int MatSetValues_MPIBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
28 EXTERN int MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
29 EXTERN int MatSetValues_MPIBAIJ_HT_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
30 EXTERN int MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
31 #else
32 #define MatSetValuesBlocked_SeqBAIJ_MatScalar      MatSetValuesBlocked_SeqBAIJ
33 #define MatSetValues_MPIBAIJ_MatScalar             MatSetValues_MPIBAIJ
34 #define MatSetValuesBlocked_MPIBAIJ_MatScalar      MatSetValuesBlocked_MPIBAIJ
35 #define MatSetValues_MPIBAIJ_HT_MatScalar          MatSetValues_MPIBAIJ_HT
36 #define MatSetValuesBlocked_MPIBAIJ_HT_MatScalar   MatSetValuesBlocked_MPIBAIJ_HT
37 #endif
38 
39 EXTERN_C_BEGIN
40 #undef __FUNC__
41 #define __FUNC__ /*<a name="MatStoreValues_MPIBAIJ"></a>*/"MatStoreValues_MPIBAIJ"
42 int MatStoreValues_MPIBAIJ(Mat mat)
43 {
44   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
45   int         ierr;
46 
47   PetscFunctionBegin;
48   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
49   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
50   PetscFunctionReturn(0);
51 }
52 EXTERN_C_END
53 
54 EXTERN_C_BEGIN
55 #undef __FUNC__
56 #define __FUNC__ /*<a name="MatRetrieveValues_MPIBAIJ"></a>*/"MatRetrieveValues_MPIBAIJ"
57 int MatRetrieveValues_MPIBAIJ(Mat mat)
58 {
59   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
60   int         ierr;
61 
62   PetscFunctionBegin;
63   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
64   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
65   PetscFunctionReturn(0);
66 }
67 EXTERN_C_END
68 
69 /*
70      Local utility routine that creates a mapping from the global column
71    number to the local number in the off-diagonal part of the local
   storage of the matrix.  This is done in a non-scalable way since the
73    length of colmap equals the global matrix length.
74 */
75 #undef __FUNC__
76 #define __FUNC__ /*<a name="CreateColmap_MPIBAIJ_Private"></a>*/"CreateColmap_MPIBAIJ_Private"
static int CreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data;  /* off-diagonal part; garray[] maps its local block cols to global */
  int         nbs = B->nbs,i,bs=B->bs,ierr;      /* nbs = # of block columns present in B */

  PetscFunctionBegin;
#if defined (PETSC_USE_CTABLE)
  /* Scalable variant: hash table keyed by global block column (+1 so 0 can mean "absent") */
  ierr = PetscTableCreate(baij->nbs,&baij->colmap);CHKERRQ(ierr);
  for (i=0; i<nbs; i++){
    ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
  }
#else
  /* Non-scalable variant: dense array of length Nbs (global # of block columns) */
  baij->colmap = (int*)PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap);
  PLogObjectMemory(mat,baij->Nbs*sizeof(int));
  ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));CHKERRQ(ierr);
  /* entries are stored +1 so a zero entry marks a block column with no local storage */
  for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
#endif
  PetscFunctionReturn(0);
}
97 
98 #define CHUNKSIZE  10
99 
/*
   Inserts a single (row,col,value) into the "diagonal" local matrix A,
   enlarging the row storage by CHUNKSIZE block entries when the row is full.
   Relies on variables declared in the enclosing function scope (a, aa, ai,
   aj, aimax, ailen, bs, bs2, rp, ap, bap, nrow, rmax, low, high, t, _i, ii,
   N, brow, bcol, ridx, cidx, ierr, A).

   Fix: the PetscMemzero() of the freshly allocated chunk used
   sizeof(Scalar); the buffer holds MatScalar entries, and with
   PETSC_USE_MAT_SINGLE sizeof(Scalar) > sizeof(MatScalar), so too many
   bytes were zeroed, clobbering the adjacent new_j index array.  Now uses
   sizeof(MatScalar), matching MatSetValues_SeqBAIJ_B_Private().
*/
#define  MatSetValues_SeqBAIJ_A_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto a_noinsert; \
        } \
      } \
      if (a->nonew == 1) goto a_noinsert; \
      else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
      if (nrow >= rmax) { \
        /* there is no extra room in row, therefore enlarge */ \
        int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
        MatScalar *new_a; \
 \
        if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
 \
        /* malloc new storage space; values, column indices and row offsets share one allocation */ \
        len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \
        new_a   = (MatScalar*)PetscMalloc(len);CHKPTRQ(new_a); \
        new_j   = (int*)(new_a + bs2*new_nz); \
        new_i   = new_j + new_nz; \
 \
        /* copy over old data into new slots */ \
        for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \
        for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
        ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
        len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
        ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
        ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
                    aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
        /* free up old matrix storage */ \
        ierr = PetscFree(a->a);CHKERRQ(ierr);  \
        if (!a->singlemalloc) { \
          ierr = PetscFree(a->i);CHKERRQ(ierr); \
          ierr = PetscFree(a->j);CHKERRQ(ierr);\
        } \
        aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
        a->singlemalloc = PETSC_TRUE; \
 \
        rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
        rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
        PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
        a->maxnz += bs2*CHUNKSIZE; \
        a->reallocs++; \
        a->nz++; \
      } \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      a_noinsert:; \
    ailen[brow] = nrow; \
}
176 
/*
   Inserts a single (row,col,value) into the "off-diagonal" local matrix B,
   enlarging the row storage by CHUNKSIZE block entries when the row is full.
   Mirror image of MatSetValues_SeqBAIJ_A_Private(); relies on variables
   declared in the enclosing function scope (b, ba, bi, bj, bimax, bilen,
   bs, bs2, rp, ap, bap, nrow, rmax, low, high, t, _i, ii, N, brow, bcol,
   ridx, cidx, ierr, B).
*/
#define  MatSetValues_SeqBAIJ_B_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto b_noinsert; \
        } \
      } \
      if (b->nonew == 1) goto b_noinsert; \
      else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
      if (nrow >= rmax) { \
        /* there is no extra room in row, therefore enlarge */ \
        int       new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
        MatScalar *new_a; \
 \
        if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
 \
        /* malloc new storage space */ \
        len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \
        new_a   = (MatScalar*)PetscMalloc(len);CHKPTRQ(new_a); \
        new_j   = (int*)(new_a + bs2*new_nz); \
        new_i   = new_j + new_nz; \
 \
        /* copy over old data into new slots */ \
        for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \
        for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
        ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
        len  = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
        ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
        ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
                    ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
        /* free up old matrix storage */ \
        ierr = PetscFree(b->a);CHKERRQ(ierr);  \
        if (!b->singlemalloc) { \
          ierr = PetscFree(b->i);CHKERRQ(ierr); \
          ierr = PetscFree(b->j);CHKERRQ(ierr); \
        } \
        ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
        b->singlemalloc = PETSC_TRUE; \
 \
        rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
        rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
        PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
        b->maxnz += bs2*CHUNKSIZE; \
        b->reallocs++; \
        b->nz++; \
      } \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      b_noinsert:; \
    bilen[brow] = nrow; \
}
252 
253 #if defined(PETSC_USE_MAT_SINGLE)
254 #undef __FUNC__
255 #define __FUNC__ /*<a name="MatSetValues_MPIBAIJ"></a>*/"MatSetValues_MPIBAIJ"
256 int MatSetValues_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
257 {
258   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
259   int         ierr,i,N = m*n;
260   MatScalar   *vsingle;
261 
262   PetscFunctionBegin;
263   if (N > b->setvalueslen) {
264     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
265     b->setvaluescopy = (MatScalar*)PetscMalloc(N*sizeof(MatScalar));CHKPTRQ(b->setvaluescopy);
266     b->setvalueslen  = N;
267   }
268   vsingle = b->setvaluescopy;
269 
270   for (i=0; i<N; i++) {
271     vsingle[i] = v[i];
272   }
273   ierr = MatSetValues_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
274   PetscFunctionReturn(0);
275 }
276 
277 #undef __FUNC__
278 #define __FUNC__ /*<a name="MatSetValuesBlocked_MPIBAIJ"></a>*/"MatSetValuesBlocked_MPIBAIJ"
279 int MatSetValuesBlocked_MPIBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
280 {
281   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
282   int         ierr,i,N = m*n*b->bs2;
283   MatScalar   *vsingle;
284 
285   PetscFunctionBegin;
286   if (N > b->setvalueslen) {
287     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
288     b->setvaluescopy = (MatScalar*)PetscMalloc(N*sizeof(MatScalar));CHKPTRQ(b->setvaluescopy);
289     b->setvalueslen  = N;
290   }
291   vsingle = b->setvaluescopy;
292   for (i=0; i<N; i++) {
293     vsingle[i] = v[i];
294   }
295   ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
296   PetscFunctionReturn(0);
297 }
298 
299 #undef __FUNC__
300 #define __FUNC__ /*<a name="MatSetValues_MPIBAIJ_HT"></a>*/"MatSetValues_MPIBAIJ_HT"
301 int MatSetValues_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
302 {
303   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
304   int         ierr,i,N = m*n;
305   MatScalar   *vsingle;
306 
307   PetscFunctionBegin;
308   if (N > b->setvalueslen) {
309     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
310     b->setvaluescopy = (MatScalar*)PetscMalloc(N*sizeof(MatScalar));CHKPTRQ(b->setvaluescopy);
311     b->setvalueslen  = N;
312   }
313   vsingle = b->setvaluescopy;
314   for (i=0; i<N; i++) {
315     vsingle[i] = v[i];
316   }
317   ierr = MatSetValues_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
318   PetscFunctionReturn(0);
319 }
320 
321 #undef __FUNC__
322 #define __FUNC__ /*<a name="MatSetValuesBlocked_MPIBAIJ_HT"></a>*/"MatSetValuesBlocked_MPIBAIJ_HT"
323 int MatSetValuesBlocked_MPIBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
324 {
325   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
326   int         ierr,i,N = m*n*b->bs2;
327   MatScalar   *vsingle;
328 
329   PetscFunctionBegin;
330   if (N > b->setvalueslen) {
331     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
332     b->setvaluescopy = (MatScalar*)PetscMalloc(N*sizeof(MatScalar));CHKPTRQ(b->setvaluescopy);
333     b->setvalueslen  = N;
334   }
335   vsingle = b->setvaluescopy;
336   for (i=0; i<N; i++) {
337     vsingle[i] = v[i];
338   }
339   ierr = MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
340   PetscFunctionReturn(0);
341 }
342 #endif
343 
344 #undef __FUNC__
345 #define __FUNC__ /*<a name="MatSetValues_MPIBAIJ"></a>*/"MatSetValues_MPIBAIJ"
/*
   Point-wise insertion of values into an MPIBAIJ matrix.  Rows owned by
   this process are inserted directly into the local A (diagonal) or B
   (off-diagonal) part via the MatSetValues_SeqBAIJ_{A,B}_Private() macros;
   rows owned elsewhere are stashed for communication at assembly time.
   Row/column indices im[], in[] are global point (not block) indices.
*/
int MatSetValues_MPIBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  MatScalar   value;
  int         ierr,i,j,row,col;
  int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
  int         rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
  int         cend_orig=baij->cend_bs,bs=baij->bs;

  /* Some Variables required in the macro */
  Mat         A = baij->A;
  Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)(A)->data;
  int         *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar   *aa=a->a;

  Mat         B = baij->B;
  Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data;
  int         *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar   *ba=b->a;

  int         *rp,ii,nrow,_i,rmax,N,brow,bcol;
  int         low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar   *ap,*bap;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;  /* negative rows are silently ignored */
#if defined(PETSC_USE_BOPT_g)
    if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      /* row is owned locally; convert to local row number */
      row = im[i] - rstart_orig;
      for (j=0; j<n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig){
          /* column falls in the diagonal block A */
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;  /* negative columns are silently ignored */
#if defined(PETSC_USE_BOPT_g)
        else if (in[j] >= baij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Col too large");}
#endif
        else {
          /* column goes in the off-diagonal block B */
          if (mat->was_assembled) {
            /* after first assembly B is compressed; map global column to
               compressed local column via the colmap */
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1 + in[j]%bs;  /* undo the +1 shift, add offset within block */
#else
            col = baij->colmap[in[j]/bs] - 1 + in[j]%bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              /* column not present in B: expand back to uncompressed form */
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B = baij->B;
              b = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba=b->a;
            }
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {
      /* row owned by another process: stash for assembly-time communication */
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
426 
427 #undef __FUNC__
428 #define __FUNC__ /*<a name="MatSetValuesBlocked_MPIBAIJ"></a>*/"MatSetValuesBlocked_MPIBAIJ"
/*
   Block-wise insertion into an MPIBAIJ matrix: im[]/in[] are global BLOCK
   row/column indices and v contains m*n dense bs-by-bs blocks.  Each block
   is (if necessary) gathered into the contiguous scratch buffer
   baij->barray before being handed to the sequential SetValuesBlocked on
   the A (diagonal) or B (off-diagonal) local part; non-local rows are
   stashed for assembly-time communication.
*/
int MatSetValuesBlocked_MPIBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  MatScalar   *value,*barray=baij->barray;
  int         ierr,i,j,ii,jj,row,col;
  int         roworiented = baij->roworiented,rstart=baij->rstart ;
  int         rend=baij->rend,cstart=baij->cstart,stepval;
  int         cend=baij->cend,bs=baij->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  /* lazily allocate the bs2-sized scratch block used to linearize one block */
  if(!barray) {
    baij->barray = barray = (MatScalar*)PetscMalloc(bs2*sizeof(MatScalar));CHKPTRQ(barray);
  }

  /* stride (in scalars) between consecutive rows of one block inside v */
  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;  /* negative block rows are silently ignored */
#if defined(PETSC_USE_BOPT_g)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large, row %d max %d",im[i],baij->Mbs);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;  /* local block row */
      for (j=0; j<n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = v + i*bs2;           /* block already contiguous in v */
        } else if((!roworiented) && (m == 1)) {
          barray = v + j*bs2;           /* block already contiguous in v */
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          /* gather the strided bs-by-bs block into the contiguous scratch */
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++  = *value++;
            }
          }
          barray -=bs2;  /* rewind to the start of the scratch block */
        }

        if (in[j] >= cstart && in[j] < cend){
          /* block column falls in the diagonal part A */
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
        else if (in[j] < 0) continue;  /* negative block columns are silently ignored */
#if defined(PETSC_USE_BOPT_g)
        else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large, col %d max %d",in[j],baij->Nbs);}
#endif
        else {
          /* block column goes in the off-diagonal part B */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_BOPT_g)
            /* sanity check: colmap entries must be block aligned */
#if defined (PETSC_USE_CTABLE)
            { int data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");
#endif
#endif
#if defined (PETSC_USE_CTABLE)
	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;  /* undo the +1 shift, convert to block column */
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              /* block column not present in B: expand back to uncompressed form */
              ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
            }
          }
          else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      /* block row owned elsewhere: stash for assembly-time communication */
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
526 
/* Multiplicative (Fibonacci) hashing: HASH_KEY is the fractional part of the
   golden ratio; HASH() scales the fractional part of key*HASH_KEY into
   [0,size).  tmp must be a floating-point lvalue used as scratch (the macro
   assigns to it), which avoids calling fmod(). */
#define HASH_KEY 0.6180339887
#define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
/* #define HASH(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
531 #undef __FUNC__
532 #define __FUNC__ /*<a name="MatSetValues_MPIBAIJ_HT_MatScalar"></a>*/"MatSetValues_MPIBAIJ_HT_MatScalar"
/*
   Point-wise insertion using the hash-table fast path: the (block row,
   block col) pair is hashed to locate the address of the stored bs-by-bs
   block directly (no searching through row structures).  Requires the hash
   table to have been built already; inserting a position absent from the
   table is an error.  Non-local rows are stashed as usual.
*/
int MatSetValues_MPIBAIJ_HT_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  int         ierr,i,j,row,col;
  int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
  int         rend_orig=baij->rend_bs,Nbs=baij->Nbs;
  int         h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx;
  PetscReal   tmp;
  MatScalar   **HD = baij->hd,value;
#if defined(PETSC_USE_BOPT_g)
  /* probe-count statistics, kept only in debug builds */
  int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  for (i=0; i<m; i++) {
#if defined(PETSC_USE_BOPT_g)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
      row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j=0; j<n; j++) {
        col = in[j];
        if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
        /* Look up into the Hash Table */
        /* key encodes (block row, block col); +1 keeps 0 free as "empty" */
        key = (row/bs)*Nbs+(col/bs)+1;
        h1  = HASH(size,key,tmp);


        idx = h1;
#if defined(PETSC_USE_BOPT_g)
        insert_ct++;
        total_ct++;
        if (HT[idx] != key) {
          /* linear probe to end of table, then wrap to the beginning */
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#else
        if (HT[idx] != key) {
          /* linear probe to end of table, then wrap to the beginning */
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#endif
        /* A HASH table entry is found, so insert the values at the correct address */
        /* HD[idx] points at the bs-by-bs block, stored column-major */
        if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
        else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
      }
    } else {
      /* row owned by another process: stash for assembly-time communication */
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_BOPT_g)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}
607 
608 #undef __FUNC__
609 #define __FUNC__ /*<a name=""></a>*/"MatSetValuesBlocked_MPIBAIJ_HT_MatScalar"
/*
   Block-wise insertion using the hash-table fast path: im[]/in[] are global
   BLOCK indices; each (block row, block col) is hashed to the address of
   its stored bs-by-bs block, and the source block is scattered into it with
   the appropriate stride (the stored blocks are column-major).  Requires
   the hash table to exist already; non-local block rows are stashed.
*/
int MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  int         ierr,i,j,ii,jj,row,col;
  int         roworiented = baij->roworiented,rstart=baij->rstart ;
  int         rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2;
  int         h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
  PetscReal   tmp;
  MatScalar   **HD = baij->hd,*baij_a;
  MatScalar   *v_t,*value;
#if defined(PETSC_USE_BOPT_g)
  /* probe-count statistics, kept only in debug builds */
  int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
#endif

  PetscFunctionBegin;

  /* stride (in scalars) between consecutive rows of one block inside v */
  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
#if defined(PETSC_USE_BOPT_g)
    if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
#endif
    row   = im[i];
    v_t   = v + i*bs2;  /* start of this block row's data (row-oriented case) */
    if (row >= rstart && row < rend) {
      for (j=0; j<n; j++) {
        col = in[j];

        /* Look up into the Hash Table */
        /* key encodes (block row, block col); +1 keeps 0 free as "empty" */
        key = row*Nbs+col+1;
        h1  = HASH(size,key,tmp);

        idx = h1;
#if defined(PETSC_USE_BOPT_g)
        total_ct++;
        insert_ct++;
       if (HT[idx] != key) {
          /* linear probe to end of table, then wrap to the beginning */
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#else
        if (HT[idx] != key) {
          /* linear probe to end of table, then wrap to the beginning */
          for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
          if (idx == size) {
            for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
            if (idx == h1) {
              SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
            }
          }
        }
#endif
        baij_a = HD[idx];  /* destination bs-by-bs block (column-major) */
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t  += bs;
          /* source rows become destination columns (transpose on the fly) */
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj]  += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval) {
              for (jj=ii; jj<bs2; jj+=bs) {
                baij_a[jj]  = *value++;
              }
            }
          }
        } else {
          /* column-oriented source: copy column by column */
          value = v + j*(stepval+bs)*bs + i*bs;
          if (addv == ADD_VALUES) {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj]  += *value++;
              }
            }
          } else {
            for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
              for (jj=0; jj<bs; jj++) {
                baij_a[jj]  = *value++;
              }
            }
          }
        }
      }
    } else {
      /* block row owned elsewhere: stash for assembly-time communication */
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
#if defined(PETSC_USE_BOPT_g)
  baij->ht_total_ct = total_ct;
  baij->ht_insert_ct = insert_ct;
#endif
  PetscFunctionReturn(0);
}
722 
723 #undef __FUNC__
724 #define __FUNC__ /*<a name=""></a>*/"MatGetValues_MPIBAIJ"
/*
   Retrieves individual matrix entries; only entries in locally owned rows
   are supported (non-local rows raise an error).  idxm[]/idxn[] are global
   point indices; entries in columns with no local storage are returned as
   zero.
*/
int MatGetValues_MPIBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  int        bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
  int        bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
    if (idxm[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;  /* local row number */
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
        if (idxn[j] >= baij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
        if (idxn[j] >= bscstart && idxn[j] < bscend){
          /* entry lives in the diagonal block A */
          col = idxn[j] - bscstart;
          ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          /* entry lives in the off-diagonal block B; map the global column
             through colmap to B's compressed local column numbering */
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data --;  /* undo the +1 shift used in the colmap */
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          /* column absent from B (or stale colmap entry): value is zero */
          if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
    }
  }
 PetscFunctionReturn(0);
}
766 
767 #undef __FUNC__
768 #define __FUNC__ /*<a name=""></a>*/"MatNorm_MPIBAIJ"
/*
   Computes a norm of the parallel matrix.  Only NORM_FROBENIUS is
   supported in parallel: the squared entries of both local parts are
   summed, reduced across all processes, and the square root taken.
   On one process the sequential MatNorm() is used directly.
*/
int MatNorm_MPIBAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ *amat = (Mat_SeqBAIJ*)baij->A->data,*bmat = (Mat_SeqBAIJ*)baij->B->data;
  int        ierr,i,bs2=baij->bs2;
  PetscReal  sum = 0.0;
  MatScalar  *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    /* single process: no communication needed */
    ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      /* accumulate |a_ij|^2 over the diagonal part A */
      v = amat->a;
      for (i=0; i<amat->nz*bs2; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      /* accumulate |a_ij|^2 over the off-diagonal part B */
      v = bmat->a;
      for (i=0; i<bmat->nz*bs2; i++) {
#if defined(PETSC_USE_COMPLEX)
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
#else
        sum += (*v)*(*v); v++;
#endif
      }
      /* NOTE(review): MPI_DOUBLE assumes PetscReal is double — if a build
         exists where PetscReal is single precision this datatype is wrong;
         confirm against the PetscReal definition for this release */
      ierr = MPI_Allreduce(&sum,norm,1,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *norm = sqrt(*norm);
    } else {
      SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet");
    }
  }
  PetscFunctionReturn(0);
}
806 
807 
808 /*
809   Creates the hash table, and sets the table
810   This table is created only once.
  If new entries need to be added to the matrix
812   then the hash table has to be destroyed and
813   recreated.
814 */
815 #undef __FUNC__
816 #define __FUNC__ /*<a name=""></a>*/"MatCreateHashTable_MPIBAIJ_Private"
817 int MatCreateHashTable_MPIBAIJ_Private(Mat mat,PetscReal factor)
818 {
819   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
820   Mat         A = baij->A,B=baij->B;
821   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
822   int         i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
823   int         size,bs2=baij->bs2,rstart=baij->rstart,ierr;
824   int         cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs;
825   int         *HT,key;
826   MatScalar   **HD;
827   PetscReal   tmp;
828 #if defined(PETSC_USE_BOPT_g)
829   int         ct=0,max=0;
830 #endif
831 
832   PetscFunctionBegin;
833   baij->ht_size=(int)(factor*nz);
834   size = baij->ht_size;
835 
836   if (baij->ht) {
837     PetscFunctionReturn(0);
838   }
839 
840   /* Allocate Memory for Hash Table */
841   baij->hd = (MatScalar**)PetscMalloc((size)*(sizeof(int)+sizeof(MatScalar*))+1);CHKPTRQ(baij->hd);
842   baij->ht = (int*)(baij->hd + size);
843   HD = baij->hd;
844   HT = baij->ht;
845 
846 
847   ierr = PetscMemzero(HD,size*(sizeof(int)+sizeof(Scalar*)));CHKERRQ(ierr);
848 
849 
850   /* Loop Over A */
851   for (i=0; i<a->mbs; i++) {
852     for (j=ai[i]; j<ai[i+1]; j++) {
853       row = i+rstart;
854       col = aj[j]+cstart;
855 
856       key = row*Nbs + col + 1;
857       h1  = HASH(size,key,tmp);
858       for (k=0; k<size; k++){
859         if (HT[(h1+k)%size] == 0.0) {
860           HT[(h1+k)%size] = key;
861           HD[(h1+k)%size] = a->a + j*bs2;
862           break;
863 #if defined(PETSC_USE_BOPT_g)
864         } else {
865           ct++;
866 #endif
867         }
868       }
869 #if defined(PETSC_USE_BOPT_g)
870       if (k> max) max = k;
871 #endif
872     }
873   }
874   /* Loop Over B */
875   for (i=0; i<b->mbs; i++) {
876     for (j=bi[i]; j<bi[i+1]; j++) {
877       row = i+rstart;
878       col = garray[bj[j]];
879       key = row*Nbs + col + 1;
880       h1  = HASH(size,key,tmp);
881       for (k=0; k<size; k++){
882         if (HT[(h1+k)%size] == 0.0) {
883           HT[(h1+k)%size] = key;
884           HD[(h1+k)%size] = b->a + j*bs2;
885           break;
886 #if defined(PETSC_USE_BOPT_g)
887         } else {
888           ct++;
889 #endif
890         }
891       }
892 #if defined(PETSC_USE_BOPT_g)
893       if (k> max) max = k;
894 #endif
895     }
896   }
897 
898   /* Print Summary */
899 #if defined(PETSC_USE_BOPT_g)
900   for (i=0,j=0; i<size; i++) {
901     if (HT[i]) {j++;}
902   }
903   PLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n",(j== 0)? 0.0:((PetscReal)(ct+j))/j,max);
904 #endif
905   PetscFunctionReturn(0);
906 }
907 
908 #undef __FUNC__
909 #define __FUNC__ /*<a name=""></a>*/"MatAssemblyBegin_MPIBAIJ"
910 int MatAssemblyBegin_MPIBAIJ(Mat mat,MatAssemblyType mode)
911 {
912   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
913   int         ierr,nstash,reallocs;
914   InsertMode  addv;
915 
916   PetscFunctionBegin;
917   if (baij->donotstash) {
918     PetscFunctionReturn(0);
919   }
920 
921   /* make sure all processors are either in INSERTMODE or ADDMODE */
922   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
923   if (addv == (ADD_VALUES|INSERT_VALUES)) {
924     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added");
925   }
926   mat->insertmode = addv; /* in case this processor had no cache */
927 
928   ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
929   ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
930   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
931   PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Stash has %d entries,uses %d mallocs.\n",nstash,reallocs);
932   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
933   PLogInfo(0,"MatAssemblyBegin_MPIBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
934   PetscFunctionReturn(0);
935 }
936 
#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatAssemblyEnd_MPIBAIJ"
/*
   Completes matrix assembly: receives and applies the stashed off-process
   values (point stash first, then block stash), assembles the local blocks
   A and B, participates in a collective disassembly check, sets up the
   scatter context on the first final assembly, and optionally builds the
   hash table used by the fast MatSetValues paths.
*/
int MatAssemblyEnd_MPIBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPIBAIJ *baij=(Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ *a=(Mat_SeqBAIJ*)baij->A->data,*b=(Mat_SeqBAIJ*)baij->B->data;
  int         i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2;
  int         *row,*col,other_disassembled;
  PetscTruth  r1,r2,r3;
  MatScalar   *val;
  InsertMode  addv = mat->insertmode;

  PetscFunctionBegin;
  if (!baij->donotstash) {
    /* drain the point stash: each message is a list of (row,col,val) triples */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = b->roworiented;
    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;
    b->roworiented    = PETSC_FALSE;
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* each stashed block holds bs2 values, hence the val+i*bs2 offset */
        ierr = MatSetValuesBlocked_MPIBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    b->roworiented    = r3;
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    /* first final assembly: build the communication pattern used by MatMult etc. */
    ierr = MatSetUpMultiply_MPIBAIJ(mat);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

#if defined(PETSC_USE_BOPT_g)
  if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
    /* report and reset the hash-table search statistics (debug builds only) */
    PLogInfo(0,"MatAssemblyEnd_MPIBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((double)baij->ht_total_ct)/baij->ht_insert_ct);
    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    /* user requested the hash table: build it and switch the SetValues
       function pointers to the fast hash-table variants */
    ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  if (baij->rowvalues) {
    /* release the MatGetRow() work buffer; it is reallocated on demand */
    ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
    baij->rowvalues = 0;
  }
  PetscFunctionReturn(0);
}
1035 
#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatView_MPIBAIJ_ASCIIorDraworSocket"
/*
   Viewer back end for MPIBAIJ matrices.  The quick ASCII info formats are
   handled directly; otherwise the entire matrix is gathered into a new
   MPIBAIJ matrix that lives wholly on process 0 and viewed there through
   the singleton viewer (all processes must participate for draw viewers).
*/
static int MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer)
{
  Mat_MPIBAIJ  *baij = (Mat_MPIBAIJ*)mat->data;
  int          ierr,format,bs = baij->bs,size = baij->size,rank = baij->rank;
  PetscTruth   isascii,isdraw;
  Viewer       sviewer;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
  if (isascii) {
    ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
      /* per-process statistics only; no matrix entries are printed */
      MatInfo info;
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
              rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
              baij->bs,(int)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
      ierr = ViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == VIEWER_FORMAT_ASCII_INFO) {
      ierr = ViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    Draw       draw;
    PetscTruth isnull;
    /* a null draw context means graphics are disabled; skip the view entirely */
    ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqBAIJ *Aloc;
    int         M = baij->M,N = baij->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar   *a;

    /* rank 0 owns all rows of the gathered matrix; all others own none */
    if (!rank) {
      ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreateMPIBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
    }
    PLogObjectParent(mat,A);

    /* copy over the A part */
    Aloc  = (Mat_SeqBAIJ*)baij->A->data;
    ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
    /* rvals holds the bs consecutive global scalar rows of one block row */
    rvals = (int*)PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstart + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstart+aj[j])*bs;
        for (k=0; k<bs; k++) {
          /* one column of a block at a time; blocks are stored column-oriented */
          ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ*)baij->B->data;
    ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstart + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        /* garray maps B's local block columns to global block columns */
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the Draw object
    */
    ierr = ViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatView(((Mat_MPIBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = ViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1139 
1140 #undef __FUNC__
1141 #define __FUNC__ /*<a name=""></a>*/"MatView_MPIBAIJ"
1142 int MatView_MPIBAIJ(Mat mat,Viewer viewer)
1143 {
1144   int        ierr;
1145   PetscTruth isascii,isdraw,issocket,isbinary;
1146 
1147   PetscFunctionBegin;
1148   ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
1149   ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
1150   ierr = PetscTypeCompare((PetscObject)viewer,SOCKET_VIEWER,&issocket);CHKERRQ(ierr);
1151   ierr = PetscTypeCompare((PetscObject)viewer,BINARY_VIEWER,&isbinary);CHKERRQ(ierr);
1152   if (isascii || isdraw || issocket || isbinary) {
1153     ierr = MatView_MPIBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1154   } else {
1155     SETERRQ1(1,1,"Viewer type %s not supported by MPIBAIJ matrices",((PetscObject)viewer)->type_name);
1156   }
1157   PetscFunctionReturn(0);
1158 }
1159 
#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatDestroy_MPIBAIJ"
/*
   Destroys the parallel matrix: mappings, row/column maps, both stashes,
   the local blocks A and B, the column map, the scatter machinery, and
   all auxiliary work arrays, then the header itself.
*/
int MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
  int         ierr;

  PetscFunctionBegin;

  if (mat->mapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr);
  }
  if (mat->bmapping) {
    ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr);
  }
  if (mat->rmap) {
    ierr = MapDestroy(mat->rmap);CHKERRQ(ierr);
  }
  if (mat->cmap) {
    ierr = MapDestroy(mat->cmap);CHKERRQ(ierr);
  }
#if defined(PETSC_USE_LOG)
  PLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",baij->M,baij->N);
#endif

  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);

  ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(baij->B);CHKERRQ(ierr);
  /* colmap is either a PetscTable or a plain array depending on build options */
#if defined (PETSC_USE_CTABLE)
  if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
#else
  if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
#endif
  if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
  if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
  if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
  if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
  if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
  /* hd and ht share one allocation (see MatCreateHashTable_MPIBAIJ_Private),
     so freeing hd releases the hash table as well */
  if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
#if defined(PETSC_USE_MAT_SINGLE)
  if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);}
#endif
  ierr = PetscFree(baij);CHKERRQ(ierr);
  PLogObjectDestroy(mat);
  PetscHeaderDestroy(mat);
  PetscFunctionReturn(0);
}
1210 
1211 #undef __FUNC__
1212 #define __FUNC__ /*<a name=""></a>*/"MatMult_MPIBAIJ"
1213 int MatMult_MPIBAIJ(Mat A,Vec xx,Vec yy)
1214 {
1215   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1216   int         ierr,nt;
1217 
1218   PetscFunctionBegin;
1219   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1220   if (nt != a->n) {
1221     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx");
1222   }
1223   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1224   if (nt != a->m) {
1225     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible parition of A and yy");
1226   }
1227   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1228   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1229   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1230   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1231   ierr = VecScatterPostRecvs(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1232   PetscFunctionReturn(0);
1233 }
1234 
1235 #undef __FUNC__
1236 #define __FUNC__ /*<a name=""></a>*/"MatMultAdd_MPIBAIJ"
1237 int MatMultAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1238 {
1239   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1240   int        ierr;
1241 
1242   PetscFunctionBegin;
1243   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1244   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1245   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1246   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1247   PetscFunctionReturn(0);
1248 }
1249 
1250 #undef __FUNC__
1251 #define __FUNC__ /*<a name=""></a>*/"MatMultTranspose_MPIBAIJ"
1252 int MatMultTranspose_MPIBAIJ(Mat A,Vec xx,Vec yy)
1253 {
1254   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1255   int         ierr;
1256 
1257   PetscFunctionBegin;
1258   /* do nondiagonal part */
1259   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1260   /* send it on its way */
1261   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1262   /* do local part */
1263   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1264   /* receive remote parts: note this assumes the values are not actually */
1265   /* inserted in yy until the next line, which is true for my implementation*/
1266   /* but is not perhaps always true. */
1267   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1268   PetscFunctionReturn(0);
1269 }
1270 
1271 #undef __FUNC__
1272 #define __FUNC__ /*<a name=""></a>*/"MatMultTransposeAdd_MPIBAIJ"
1273 int MatMultTransposeAdd_MPIBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1274 {
1275   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1276   int         ierr;
1277 
1278   PetscFunctionBegin;
1279   /* do nondiagonal part */
1280   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1281   /* send it on its way */
1282   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1283   /* do local part */
1284   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1285   /* receive remote parts: note this assumes the values are not actually */
1286   /* inserted in yy until the next line, which is true for my implementation*/
1287   /* but is not perhaps always true. */
1288   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1289   PetscFunctionReturn(0);
1290 }
1291 
1292 /*
1293   This only works correctly for square matrices where the subblock A->A is the
1294    diagonal block
1295 */
1296 #undef __FUNC__
1297 #define __FUNC__ /*<a name=""></a>*/"MatGetDiagonal_MPIBAIJ"
1298 int MatGetDiagonal_MPIBAIJ(Mat A,Vec v)
1299 {
1300   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1301   int         ierr;
1302 
1303   PetscFunctionBegin;
1304   if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block");
1305   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1306   PetscFunctionReturn(0);
1307 }
1308 
1309 #undef __FUNC__
1310 #define __FUNC__ /*<a name=""></a>*/"MatScale_MPIBAIJ"
1311 int MatScale_MPIBAIJ(Scalar *aa,Mat A)
1312 {
1313   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1314   int         ierr;
1315 
1316   PetscFunctionBegin;
1317   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
1318   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
1319   PetscFunctionReturn(0);
1320 }
1321 
1322 #undef __FUNC__
1323 #define __FUNC__ /*<a name=""></a>*/"MatGetSize_MPIBAIJ"
1324 int MatGetSize_MPIBAIJ(Mat matin,int *m,int *n)
1325 {
1326   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data;
1327 
1328   PetscFunctionBegin;
1329   if (m) *m = mat->M;
1330   if (n) *n = mat->N;
1331   PetscFunctionReturn(0);
1332 }
1333 
1334 #undef __FUNC__
1335 #define __FUNC__ /*<a name=""></a>*/"MatGetLocalSize_MPIBAIJ"
1336 int MatGetLocalSize_MPIBAIJ(Mat matin,int *m,int *n)
1337 {
1338   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data;
1339 
1340   PetscFunctionBegin;
1341   *m = mat->m; *n = mat->n;
1342   PetscFunctionReturn(0);
1343 }
1344 
1345 #undef __FUNC__
1346 #define __FUNC__ /*<a name=""></a>*/"MatGetOwnershipRange_MPIBAIJ"
1347 int MatGetOwnershipRange_MPIBAIJ(Mat matin,int *m,int *n)
1348 {
1349   Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data;
1350 
1351   PetscFunctionBegin;
1352   if (m) *m = mat->rstart*mat->bs;
1353   if (n) *n = mat->rend*mat->bs;
1354   PetscFunctionReturn(0);
1355 }
1356 
#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatGetRow_MPIBAIJ"
/*
   Returns one locally owned row (scalar index) of the matrix, merging the
   entries of the diagonal block A and off-diagonal block B into globally
   increasing column order.  idx and v, when requested, point into internal
   work buffers that stay valid until MatRestoreRow_MPIBAIJ() is called.
*/
int MatGetRow_MPIBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)matin->data;
  Scalar     *vworkA,*vworkB,**pvA,**pvB,*v_p;
  int        bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB;
  int        nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs;
  int        *cmap,*idx_p,cstart = mat->cstart;

  PetscFunctionBegin;
  /* only one row may be "checked out" at a time */
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ*)mat->A->data,*Ba = (Mat_SeqBAIJ*)mat->B->data;
    int     max = 1,mbs = mat->mbs,tmp;
    /* max = largest combined block count over all block rows of A and B */
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
      if (max < tmp) { max = tmp; }
    }
    /* one allocation serves both buffers: values first, indices after */
    mat->rowvalues = (Scalar*)PetscMalloc(max*bs2*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues);
    mat->rowindices = (int*)(mat->rowvalues + max*bs2);
  }

  if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,0,"Only local rows")
  lrow = row - brstart;

  /* request from A/B only the arrays the caller actually asked for */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap  = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      /* imark = number of B entries whose global column precedes A's range */
      int imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* imark already computed by the values pass above */
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          /* values not requested: find imark here instead */
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart)
              idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1437 
1438 #undef __FUNC__
1439 #define __FUNC__ /*<a name=""></a>*/"MatRestoreRow_MPIBAIJ"
1440 int MatRestoreRow_MPIBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1441 {
1442   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1443 
1444   PetscFunctionBegin;
1445   if (baij->getrowactive == PETSC_FALSE) {
1446     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called");
1447   }
1448   baij->getrowactive = PETSC_FALSE;
1449   PetscFunctionReturn(0);
1450 }
1451 
1452 #undef __FUNC__
1453 #define __FUNC__ /*<a name=""></a>*/"MatGetBlockSize_MPIBAIJ"
1454 int MatGetBlockSize_MPIBAIJ(Mat mat,int *bs)
1455 {
1456   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1457 
1458   PetscFunctionBegin;
1459   *bs = baij->bs;
1460   PetscFunctionReturn(0);
1461 }
1462 
1463 #undef __FUNC__
1464 #define __FUNC__ /*<a name=""></a>*/"MatZeroEntries_MPIBAIJ"
1465 int MatZeroEntries_MPIBAIJ(Mat A)
1466 {
1467   Mat_MPIBAIJ *l = (Mat_MPIBAIJ*)A->data;
1468   int         ierr;
1469 
1470   PetscFunctionBegin;
1471   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1472   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1473   PetscFunctionReturn(0);
1474 }
1475 
1476 #undef __FUNC__
1477 #define __FUNC__ /*<a name=""></a>*/"MatGetInfo_MPIBAIJ"
1478 int MatGetInfo_MPIBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1479 {
1480   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)matin->data;
1481   Mat         A = a->A,B = a->B;
1482   int         ierr;
1483   PetscReal   isend[5],irecv[5];
1484 
1485   PetscFunctionBegin;
1486   info->block_size     = (double)a->bs;
1487   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1488   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1489   isend[3] = info->memory;  isend[4] = info->mallocs;
1490   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1491   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1492   isend[3] += info->memory;  isend[4] += info->mallocs;
1493   if (flag == MAT_LOCAL) {
1494     info->nz_used      = isend[0];
1495     info->nz_allocated = isend[1];
1496     info->nz_unneeded  = isend[2];
1497     info->memory       = isend[3];
1498     info->mallocs      = isend[4];
1499   } else if (flag == MAT_GLOBAL_MAX) {
1500     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr);
1501     info->nz_used      = irecv[0];
1502     info->nz_allocated = irecv[1];
1503     info->nz_unneeded  = irecv[2];
1504     info->memory       = irecv[3];
1505     info->mallocs      = irecv[4];
1506   } else if (flag == MAT_GLOBAL_SUM) {
1507     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr);
1508     info->nz_used      = irecv[0];
1509     info->nz_allocated = irecv[1];
1510     info->nz_unneeded  = irecv[2];
1511     info->memory       = irecv[3];
1512     info->mallocs      = irecv[4];
1513   } else {
1514     SETERRQ1(1,1,"Unknown MatInfoType argument %d",flag);
1515   }
1516   info->rows_global       = (double)a->M;
1517   info->columns_global    = (double)a->N;
1518   info->rows_local        = (double)a->m;
1519   info->columns_local     = (double)a->N;
1520   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1521   info->fill_ratio_needed = 0;
1522   info->factor_mallocs    = 0;
1523   PetscFunctionReturn(0);
1524 }
1525 
1526 #undef __FUNC__
1527 #define __FUNC__ /*<a name=""></a>*/"MatSetOption_MPIBAIJ"
1528 int MatSetOption_MPIBAIJ(Mat A,MatOption op)
1529 {
1530   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1531   int         ierr;
1532 
1533   PetscFunctionBegin;
1534   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
1535       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
1536       op == MAT_COLUMNS_UNSORTED ||
1537       op == MAT_COLUMNS_SORTED ||
1538       op == MAT_NEW_NONZERO_ALLOCATION_ERR ||
1539       op == MAT_KEEP_ZEROED_ROWS ||
1540       op == MAT_NEW_NONZERO_LOCATION_ERR) {
1541         ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1542         ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1543   } else if (op == MAT_ROW_ORIENTED) {
1544         a->roworiented = PETSC_TRUE;
1545         ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1546         ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1547   } else if (op == MAT_ROWS_SORTED ||
1548              op == MAT_ROWS_UNSORTED ||
1549              op == MAT_SYMMETRIC ||
1550              op == MAT_STRUCTURALLY_SYMMETRIC ||
1551              op == MAT_YES_NEW_DIAGONALS ||
1552              op == MAT_USE_HASH_TABLE) {
1553     PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
1554   } else if (op == MAT_COLUMN_ORIENTED) {
1555     a->roworiented = PETSC_FALSE;
1556     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1557     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1558   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
1559     a->donotstash = PETSC_TRUE;
1560   } else if (op == MAT_NO_NEW_DIAGONALS) {
1561     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
1562   } else if (op == MAT_USE_HASH_TABLE) {
1563     a->ht_flag = PETSC_TRUE;
1564   } else {
1565     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
1566   }
1567   PetscFunctionReturn(0);
1568 }
1569 
1570 #undef __FUNC__
1571 #define __FUNC__ /*<a name=""></a>*/"MatTranspose_MPIBAIJ("
1572 int MatTranspose_MPIBAIJ(Mat A,Mat *matout)
1573 {
1574   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data;
1575   Mat_SeqBAIJ *Aloc;
1576   Mat         B;
1577   int         ierr,M=baij->M,N=baij->N,*ai,*aj,i,*rvals,j,k,col;
1578   int         bs=baij->bs,mbs=baij->mbs;
1579   MatScalar   *a;
1580 
1581   PetscFunctionBegin;
1582   if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place");
1583   ierr = MatCreateMPIBAIJ(A->comm,baij->bs,baij->n,baij->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);
1584 
1585   /* copy over the A part */
1586   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1587   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1588   rvals = (int*)PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals);
1589 
1590   for (i=0; i<mbs; i++) {
1591     rvals[0] = bs*(baij->rstart + i);
1592     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1593     for (j=ai[i]; j<ai[i+1]; j++) {
1594       col = (baij->cstart+aj[j])*bs;
1595       for (k=0; k<bs; k++) {
1596         ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1597         col++; a += bs;
1598       }
1599     }
1600   }
1601   /* copy over the B part */
1602   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1603   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1604   for (i=0; i<mbs; i++) {
1605     rvals[0] = bs*(baij->rstart + i);
1606     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1607     for (j=ai[i]; j<ai[i+1]; j++) {
1608       col = baij->garray[aj[j]]*bs;
1609       for (k=0; k<bs; k++) {
1610         ierr = MatSetValues_MPIBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1611         col++; a += bs;
1612       }
1613     }
1614   }
1615   ierr = PetscFree(rvals);CHKERRQ(ierr);
1616   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1617   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1618 
1619   if (matout) {
1620     *matout = B;
1621   } else {
1622     PetscOps *Abops;
1623     MatOps   Aops;
1624 
1625     /* This isn't really an in-place transpose .... but free data structures from baij */
1626     ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
1627     ierr = MatDestroy(baij->A);CHKERRQ(ierr);
1628     ierr = MatDestroy(baij->B);CHKERRQ(ierr);
1629 #if defined (PETSC_USE_CTABLE)
1630     if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
1631 #else
1632     if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
1633 #endif
1634     if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
1635     if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
1636     if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
1637     ierr = PetscFree(baij);CHKERRQ(ierr);
1638 
1639     /*
1640        This is horrible, horrible code. We need to keep the
1641       A pointers for the bops and ops but copy everything
1642       else from C.
1643     */
1644     Abops   = A->bops;
1645     Aops    = A->ops;
1646     ierr    = PetscMemcpy(A,B,sizeof(struct _p_Mat));CHKERRQ(ierr);
1647     A->bops = Abops;
1648     A->ops  = Aops;
1649 
1650     PetscHeaderDestroy(B);
1651   }
1652   PetscFunctionReturn(0);
1653 }
1654 
1655 #undef __FUNC__
1656 #define __FUNC__ /*<a name=""></a>*/"MatDiagonalScale_MPIBAIJ"
1657 int MatDiagonalScale_MPIBAIJ(Mat mat,Vec ll,Vec rr)
1658 {
1659   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
1660   Mat         a = baij->A,b = baij->B;
1661   int         ierr,s1,s2,s3;
1662 
1663   PetscFunctionBegin;
1664   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1665   if (rr) {
1666     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1667     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"right vector non-conforming local size");
1668     /* Overlap communication with computation. */
1669     ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1670   }
1671   if (ll) {
1672     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1673     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"left vector non-conforming local size");
1674     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1675   }
1676   /* scale  the diagonal block */
1677   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1678 
1679   if (rr) {
1680     /* Do a scatter end and then right scale the off-diagonal block */
1681     ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1682     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1683   }
1684 
1685   PetscFunctionReturn(0);
1686 }
1687 
#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatZeroRows_MPIBAIJ"
/*
   MatZeroRows_MPIBAIJ - Zeros the (global, point-indexed) rows listed in
   'is', optionally placing 'diag' on the diagonal of each zeroed row.

   Row indices in 'is' may refer to rows owned by other processors, so the
   routine first routes every requested row index to its owning processor
   (count -> allreduce of message sizes -> nonblocking sends/receives),
   then zeroes the locally owned rows via MatZeroRows_SeqBAIJ on the
   diagonal (l->A) and off-diagonal (l->B) pieces.
*/
int MatZeroRows_MPIBAIJ(Mat A,IS is,Scalar *diag)
{
  Mat_MPIBAIJ    *l = (Mat_MPIBAIJ*)A->data;
  int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
  int            *procs,*nprocs,j,found,idx,nsends,*work,row;
  int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
  int            *rvalues,tag = A->tag,count,base,slen,n,*source;
  int            *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs;
  MPI_Comm       comm = A->comm;
  MPI_Request    *send_waits,*recv_waits;
  MPI_Status     recv_status,*send_status;
  IS             istmp;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(is,&N);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);

  /*  first count number of contributors to each processor */
  /* nprocs[j] = #rows destined for proc j; procs[j] = 1 iff any row goes to j */
  nprocs = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(nprocs);
  ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
  procs  = nprocs + size;
  owner  = (int*)PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note*/
  for (i=0; i<N; i++) {
    idx   = rows[i];
    found = 0;
    /* owners[] is in block units, hence the *bs to get point-row ranges */
    for (j=0; j<size; j++) {
      if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
        nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
      }
    }
    if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
  }
  nsends = 0;  for (i=0; i<size; i++) { nsends += procs[i];}

  /* inform other processors of number of messages and max length*/
  work   = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(work);
  ierr   = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr);
  nmax   = work[rank];        /* longest message this proc will receive */
  nrecvs = work[size+rank];   /* number of messages this proc will receive */
  ierr = PetscFree(work);CHKERRQ(ierr);

  /* post receives:   */
  rvalues = (int*)PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
  recv_waits = (MPI_Request*)PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
  for (i=0; i<nrecvs; i++) {
    ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
     the ith processor
  */
  svalues    = (int*)PetscMalloc((N+1)*sizeof(int));CHKPTRQ(svalues);
  send_waits = (MPI_Request*)PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
  starts     = (int*)PetscMalloc((size+1)*sizeof(int));CHKPTRQ(starts);
  starts[0]  = 0;
  for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
  /* bucket the row indices by destination processor */
  for (i=0; i<N; i++) {
    svalues[starts[owner[i]]++] = rows[i];
  }
  ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);

  /* starts[] was consumed above; rebuild it for the sends */
  starts[0] = 0;
  for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
  count = 0;
  for (i=0; i<size; i++) {
    if (procs[i]) {
      ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(starts);CHKERRQ(ierr);

  /* first point-row owned by this processor */
  base = owners[rank]*bs;

  /*  wait on receives */
  lens   = (int*)PetscMalloc(2*(nrecvs+1)*sizeof(int));CHKPTRQ(lens);
  source = lens + nrecvs;
  count  = nrecvs; slen = 0;
  while (count) {
    ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
    /* unpack receives into our local space */
    ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
    source[imdex]  = recv_status.MPI_SOURCE;
    lens[imdex]    = n;
    slen          += n;
    count--;
  }
  ierr = PetscFree(recv_waits);CHKERRQ(ierr);

  /* move the data into the send scatter */
  /* lrows[] collects all received global rows, converted to local indices */
  lrows = (int*)PetscMalloc((slen+1)*sizeof(int));CHKPTRQ(lrows);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    values = rvalues + i*nmax;
    for (j=0; j<lens[i]; j++) {
      lrows[count++] = values[j] - base;
    }
  }
  ierr = PetscFree(rvalues);CHKERRQ(ierr);
  ierr = PetscFree(lens);CHKERRQ(ierr);
  ierr = PetscFree(owner);CHKERRQ(ierr);
  ierr = PetscFree(nprocs);CHKERRQ(ierr);

  /* actually zap the local rows */
  ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
  PLogObjectParent(A,istmp);

  /*
        Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

       Contributed by: Mathew Knepley
  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
  ierr = MatZeroRows_SeqBAIJ(l->B,istmp,0);CHKERRQ(ierr);
  if (diag && (l->A->M == l->A->N)) {
    ierr = MatZeroRows_SeqBAIJ(l->A,istmp,diag);CHKERRQ(ierr);
  } else if (diag) {
    /* rectangular diagonal block: must fall back on MatSetValues per row */
    ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr);
    if (((Mat_SeqBAIJ*)l->A->data)->nonew) {
      SETERRQ(PETSC_ERR_SUP,0,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    }
    for (i=0; i<slen; i++) {
      row  = lrows[i] + rstart_bs;
      ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  } else {
    ierr = MatZeroRows_SeqBAIJ(l->A,istmp,0);CHKERRQ(ierr);
  }

  ierr = ISDestroy(istmp);CHKERRQ(ierr);
  ierr = PetscFree(lrows);CHKERRQ(ierr);

  /* wait on sends */
  if (nsends) {
    send_status = (MPI_Status*)PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
    ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
    ierr        = PetscFree(send_status);CHKERRQ(ierr);
  }
  ierr = PetscFree(send_waits);CHKERRQ(ierr);
  ierr = PetscFree(svalues);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
1839 
1840 #undef __FUNC__
1841 #define __FUNC__ /*<a name=""></a>*/"MatPrintHelp_MPIBAIJ"
1842 int MatPrintHelp_MPIBAIJ(Mat A)
1843 {
1844   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*)A->data;
1845   MPI_Comm    comm = A->comm;
1846   static int  called = 0;
1847   int         ierr;
1848 
1849   PetscFunctionBegin;
1850   if (!a->rank) {
1851     ierr = MatPrintHelp_SeqBAIJ(a->A);CHKERRQ(ierr);
1852   }
1853   if (called) {PetscFunctionReturn(0);} else called = 1;
1854   ierr = (*PetscHelpPrintf)(comm," Options for MATMPIBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
1855   ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
1856   PetscFunctionReturn(0);
1857 }
1858 
1859 #undef __FUNC__
1860 #define __FUNC__ /*<a name=""></a>*/"MatSetUnfactored_MPIBAIJ"
1861 int MatSetUnfactored_MPIBAIJ(Mat A)
1862 {
1863   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*)A->data;
1864   int         ierr;
1865 
1866   PetscFunctionBegin;
1867   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1868   PetscFunctionReturn(0);
1869 }
1870 
1871 static int MatDuplicate_MPIBAIJ(Mat,MatDuplicateOption,Mat *);
1872 
1873 #undef __FUNC__
1874 #define __FUNC__ /*<a name=""></a>*/"MatEqual_MPIBAIJ"
1875 int MatEqual_MPIBAIJ(Mat A,Mat B,PetscTruth *flag)
1876 {
1877   Mat_MPIBAIJ *matB = (Mat_MPIBAIJ*)B->data,*matA = (Mat_MPIBAIJ*)A->data;
1878   Mat         a,b,c,d;
1879   PetscTruth  flg;
1880   int         ierr;
1881 
1882   PetscFunctionBegin;
1883   if (B->type != MATMPIBAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type");
1884   a = matA->A; b = matA->B;
1885   c = matB->A; d = matB->B;
1886 
1887   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1888   if (flg == PETSC_TRUE) {
1889     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1890   }
1891   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1892   PetscFunctionReturn(0);
1893 }
1894 
1895 /* -------------------------------------------------------------------*/
1896 static struct _MatOps MatOps_Values = {
1897   MatSetValues_MPIBAIJ,
1898   MatGetRow_MPIBAIJ,
1899   MatRestoreRow_MPIBAIJ,
1900   MatMult_MPIBAIJ,
1901   MatMultAdd_MPIBAIJ,
1902   MatMultTranspose_MPIBAIJ,
1903   MatMultTransposeAdd_MPIBAIJ,
1904   0,
1905   0,
1906   0,
1907   0,
1908   0,
1909   0,
1910   0,
1911   MatTranspose_MPIBAIJ,
1912   MatGetInfo_MPIBAIJ,
1913   MatEqual_MPIBAIJ,
1914   MatGetDiagonal_MPIBAIJ,
1915   MatDiagonalScale_MPIBAIJ,
1916   MatNorm_MPIBAIJ,
1917   MatAssemblyBegin_MPIBAIJ,
1918   MatAssemblyEnd_MPIBAIJ,
1919   0,
1920   MatSetOption_MPIBAIJ,
1921   MatZeroEntries_MPIBAIJ,
1922   MatZeroRows_MPIBAIJ,
1923   0,
1924   0,
1925   0,
1926   0,
1927   MatGetSize_MPIBAIJ,
1928   MatGetLocalSize_MPIBAIJ,
1929   MatGetOwnershipRange_MPIBAIJ,
1930   0,
1931   0,
1932   0,
1933   0,
1934   MatDuplicate_MPIBAIJ,
1935   0,
1936   0,
1937   0,
1938   0,
1939   0,
1940   MatGetSubMatrices_MPIBAIJ,
1941   MatIncreaseOverlap_MPIBAIJ,
1942   MatGetValues_MPIBAIJ,
1943   0,
1944   MatPrintHelp_MPIBAIJ,
1945   MatScale_MPIBAIJ,
1946   0,
1947   0,
1948   0,
1949   MatGetBlockSize_MPIBAIJ,
1950   0,
1951   0,
1952   0,
1953   0,
1954   0,
1955   0,
1956   MatSetUnfactored_MPIBAIJ,
1957   0,
1958   MatSetValuesBlocked_MPIBAIJ,
1959   0,
1960   MatDestroy_MPIBAIJ,
1961   MatView_MPIBAIJ,
1962   MatGetMaps_Petsc};
1963 
1964 
1965 EXTERN_C_BEGIN
1966 #undef __FUNC__
1967 #define __FUNC__ /*<a name=""></a>*/"MatGetDiagonalBlock_MPIBAIJ"
1968 int MatGetDiagonalBlock_MPIBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1969 {
1970   PetscFunctionBegin;
1971   *a      = ((Mat_MPIBAIJ *)A->data)->A;
1972   *iscopy = PETSC_FALSE;
1973   PetscFunctionReturn(0);
1974 }
1975 EXTERN_C_END
1976 
1977 #undef __FUNC__
1978 #define __FUNC__ /*<a name=""></a>*/"MatCreateMPIBAIJ"
1979 /*@C
1980    MatCreateMPIBAIJ - Creates a sparse parallel matrix in block AIJ format
1981    (block compressed row).  For good matrix assembly performance
1982    the user should preallocate the matrix storage by setting the parameters
1983    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1984    performance can be increased by more than a factor of 50.
1985 
1986    Collective on MPI_Comm
1987 
1988    Input Parameters:
1989 +  comm - MPI communicator
.  bs   - size of block
1991 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1992            This value should be the same as the local size used in creating the
1993            y vector for the matrix-vector product y = Ax.
1994 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1995            This value should be the same as the local size used in creating the
1996            x vector for the matrix-vector product y = Ax.
1997 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1998 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1999 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2000            submatrix  (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           in the diagonal portion of the local submatrix (possibly different for each block
           row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
2004 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2005            submatrix (same for all local rows).
2006 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2007            off-diagonal portion of the local submatrix (possibly different for
2008            each block row) or PETSC_NULL.
2009 
2010    Output Parameter:
2011 .  A - the matrix
2012 
2013    Options Database Keys:
2014 .   -mat_no_unroll - uses code that does not unroll the loops in the
2015                      block calculations (much slower)
2016 .   -mat_block_size - size of the blocks to use
2017 .   -mat_mpi - use the parallel matrix data structures even on one processor
2018                (defaults to using SeqBAIJ format on one processor)
2019 
2020    Notes:
2021    The user MUST specify either the local or global matrix dimensions
2022    (possibly both).
2023 
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
2026 
2027    Storage Information:
2028    For a square global matrix we define each processor's diagonal portion
2029    to be its local rows and the corresponding columns (a square submatrix);
2030    each processor's off-diagonal portion encompasses the remainder of the
2031    local matrix (a rectangular submatrix).
2032 
2033    The user can specify preallocated storage for the diagonal part of
2034    the local submatrix with either d_nz or d_nnz (not both).  Set
2035    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2036    memory allocation.  Likewise, specify preallocated storage for the
2037    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2038 
2039    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2040    the figure below we depict these three local rows and all columns (0-11).
2041 
2042 .vb
2043            0 1 2 3 4 5 6 7 8 9 10 11
2044           -------------------
2045    row 3  |  o o o d d d o o o o o o
2046    row 4  |  o o o d d d o o o o o o
2047    row 5  |  o o o d d d o o o o o o
2048           -------------------
2049 .ve
2050 
2051    Thus, any entries in the d locations are stored in the d (diagonal)
2052    submatrix, and any entries in the o locations are stored in the
2053    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
2054    stored simply in the MATSEQBAIJ format for compressed row storage.
2055 
2056    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2057    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2058    In general, for PDE problems in which most nonzeros are near the diagonal,
2059    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2060    or you will get TERRIBLE performance; see the users' manual chapter on
2061    matrices.
2062 
2063    Level: intermediate
2064 
2065 .keywords: matrix, block, aij, compressed row, sparse, parallel
2066 
2067 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2068 @*/
2069 int MatCreateMPIBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
2070 {
2071   Mat          B;
2072   Mat_MPIBAIJ  *b;
2073   int          ierr,i,sum[2],work[2],mbs,nbs,Mbs=PETSC_DECIDE,Nbs=PETSC_DECIDE,size;
2074   PetscTruth   flag1,flag2,flg;
2075 
2076   PetscFunctionBegin;
2077   ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2078 
2079   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Invalid block size specified, must be positive");
2080   if (d_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nz cannot be less than -1: value %d",d_nz);
2081   if (o_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nz cannot be less than -1: value %d",o_nz);
2082   if (d_nnz) {
2083     for (i=0; i<m/bs; i++) {
2084       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nnz cannot be less than -1: local row %d value %d",i,d_nnz[i]);
2085     }
2086   }
2087   if (o_nnz) {
2088     for (i=0; i<m/bs; i++) {
2089       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nnz cannot be less than -1: local row %d value %d",i,o_nnz[i]);
2090     }
2091   }
2092 
2093   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2094   ierr = OptionsHasName(PETSC_NULL,"-mat_mpibaij",&flag1);CHKERRQ(ierr);
2095   ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2);CHKERRQ(ierr);
2096   if (!flag1 && !flag2 && size == 1) {
2097     if (M == PETSC_DECIDE) M = m;
2098     if (N == PETSC_DECIDE) N = n;
2099     ierr = MatCreateSeqBAIJ(comm,bs,M,N,d_nz,d_nnz,A);CHKERRQ(ierr);
2100     PetscFunctionReturn(0);
2101   }
2102 
2103   *A = 0;
2104   PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",comm,MatDestroy,MatView);
2105   PLogObjectCreate(B);
2106   B->data = (void*)(b = PetscNew(Mat_MPIBAIJ));CHKPTRQ(b);
2107   ierr    = PetscMemzero(b,sizeof(Mat_MPIBAIJ));CHKERRQ(ierr);
2108   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2109   B->mapping    = 0;
2110   B->factor     = 0;
2111   B->assembled  = PETSC_FALSE;
2112 
2113   B->insertmode = NOT_SET_VALUES;
2114   ierr = MPI_Comm_rank(comm,&b->rank);CHKERRQ(ierr);
2115   ierr = MPI_Comm_size(comm,&b->size);CHKERRQ(ierr);
2116 
2117   if (m == PETSC_DECIDE && (d_nnz || o_nnz)) {
2118     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
2119   }
2120   if (M == PETSC_DECIDE && m == PETSC_DECIDE) {
2121     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either M or m should be specified");
2122   }
2123   if (N == PETSC_DECIDE && n == PETSC_DECIDE) {
2124     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either N or n should be specified");
2125   }
2126   if (M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE;
2127   if (N != PETSC_DECIDE && n != PETSC_DECIDE) N = PETSC_DECIDE;
2128 
2129   if (M == PETSC_DECIDE || N == PETSC_DECIDE) {
2130     work[0] = m; work[1] = n;
2131     mbs = m/bs; nbs = n/bs;
2132     ierr = MPI_Allreduce(work,sum,2,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
2133     if (M == PETSC_DECIDE) {M = sum[0]; Mbs = M/bs;}
2134     if (N == PETSC_DECIDE) {N = sum[1]; Nbs = N/bs;}
2135   }
2136   if (m == PETSC_DECIDE) {
2137     Mbs = M/bs;
2138     if (Mbs*bs != M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global rows must be divisible by blocksize");
2139     mbs = Mbs/b->size + ((Mbs % b->size) > b->rank);
2140     m   = mbs*bs;
2141   }
2142   if (n == PETSC_DECIDE) {
2143     Nbs = N/bs;
2144     if (Nbs*bs != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global cols must be divisible by blocksize");
2145     nbs = Nbs/b->size + ((Nbs % b->size) > b->rank);
2146     n   = nbs*bs;
2147   }
2148   if (mbs*bs != m || nbs*bs != n) {
2149     SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of local rows, cols must be divisible by blocksize");
2150   }
2151 
2152   b->m = m; B->m = m;
2153   b->n = n; B->n = n;
2154   b->N = N; B->N = N;
2155   b->M = M; B->M = M;
2156   b->bs  = bs;
2157   b->bs2 = bs*bs;
2158   b->mbs = mbs;
2159   b->nbs = nbs;
2160   b->Mbs = Mbs;
2161   b->Nbs = Nbs;
2162 
2163   /* the information in the maps duplicates the information computed below, eventually
2164      we should remove the duplicate information that is not contained in the maps */
2165   ierr = MapCreateMPI(B->comm,m,M,&B->rmap);CHKERRQ(ierr);
2166   ierr = MapCreateMPI(B->comm,n,N,&B->cmap);CHKERRQ(ierr);
2167 
2168   /* build local table of row and column ownerships */
2169   b->rowners = (int*)PetscMalloc(3*(b->size+2)*sizeof(int));CHKPTRQ(b->rowners);
2170   PLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
2171   b->cowners    = b->rowners + b->size + 2;
2172   b->rowners_bs = b->cowners + b->size + 2;
2173   ierr = MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2174   b->rowners[0]    = 0;
2175   for (i=2; i<=b->size; i++) {
2176     b->rowners[i] += b->rowners[i-1];
2177   }
2178   for (i=0; i<=b->size; i++) {
2179     b->rowners_bs[i] = b->rowners[i]*bs;
2180   }
2181   b->rstart    = b->rowners[b->rank];
2182   b->rend      = b->rowners[b->rank+1];
2183   b->rstart_bs = b->rstart * bs;
2184   b->rend_bs   = b->rend * bs;
2185 
2186   ierr = MPI_Allgather(&nbs,1,MPI_INT,b->cowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2187   b->cowners[0] = 0;
2188   for (i=2; i<=b->size; i++) {
2189     b->cowners[i] += b->cowners[i-1];
2190   }
2191   b->cstart    = b->cowners[b->rank];
2192   b->cend      = b->cowners[b->rank+1];
2193   b->cstart_bs = b->cstart * bs;
2194   b->cend_bs   = b->cend * bs;
2195 
2196 
2197   if (d_nz == PETSC_DEFAULT) d_nz = 5;
2198   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,n,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
2199   PLogObjectParent(B,b->A);
2200   if (o_nz == PETSC_DEFAULT) o_nz = 0;
2201   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,N,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
2202   PLogObjectParent(B,b->B);
2203 
2204   /* build cache for off array entries formed */
2205   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
2206   ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);
2207   b->donotstash  = PETSC_FALSE;
2208   b->colmap      = PETSC_NULL;
2209   b->garray      = PETSC_NULL;
2210   b->roworiented = PETSC_TRUE;
2211 
2212 #if defined(PEYSC_USE_MAT_SINGLE)
2213   /* stuff for MatSetValues_XXX in single precision */
2214   b->lensetvalues     = 0;
2215   b->setvaluescopy    = PETSC_NULL;
2216 #endif
2217 
2218   /* stuff used in block assembly */
2219   b->barray       = 0;
2220 
2221   /* stuff used for matrix vector multiply */
2222   b->lvec         = 0;
2223   b->Mvctx        = 0;
2224 
2225   /* stuff for MatGetRow() */
2226   b->rowindices   = 0;
2227   b->rowvalues    = 0;
2228   b->getrowactive = PETSC_FALSE;
2229 
2230   /* hash table stuff */
2231   b->ht           = 0;
2232   b->hd           = 0;
2233   b->ht_size      = 0;
2234   b->ht_flag      = PETSC_FALSE;
2235   b->ht_fact      = 0;
2236   b->ht_total_ct  = 0;
2237   b->ht_insert_ct = 0;
2238 
2239   *A = B;
2240   ierr = OptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
2241   if (flg) {
2242     double fact = 1.39;
2243     ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
2244     ierr = OptionsGetDouble(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
2245     if (fact <= 1.0) fact = 1.39;
2246     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
2247     PLogInfo(0,"MatCreateMPIBAIJ:Hash table Factor used %5.2f\n",fact);
2248   }
2249   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2250                                      "MatStoreValues_MPIBAIJ",
2251                                      MatStoreValues_MPIBAIJ);CHKERRQ(ierr);
2252   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2253                                      "MatRetrieveValues_MPIBAIJ",
2254                                      MatRetrieveValues_MPIBAIJ);CHKERRQ(ierr);
2255   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
2256                                      "MatGetDiagonalBlock_MPIBAIJ",
2257                                      MatGetDiagonalBlock_MPIBAIJ);CHKERRQ(ierr);
2258   PetscFunctionReturn(0);
2259 }
2260 
#undef __FUNC__
#define __FUNC__ /*<a name=""></a>*/"MatDuplicate_MPIBAIJ"
/*
   MatDuplicate_MPIBAIJ - Creates a new MPIBAIJ matrix with the same layout
   as matin; cpvalues controls whether the numerical values of the local
   A and B pieces are copied (passed straight through to MatDuplicate on
   each piece).  Scalar fields are copied one by one; the ownership arrays,
   colmap, and garray are deep-copied, while lvec/Mvctx are duplicated via
   their own copy routines.
*/
static int MatDuplicate_MPIBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat         mat;
  Mat_MPIBAIJ *a,*oldmat = (Mat_MPIBAIJ*)matin->data;
  int         ierr,len=0;
  PetscTruth  flg;

  PetscFunctionBegin;
  *newmat       = 0;
  PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPIBAIJ,"Mat",matin->comm,MatDestroy,MatView);
  PLogObjectCreate(mat);
  mat->data         = (void*)(a = PetscNew(Mat_MPIBAIJ));CHKPTRQ(a);
  ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
  mat->factor       = matin->factor;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;

  /* global/local dimensions (kept in both the header and the data struct) */
  a->m = mat->m   = oldmat->m;
  a->n = mat->n   = oldmat->n;
  a->M = mat->M   = oldmat->M;
  a->N = mat->N   = oldmat->N;

  /* block-size bookkeeping */
  a->bs  = oldmat->bs;
  a->bs2 = oldmat->bs2;
  a->mbs = oldmat->mbs;
  a->nbs = oldmat->nbs;
  a->Mbs = oldmat->Mbs;
  a->Nbs = oldmat->Nbs;

  /* ownership ranges and per-process info */
  a->rstart       = oldmat->rstart;
  a->rend         = oldmat->rend;
  a->cstart       = oldmat->cstart;
  a->cend         = oldmat->cend;
  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  /* MatGetRow() scratch state starts out empty in the copy */
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;
  a->barray       = 0;
  a->rstart_bs    = oldmat->rstart_bs;
  a->rend_bs      = oldmat->rend_bs;
  a->cstart_bs    = oldmat->cstart_bs;
  a->cend_bs      = oldmat->cend_bs;

  /* hash table stuff */
  /* only the configuration (flag/factor) is copied; the table itself is rebuilt lazily */
  a->ht           = 0;
  a->hd           = 0;
  a->ht_size      = 0;
  a->ht_flag      = oldmat->ht_flag;
  a->ht_fact      = oldmat->ht_fact;
  a->ht_total_ct  = 0;
  a->ht_insert_ct = 0;


  /* rowners/cowners/rowners_bs live in one allocation, same layout as MatCreateMPIBAIJ */
  a->rowners = (int*)PetscMalloc(3*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners);
  PLogObjectMemory(mat,3*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPIBAIJ));
  a->cowners    = a->rowners + a->size + 2;
  a->rowners_bs = a->cowners + a->size + 2;
  ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr);
  ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
  ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr);
  if (oldmat->colmap) {
#if defined (PETSC_USE_CTABLE)
  ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    a->colmap = (int*)PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap);
    PLogObjectMemory(mat,(a->Nbs)*sizeof(int));
    ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;
  /* garray length is the number of block columns in the off-diagonal piece */
  if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
    a->garray = (int*)PetscMalloc(len*sizeof(int));CHKPTRQ(a->garray);
    PLogObjectMemory(mat,len*sizeof(int));
    ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr);
  } else a->garray = 0;

  ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  PLogObjectParent(mat,a->lvec);
  ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);

  PLogObjectParent(mat,a->Mvctx);
  /* duplicate the local pieces; cpvalues decides whether values are copied */
  ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  PLogObjectParent(mat,a->A);
  ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  PLogObjectParent(mat,a->B);
  ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr);
  ierr = FListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
  if (flg) {
    ierr = MatPrintHelp(mat);CHKERRQ(ierr);
  }
  *newmat = mat;
  PetscFunctionReturn(0);
}
2358 
2359 #include "petscsys.h"
2360 
2361 #undef __FUNC__
2362 #define __FUNC__ /*<a name=""></a>*/"MatLoad_MPIBAIJ"
/*
   MatLoad_MPIBAIJ - Loads a matrix from a PETSc binary viewer and assembles it
   as a parallel block AIJ (MPIBAIJ) matrix on the viewer's communicator.

   Process 0 reads the binary header and the per-row nonzero counts, scatters
   the counts, then reads each process's column indices and numerical values
   from the file and ships them to their owner with MPI point-to-point sends.
   If the global dimension is not divisible by the block size (set with
   -matload_block_size), extra padding rows/columns with a unit diagonal entry
   are appended so the matrix dimension is a multiple of bs.

   Only square matrices are supported (enforced below).
*/
int MatLoad_MPIBAIJ(Viewer viewer,MatType type,Mat *newmat)
{
  Mat          A;
  int          i,nz,ierr,j,rstart,rend,fd;
  Scalar       *vals,*buf;
  MPI_Comm     comm = ((PetscObject)viewer)->comm;
  MPI_Status   status;
  int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
  int          *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf;
  int          tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows;
  int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
  int          dcount,kmax,k,nzcount,tmp;

  PetscFunctionBegin;
  /* block size defaults to 1; user may override on the command line */
  ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    /* only process 0 touches the file; header = {cookie, M, N, total nz or flag} */
    ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
    if (header[3] < 0) {
      /* a negative nz field marks a special on-disk format this loader cannot parse */
      SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as MPIBAIJ");
    }
  }

  /* make the dimensions known on every process */
  ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2];

  if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices");

  /*
     This code adds extra rows to make sure the number of rows is
     divisible by the blocksize
  */
  Mbs        = M/bs;
  extra_rows = bs - M + bs*(Mbs);      /* = bs - (M mod bs); equals bs when M divides evenly */
  if (extra_rows == bs) extra_rows = 0;
  else                  Mbs++;
  if (extra_rows &&!rank) {
    PLogInfo(0,"MatLoad_MPIBAIJ:Padding loaded matrix to match blocksize\n");
  }

  /* determine ownership of all rows; block rows are dealt out as evenly as possible,
     with the first (Mbs mod size) processes receiving one extra block row */
  mbs = Mbs/size + ((Mbs % size) > rank);
  m   = mbs * bs;
  /* rowners[] = block-row ownership ranges; browners[] = same ranges in point (scalar) rows */
  rowners = (int*)PetscMalloc(2*(size+2)*sizeof(int));CHKPTRQ(rowners);
  browners = rowners + size + 1;
  ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  /* prefix-sum the local counts into ownership offsets */
  for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
  for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
  rstart = rowners[rank];      /* first block row owned by this process */
  rend   = rowners[rank+1];    /* one past the last block row owned */

  /* distribute row lengths to all processors */
  locrowlens = (int*)PetscMalloc((rend-rstart)*bs*sizeof(int));CHKPTRQ(locrowlens);
  if (!rank) {
    rowlengths = (int*)PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    /* each padding row holds exactly one entry (its unit diagonal) */
    for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
    sndcounts = (int*)PetscMalloc(size*sizeof(int));CHKPTRQ(sndcounts);
    for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {
    /* calculate the number of nonzeros on each processor */
    procsnz = (int*)PetscMalloc(size*sizeof(int));CHKPTRQ(procsnz);
    ierr    = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    cols = (int*)PetscMalloc(maxnz*sizeof(int));CHKPTRQ(cols);

    /* read in my part of the matrix column indices  */
    nz = procsnz[0];
    ibuf = (int*)PetscMalloc(nz*sizeof(int));CHKPTRQ(ibuf);
    mycols = ibuf;
    /* uniprocessor case: the padding entries are not on disk, so read fewer
       and synthesize the diagonal column indices of the padding rows here */
    if (size == 1)  nz -= extra_rows;
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
    if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }

    /* read in every ones (except the last) and ship off */
    for (i=1; i<size-1; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
    }
    /* read in the stuff for the last proc */
    if (size != 1) {
      nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      /* padding rows go to the last process; append their diagonal indices */
      for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
      ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += locrowlens[i];
    }
    ibuf   = (int*)PetscMalloc(nz*sizeof(int));CHKPTRQ(ibuf);
    mycols = ibuf;
    /* receive message of column indices*/
    ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
    /* sanity check: the count shipped by process 0 must match our locrowlens total */
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries
     (per BLOCK row) so the matrix can be preallocated exactly */
  dlens  = (int*)PetscMalloc(2*(rend-rstart+1)*sizeof(int));CHKPTRQ(dlens);
  odlens = dlens + (rend-rstart);
  /* mask[] marks block columns already seen in the current block row;
     masked1/masked2 remember which marks to clear (diagonal/off-diagonal) */
  mask   = (int*)PetscMalloc(3*Mbs*sizeof(int));CHKPTRQ(mask);
  ierr   = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr);
  masked1 = mask    + Mbs;
  masked2 = masked1 + Mbs;
  rowcount = 0; nzcount = 0;
  for (i=0; i<mbs; i++) {
    dcount  = 0;
    odcount = 0;
    for (j=0; j<bs; j++) {
      kmax = locrowlens[rowcount];
      for (k=0; k<kmax; k++) {
        tmp = mycols[nzcount++]/bs;    /* point column -> block column */
        if (!mask[tmp]) {
          mask[tmp] = 1;
          /* block column outside [rstart,rend) lives in the off-diagonal part */
          if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
          else masked1[dcount++] = tmp;
        }
      }
      rowcount++;
    }

    dlens[i]  = dcount;
    odlens[i] = odcount;

    /* zero out the mask elements we set */
    for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
    for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
  }

  /* create our matrix */
  ierr = MatCreateMPIBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);CHKERRQ(ierr);
  A = *newmat;
  /* column indices on disk are sorted within each row; tell the insert path.
     NOTE(review): return value not checked, unlike every other call here */
  MatSetOption(A,MAT_COLUMNS_SORTED);

  if (!rank) {
    buf = (Scalar*)PetscMalloc(maxnz*sizeof(Scalar));CHKPTRQ(buf);
    /* read in my part of the matrix numerical values  */
    nz = procsnz[0];
    vals = buf;
    mycols = ibuf;
    if (size == 1)  nz -= extra_rows;
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
    /* padding rows get a unit diagonal so the matrix stays nonsingular */
    if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }

    /* insert into matrix */
    jj      = rstart*bs;
    for (i=0; i<m; i++) {
      ierr = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
    /* read in other processors (except the last one) and ship out */
    for (i=1; i<size-1; i++) {
      nz   = procsnz[i];
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    /* the last proc */
    if (size != 1){
      /* NOTE(review): relies on i == size-1 left over from the loop above;
         procsnz[size-1] would be clearer (cf. the column-index pass above) */
      nz   = procsnz[i] - extra_rows;
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
      ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);
  } else {
    /* receive numeric values */
    buf = (Scalar*)PetscMalloc(nz*sizeof(Scalar));CHKPTRQ(buf);

    /* receive message of values*/
    vals   = buf;
    mycols = ibuf;
    ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart*bs;
    for (i=0; i<m; i++) {
      ierr    = MatSetValues_MPIBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
  }
  /* release all work arrays, then assemble the parallel matrix */
  ierr = PetscFree(locrowlens);CHKERRQ(ierr);
  ierr = PetscFree(buf);CHKERRQ(ierr);
  ierr = PetscFree(ibuf);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);
  ierr = PetscFree(dlens);CHKERRQ(ierr);
  ierr = PetscFree(mask);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2588 
2589 #undef __FUNC__
2590 #define __FUNC__ /*<a name=""></a>*/"MatMPIBAIJSetHashTableFactor"
2591 /*@
2592    MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2593 
2594    Input Parameters:
2595 .  mat  - the matrix
2596 .  fact - factor
2597 
2598    Collective on Mat
2599 
2600    Level: advanced
2601 
2602   Notes:
2603    This can also be set by the command line option: -mat_use_hash_table fact
2604 
2605 .keywords: matrix, hashtable, factor, HT
2606 
2607 .seealso: MatSetOption()
2608 @*/
2609 int MatMPIBAIJSetHashTableFactor(Mat mat,PetscReal fact)
2610 {
2611   Mat_MPIBAIJ *baij;
2612 
2613   PetscFunctionBegin;
2614   PetscValidHeaderSpecific(mat,MAT_COOKIE);
2615   if (mat->type != MATMPIBAIJ) {
2616     SETERRQ(PETSC_ERR_ARG_WRONG,1,"Incorrect matrix type. Use MPIBAIJ only.");
2617   }
2618   baij = (Mat_MPIBAIJ*)mat->data;
2619   baij->ht_fact = fact;
2620   PetscFunctionReturn(0);
2621 }
2622