xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision a30f8f8c256dec6207be9fad5c6bf9cc277d94e2)
1*a30f8f8cSSatish Balay /*$Id: mpisbaij.c,v 1.195 2000/04/20 03:27:54 bsmith Exp $*/
2*a30f8f8cSSatish Balay 
3*a30f8f8cSSatish Balay #include "src/mat/impls/baij/mpi/mpibaij.h"
4*a30f8f8cSSatish Balay #include "src/vec/vecimpl.h"
5*a30f8f8cSSatish Balay #include "mpisbaij.h"
6*a30f8f8cSSatish Balay #include "src/mat/impls/sbaij/seq/sbaij.h"
7*a30f8f8cSSatish Balay 
8*a30f8f8cSSatish Balay extern int MatSetUpMultiply_MPISBAIJ(Mat);
9*a30f8f8cSSatish Balay extern int DisAssemble_MPISBAIJ(Mat);
10*a30f8f8cSSatish Balay extern int MatIncreaseOverlap_MPISBAIJ(Mat,int,IS *,int);
11*a30f8f8cSSatish Balay extern int MatGetSubMatrices_MPISBAIJ(Mat,int,IS *,IS *,MatReuse,Mat **);
12*a30f8f8cSSatish Balay extern int MatGetValues_SeqSBAIJ(Mat,int,int *,int,int *,Scalar *);
13*a30f8f8cSSatish Balay extern int MatSetValues_SeqSBAIJ(Mat,int,int *,int,int *,Scalar *,InsertMode);
14*a30f8f8cSSatish Balay extern int MatSetValuesBlocked_SeqSBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
15*a30f8f8cSSatish Balay extern int MatGetRow_SeqSBAIJ(Mat,int,int*,int**,Scalar**);
16*a30f8f8cSSatish Balay extern int MatRestoreRow_SeqSBAIJ(Mat,int,int*,int**,Scalar**);
17*a30f8f8cSSatish Balay extern int MatPrintHelp_SeqSBAIJ(Mat);
18*a30f8f8cSSatish Balay extern int MatZeroRows_SeqSBAIJ(Mat,IS,Scalar*);
19*a30f8f8cSSatish Balay 
20*a30f8f8cSSatish Balay /*  UGLY, ugly, ugly
21*a30f8f8cSSatish Balay    When MatScalar == Scalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does
22*a30f8f8cSSatish Balay    not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and
23*a30f8f8cSSatish Balay    inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ()
24*a30f8f8cSSatish Balay    converts the entries into single precision and then calls ..._MatScalar() to put them
25*a30f8f8cSSatish Balay    into the single precision data structures.
26*a30f8f8cSSatish Balay */
27*a30f8f8cSSatish Balay #if defined(PETSC_USE_MAT_SINGLE)
28*a30f8f8cSSatish Balay extern int MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
29*a30f8f8cSSatish Balay extern int MatSetValues_MPISBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
30*a30f8f8cSSatish Balay extern int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
31*a30f8f8cSSatish Balay extern int MatSetValues_MPISBAIJ_HT_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
32*a30f8f8cSSatish Balay extern int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
33*a30f8f8cSSatish Balay #else
34*a30f8f8cSSatish Balay #define MatSetValuesBlocked_SeqSBAIJ_MatScalar      MatSetValuesBlocked_SeqSBAIJ
35*a30f8f8cSSatish Balay #define MatSetValues_MPISBAIJ_MatScalar             MatSetValues_MPISBAIJ
36*a30f8f8cSSatish Balay #define MatSetValuesBlocked_MPISBAIJ_MatScalar      MatSetValuesBlocked_MPISBAIJ
37*a30f8f8cSSatish Balay #define MatSetValues_MPISBAIJ_HT_MatScalar          MatSetValues_MPISBAIJ_HT
38*a30f8f8cSSatish Balay #define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar   MatSetValuesBlocked_MPISBAIJ_HT
39*a30f8f8cSSatish Balay #endif
40*a30f8f8cSSatish Balay 
41*a30f8f8cSSatish Balay EXTERN_C_BEGIN
42*a30f8f8cSSatish Balay #undef __FUNC__
43*a30f8f8cSSatish Balay #define __FUNC__ /*<a name="MatStoreValues_MPIBAIJ"></a>*/"MatStoreValues_MPIBAIJ"
44*a30f8f8cSSatish Balay int MatStoreValues_MPISBAIJ(Mat mat)
45*a30f8f8cSSatish Balay {
46*a30f8f8cSSatish Balay   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
47*a30f8f8cSSatish Balay   int         ierr;
48*a30f8f8cSSatish Balay 
49*a30f8f8cSSatish Balay   PetscFunctionBegin;
50*a30f8f8cSSatish Balay   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
51*a30f8f8cSSatish Balay   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
52*a30f8f8cSSatish Balay   PetscFunctionReturn(0);
53*a30f8f8cSSatish Balay }
54*a30f8f8cSSatish Balay EXTERN_C_END
55*a30f8f8cSSatish Balay 
56*a30f8f8cSSatish Balay EXTERN_C_BEGIN
57*a30f8f8cSSatish Balay #undef __FUNC__
58*a30f8f8cSSatish Balay #define __FUNC__ /*<a name="MatRetrieveValues_MPIBAIJ"></a>*/"MatRetrieveValues_MPIBAIJ"
59*a30f8f8cSSatish Balay int MatRetrieveValues_MPISBAIJ(Mat mat)
60*a30f8f8cSSatish Balay {
61*a30f8f8cSSatish Balay   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
62*a30f8f8cSSatish Balay   int         ierr;
63*a30f8f8cSSatish Balay 
64*a30f8f8cSSatish Balay   PetscFunctionBegin;
65*a30f8f8cSSatish Balay   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
66*a30f8f8cSSatish Balay   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
67*a30f8f8cSSatish Balay   PetscFunctionReturn(0);
68*a30f8f8cSSatish Balay }
69*a30f8f8cSSatish Balay EXTERN_C_END
70*a30f8f8cSSatish Balay 
71*a30f8f8cSSatish Balay /*
72*a30f8f8cSSatish Balay      Local utility routine that creates a mapping from the global column
73*a30f8f8cSSatish Balay    number to the local number in the off-diagonal part of the local
74*a30f8f8cSSatish Balay    storage of the matrix.  This is done in a non scable way since the
75*a30f8f8cSSatish Balay    length of colmap equals the global matrix length.
76*a30f8f8cSSatish Balay */
77*a30f8f8cSSatish Balay #undef __FUNC__
78*a30f8f8cSSatish Balay #define __FUNC__ /*<a name="CreateColmap_MPISBAIJ_Private"></a>*/"CreateColmap_MPISBAIJ_Private"
79*a30f8f8cSSatish Balay static int CreateColmap_MPISBAIJ_Private(Mat mat)
80*a30f8f8cSSatish Balay {
81*a30f8f8cSSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
82*a30f8f8cSSatish Balay   Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data;
83*a30f8f8cSSatish Balay   int         nbs = B->nbs,i,bs=B->bs,ierr;
84*a30f8f8cSSatish Balay 
85*a30f8f8cSSatish Balay   PetscFunctionBegin;
86*a30f8f8cSSatish Balay #if defined (PETSC_USE_CTABLE)
87*a30f8f8cSSatish Balay   ierr = PetscTableCreate(baij->nbs/5,&baij->colmap);CHKERRQ(ierr);
88*a30f8f8cSSatish Balay   for (i=0; i<nbs; i++){
89*a30f8f8cSSatish Balay     ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
90*a30f8f8cSSatish Balay   }
91*a30f8f8cSSatish Balay #else
92*a30f8f8cSSatish Balay   baij->colmap = (int*)PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap);
93*a30f8f8cSSatish Balay   PLogObjectMemory(mat,baij->Nbs*sizeof(int));
94*a30f8f8cSSatish Balay   ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));CHKERRQ(ierr);
95*a30f8f8cSSatish Balay   for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
96*a30f8f8cSSatish Balay #endif
97*a30f8f8cSSatish Balay   PetscFunctionReturn(0);
98*a30f8f8cSSatish Balay }
99*a30f8f8cSSatish Balay 
100*a30f8f8cSSatish Balay #define CHUNKSIZE  10
101*a30f8f8cSSatish Balay 
102*a30f8f8cSSatish Balay #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
103*a30f8f8cSSatish Balay { \
104*a30f8f8cSSatish Balay  \
105*a30f8f8cSSatish Balay     brow = row/bs;  \
106*a30f8f8cSSatish Balay     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
107*a30f8f8cSSatish Balay     rmax = aimax[brow]; nrow = ailen[brow]; \
108*a30f8f8cSSatish Balay       bcol = col/bs; \
109*a30f8f8cSSatish Balay       ridx = row % bs; cidx = col % bs; \
110*a30f8f8cSSatish Balay       low = 0; high = nrow; \
111*a30f8f8cSSatish Balay       while (high-low > 3) { \
112*a30f8f8cSSatish Balay         t = (low+high)/2; \
113*a30f8f8cSSatish Balay         if (rp[t] > bcol) high = t; \
114*a30f8f8cSSatish Balay         else              low  = t; \
115*a30f8f8cSSatish Balay       } \
116*a30f8f8cSSatish Balay       for (_i=low; _i<high; _i++) { \
117*a30f8f8cSSatish Balay         if (rp[_i] > bcol) break; \
118*a30f8f8cSSatish Balay         if (rp[_i] == bcol) { \
119*a30f8f8cSSatish Balay           bap  = ap +  bs2*_i + bs*cidx + ridx; \
120*a30f8f8cSSatish Balay           if (addv == ADD_VALUES) *bap += value;  \
121*a30f8f8cSSatish Balay           else                    *bap  = value;  \
122*a30f8f8cSSatish Balay           goto a_noinsert; \
123*a30f8f8cSSatish Balay         } \
124*a30f8f8cSSatish Balay       } \
125*a30f8f8cSSatish Balay       if (a->nonew == 1) goto a_noinsert; \
126*a30f8f8cSSatish Balay       else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
127*a30f8f8cSSatish Balay       if (nrow >= rmax) { \
128*a30f8f8cSSatish Balay         /* there is no extra room in row, therefore enlarge */ \
129*a30f8f8cSSatish Balay         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
130*a30f8f8cSSatish Balay         MatScalar *new_a; \
131*a30f8f8cSSatish Balay  \
132*a30f8f8cSSatish Balay         if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
133*a30f8f8cSSatish Balay  \
134*a30f8f8cSSatish Balay         /* malloc new storage space */ \
135*a30f8f8cSSatish Balay         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \
136*a30f8f8cSSatish Balay         new_a   = (MatScalar*)PetscMalloc(len);CHKPTRQ(new_a); \
137*a30f8f8cSSatish Balay         new_j   = (int*)(new_a + bs2*new_nz); \
138*a30f8f8cSSatish Balay         new_i   = new_j + new_nz; \
139*a30f8f8cSSatish Balay  \
140*a30f8f8cSSatish Balay         /* copy over old data into new slots */ \
141*a30f8f8cSSatish Balay         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \
142*a30f8f8cSSatish Balay         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
143*a30f8f8cSSatish Balay         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
144*a30f8f8cSSatish Balay         len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
145*a30f8f8cSSatish Balay         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
146*a30f8f8cSSatish Balay         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
147*a30f8f8cSSatish Balay         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));CHKERRQ(ierr); \
148*a30f8f8cSSatish Balay         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
149*a30f8f8cSSatish Balay                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
150*a30f8f8cSSatish Balay         /* free up old matrix storage */ \
151*a30f8f8cSSatish Balay         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
152*a30f8f8cSSatish Balay         if (!a->singlemalloc) { \
153*a30f8f8cSSatish Balay           ierr = PetscFree(a->i);CHKERRQ(ierr); \
154*a30f8f8cSSatish Balay           ierr = PetscFree(a->j);CHKERRQ(ierr);\
155*a30f8f8cSSatish Balay         } \
156*a30f8f8cSSatish Balay         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
157*a30f8f8cSSatish Balay         a->singlemalloc = PETSC_TRUE; \
158*a30f8f8cSSatish Balay  \
159*a30f8f8cSSatish Balay         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
160*a30f8f8cSSatish Balay         rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
161*a30f8f8cSSatish Balay         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
162*a30f8f8cSSatish Balay         a->s_maxnz += bs2*CHUNKSIZE; \
163*a30f8f8cSSatish Balay         a->reallocs++; \
164*a30f8f8cSSatish Balay         a->s_nz++; \
165*a30f8f8cSSatish Balay       } \
166*a30f8f8cSSatish Balay       N = nrow++ - 1;  \
167*a30f8f8cSSatish Balay       /* shift up all the later entries in this row */ \
168*a30f8f8cSSatish Balay       for (ii=N; ii>=_i; ii--) { \
169*a30f8f8cSSatish Balay         rp[ii+1] = rp[ii]; \
170*a30f8f8cSSatish Balay         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
171*a30f8f8cSSatish Balay       } \
172*a30f8f8cSSatish Balay       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
173*a30f8f8cSSatish Balay       rp[_i]                      = bcol;  \
174*a30f8f8cSSatish Balay       ap[bs2*_i + bs*cidx + ridx] = value;  \
175*a30f8f8cSSatish Balay       a_noinsert:; \
176*a30f8f8cSSatish Balay     ailen[brow] = nrow; \
177*a30f8f8cSSatish Balay }
178*a30f8f8cSSatish Balay #ifndef MatSetValues_SeqBAIJ_B_Private
179*a30f8f8cSSatish Balay #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
180*a30f8f8cSSatish Balay { \
181*a30f8f8cSSatish Balay     brow = row/bs;  \
182*a30f8f8cSSatish Balay     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
183*a30f8f8cSSatish Balay     rmax = bimax[brow]; nrow = bilen[brow]; \
184*a30f8f8cSSatish Balay       bcol = col/bs; \
185*a30f8f8cSSatish Balay       ridx = row % bs; cidx = col % bs; \
186*a30f8f8cSSatish Balay       low = 0; high = nrow; \
187*a30f8f8cSSatish Balay       while (high-low > 3) { \
188*a30f8f8cSSatish Balay         t = (low+high)/2; \
189*a30f8f8cSSatish Balay         if (rp[t] > bcol) high = t; \
190*a30f8f8cSSatish Balay         else              low  = t; \
191*a30f8f8cSSatish Balay       } \
192*a30f8f8cSSatish Balay       for (_i=low; _i<high; _i++) { \
193*a30f8f8cSSatish Balay         if (rp[_i] > bcol) break; \
194*a30f8f8cSSatish Balay         if (rp[_i] == bcol) { \
195*a30f8f8cSSatish Balay           bap  = ap +  bs2*_i + bs*cidx + ridx; \
196*a30f8f8cSSatish Balay           if (addv == ADD_VALUES) *bap += value;  \
197*a30f8f8cSSatish Balay           else                    *bap  = value;  \
198*a30f8f8cSSatish Balay           goto b_noinsert; \
199*a30f8f8cSSatish Balay         } \
200*a30f8f8cSSatish Balay       } \
201*a30f8f8cSSatish Balay       if (b->nonew == 1) goto b_noinsert; \
202*a30f8f8cSSatish Balay       else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
203*a30f8f8cSSatish Balay       if (nrow >= rmax) { \
204*a30f8f8cSSatish Balay         /* there is no extra room in row, therefore enlarge */ \
205*a30f8f8cSSatish Balay         int       new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
206*a30f8f8cSSatish Balay         MatScalar *new_a; \
207*a30f8f8cSSatish Balay  \
208*a30f8f8cSSatish Balay         if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
209*a30f8f8cSSatish Balay  \
210*a30f8f8cSSatish Balay         /* malloc new storage space */ \
211*a30f8f8cSSatish Balay         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \
212*a30f8f8cSSatish Balay         new_a   = (MatScalar*)PetscMalloc(len);CHKPTRQ(new_a); \
213*a30f8f8cSSatish Balay         new_j   = (int*)(new_a + bs2*new_nz); \
214*a30f8f8cSSatish Balay         new_i   = new_j + new_nz; \
215*a30f8f8cSSatish Balay  \
216*a30f8f8cSSatish Balay         /* copy over old data into new slots */ \
217*a30f8f8cSSatish Balay         for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \
218*a30f8f8cSSatish Balay         for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
219*a30f8f8cSSatish Balay         ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
220*a30f8f8cSSatish Balay         len  = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
221*a30f8f8cSSatish Balay         ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
222*a30f8f8cSSatish Balay         ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
223*a30f8f8cSSatish Balay         ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
224*a30f8f8cSSatish Balay         ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
225*a30f8f8cSSatish Balay                     ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
226*a30f8f8cSSatish Balay         /* free up old matrix storage */ \
227*a30f8f8cSSatish Balay         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
228*a30f8f8cSSatish Balay         if (!b->singlemalloc) { \
229*a30f8f8cSSatish Balay           ierr = PetscFree(b->i);CHKERRQ(ierr); \
230*a30f8f8cSSatish Balay           ierr = PetscFree(b->j);CHKERRQ(ierr); \
231*a30f8f8cSSatish Balay         } \
232*a30f8f8cSSatish Balay         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
233*a30f8f8cSSatish Balay         b->singlemalloc = PETSC_TRUE; \
234*a30f8f8cSSatish Balay  \
235*a30f8f8cSSatish Balay         rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
236*a30f8f8cSSatish Balay         rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
237*a30f8f8cSSatish Balay         PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
238*a30f8f8cSSatish Balay         b->maxnz += bs2*CHUNKSIZE; \
239*a30f8f8cSSatish Balay         b->reallocs++; \
240*a30f8f8cSSatish Balay         b->nz++; \
241*a30f8f8cSSatish Balay       } \
242*a30f8f8cSSatish Balay       N = nrow++ - 1;  \
243*a30f8f8cSSatish Balay       /* shift up all the later entries in this row */ \
244*a30f8f8cSSatish Balay       for (ii=N; ii>=_i; ii--) { \
245*a30f8f8cSSatish Balay         rp[ii+1] = rp[ii]; \
246*a30f8f8cSSatish Balay         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
247*a30f8f8cSSatish Balay       } \
248*a30f8f8cSSatish Balay       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
249*a30f8f8cSSatish Balay       rp[_i]                      = bcol;  \
250*a30f8f8cSSatish Balay       ap[bs2*_i + bs*cidx + ridx] = value;  \
251*a30f8f8cSSatish Balay       b_noinsert:; \
252*a30f8f8cSSatish Balay     bilen[brow] = nrow; \
253*a30f8f8cSSatish Balay }
254*a30f8f8cSSatish Balay #endif
255*a30f8f8cSSatish Balay 
256*a30f8f8cSSatish Balay #if defined(PETSC_USE_MAT_SINGLE)
257*a30f8f8cSSatish Balay #undef __FUNC__
258*a30f8f8cSSatish Balay #define __FUNC__ /*<a name="MatSetValues_MPISBAIJ"></a>*/"MatSetValues_MPISBAIJ"
259*a30f8f8cSSatish Balay int MatSetValues_MPISBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
260*a30f8f8cSSatish Balay {
261*a30f8f8cSSatish Balay   Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)mat->data;
262*a30f8f8cSSatish Balay   int         ierr,i,N = m*n;
263*a30f8f8cSSatish Balay   MatScalar   *vsingle;
264*a30f8f8cSSatish Balay 
265*a30f8f8cSSatish Balay   PetscFunctionBegin;
266*a30f8f8cSSatish Balay   if (N > b->setvalueslen) {
267*a30f8f8cSSatish Balay     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
268*a30f8f8cSSatish Balay     b->setvaluescopy = (MatScalar*)PetscMalloc(N*sizeof(MatScalar));CHKPTRQ(b->setvaluescopy);
269*a30f8f8cSSatish Balay     b->setvalueslen  = N;
270*a30f8f8cSSatish Balay   }
271*a30f8f8cSSatish Balay   vsingle = b->setvaluescopy;
272*a30f8f8cSSatish Balay 
273*a30f8f8cSSatish Balay   for (i=0; i<N; i++) {
274*a30f8f8cSSatish Balay     vsingle[i] = v[i];
275*a30f8f8cSSatish Balay   }
276*a30f8f8cSSatish Balay   ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
277*a30f8f8cSSatish Balay   PetscFunctionReturn(0);
278*a30f8f8cSSatish Balay }
279*a30f8f8cSSatish Balay 
280*a30f8f8cSSatish Balay #undef __FUNC__
281*a30f8f8cSSatish Balay #define __FUNC__ /*<a name="MatSetValuesBlocked_MPISBAIJ"></a>*/"MatSetValuesBlocked_MPISBAIJ"
282*a30f8f8cSSatish Balay int MatSetValuesBlocked_MPISBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
283*a30f8f8cSSatish Balay {
284*a30f8f8cSSatish Balay   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
285*a30f8f8cSSatish Balay   int         ierr,i,N = m*n*b->bs2;
286*a30f8f8cSSatish Balay   MatScalar   *vsingle;
287*a30f8f8cSSatish Balay 
288*a30f8f8cSSatish Balay   PetscFunctionBegin;
289*a30f8f8cSSatish Balay   if (N > b->setvalueslen) {
290*a30f8f8cSSatish Balay     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
291*a30f8f8cSSatish Balay     b->setvaluescopy = (MatScalar*)PetscMalloc(N*sizeof(MatScalar));CHKPTRQ(b->setvaluescopy);
292*a30f8f8cSSatish Balay     b->setvalueslen  = N;
293*a30f8f8cSSatish Balay   }
294*a30f8f8cSSatish Balay   vsingle = b->setvaluescopy;
295*a30f8f8cSSatish Balay   for (i=0; i<N; i++) {
296*a30f8f8cSSatish Balay     vsingle[i] = v[i];
297*a30f8f8cSSatish Balay   }
298*a30f8f8cSSatish Balay   ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
299*a30f8f8cSSatish Balay   PetscFunctionReturn(0);
300*a30f8f8cSSatish Balay }
301*a30f8f8cSSatish Balay 
302*a30f8f8cSSatish Balay #undef __FUNC__
303*a30f8f8cSSatish Balay #define __FUNC__ /*<a name="MatSetValues_MPISBAIJ_HT"></a>*/"MatSetValues_MPISBAIJ_HT"
304*a30f8f8cSSatish Balay int MatSetValues_MPISBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
305*a30f8f8cSSatish Balay {
306*a30f8f8cSSatish Balay   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
307*a30f8f8cSSatish Balay   int         ierr,i,N = m*n;
308*a30f8f8cSSatish Balay   MatScalar   *vsingle;
309*a30f8f8cSSatish Balay 
310*a30f8f8cSSatish Balay   PetscFunctionBegin;
311*a30f8f8cSSatish Balay   if (N > b->setvalueslen) {
312*a30f8f8cSSatish Balay     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
313*a30f8f8cSSatish Balay     b->setvaluescopy = (MatScalar*)PetscMalloc(N*sizeof(MatScalar));CHKPTRQ(b->setvaluescopy);
314*a30f8f8cSSatish Balay     b->setvalueslen  = N;
315*a30f8f8cSSatish Balay   }
316*a30f8f8cSSatish Balay   vsingle = b->setvaluescopy;
317*a30f8f8cSSatish Balay   for (i=0; i<N; i++) {
318*a30f8f8cSSatish Balay     vsingle[i] = v[i];
319*a30f8f8cSSatish Balay   }
320*a30f8f8cSSatish Balay   ierr = MatSetValues_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
321*a30f8f8cSSatish Balay   PetscFunctionReturn(0);
322*a30f8f8cSSatish Balay }
323*a30f8f8cSSatish Balay 
324*a30f8f8cSSatish Balay #undef __FUNC__
325*a30f8f8cSSatish Balay #define __FUNC__ /*<a name="MatSetValuesBlocked_MPISBAIJ_HT"></a>*/"MatSetValuesBlocked_MPISBAIJ_HT"
326*a30f8f8cSSatish Balay int MatSetValuesBlocked_MPISBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
327*a30f8f8cSSatish Balay {
328*a30f8f8cSSatish Balay   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
329*a30f8f8cSSatish Balay   int         ierr,i,N = m*n*b->bs2;
330*a30f8f8cSSatish Balay   MatScalar   *vsingle;
331*a30f8f8cSSatish Balay 
332*a30f8f8cSSatish Balay   PetscFunctionBegin;
333*a30f8f8cSSatish Balay   if (N > b->setvalueslen) {
334*a30f8f8cSSatish Balay     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
335*a30f8f8cSSatish Balay     b->setvaluescopy = (MatScalar*)PetscMalloc(N*sizeof(MatScalar));CHKPTRQ(b->setvaluescopy);
336*a30f8f8cSSatish Balay     b->setvalueslen  = N;
337*a30f8f8cSSatish Balay   }
338*a30f8f8cSSatish Balay   vsingle = b->setvaluescopy;
339*a30f8f8cSSatish Balay   for (i=0; i<N; i++) {
340*a30f8f8cSSatish Balay     vsingle[i] = v[i];
341*a30f8f8cSSatish Balay   }
342*a30f8f8cSSatish Balay   ierr = MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
343*a30f8f8cSSatish Balay   PetscFunctionReturn(0);
344*a30f8f8cSSatish Balay }
345*a30f8f8cSSatish Balay #endif
346*a30f8f8cSSatish Balay 
347*a30f8f8cSSatish Balay /* Only add/insert a(i,j) with i<=j (blocks).
348*a30f8f8cSSatish Balay    Any a(i,j) with i>j input by user is ingored.
349*a30f8f8cSSatish Balay */
350*a30f8f8cSSatish Balay #undef __FUNC__
351*a30f8f8cSSatish Balay #define __FUNC__ /*<a name="MatSetValues_MPIBAIJ"></a>*/"MatSetValues_MPIBAIJ"
352*a30f8f8cSSatish Balay int MatSetValues_MPISBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
353*a30f8f8cSSatish Balay {
354*a30f8f8cSSatish Balay   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
355*a30f8f8cSSatish Balay   MatScalar   value;
356*a30f8f8cSSatish Balay   int         ierr,i,j,row,col;
357*a30f8f8cSSatish Balay   int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
358*a30f8f8cSSatish Balay   int         rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
359*a30f8f8cSSatish Balay   int         cend_orig=baij->cend_bs,bs=baij->bs;
360*a30f8f8cSSatish Balay 
361*a30f8f8cSSatish Balay   /* Some Variables required in the macro */
362*a30f8f8cSSatish Balay   Mat         A = baij->A;
363*a30f8f8cSSatish Balay   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data;
364*a30f8f8cSSatish Balay   int         *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
365*a30f8f8cSSatish Balay   MatScalar   *aa=a->a;
366*a30f8f8cSSatish Balay 
367*a30f8f8cSSatish Balay   Mat         B = baij->B;
368*a30f8f8cSSatish Balay   Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data;
369*a30f8f8cSSatish Balay   int         *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
370*a30f8f8cSSatish Balay   MatScalar   *ba=b->a;
371*a30f8f8cSSatish Balay 
372*a30f8f8cSSatish Balay   int         *rp,ii,nrow,_i,rmax,N,brow,bcol;
373*a30f8f8cSSatish Balay   int         low,high,t,ridx,cidx,bs2=a->bs2;
374*a30f8f8cSSatish Balay   MatScalar   *ap,*bap;
375*a30f8f8cSSatish Balay 
376*a30f8f8cSSatish Balay   /* for stash */
377*a30f8f8cSSatish Balay   int         n_loc, *in_loc;
378*a30f8f8cSSatish Balay   MatScalar   *v_loc;
379*a30f8f8cSSatish Balay 
380*a30f8f8cSSatish Balay   PetscFunctionBegin;
381*a30f8f8cSSatish Balay 
382*a30f8f8cSSatish Balay   if(!baij->donotstash){
383*a30f8f8cSSatish Balay     in_loc = (int*)PetscMalloc(n*sizeof(int)); CHKPTRQ(in_loc);
384*a30f8f8cSSatish Balay     v_loc  = (MatScalar*)PetscMalloc(n*sizeof(MatScalar)); CHKPTRQ(v_loc);
385*a30f8f8cSSatish Balay   }
386*a30f8f8cSSatish Balay 
387*a30f8f8cSSatish Balay   for (i=0; i<m; i++) {
388*a30f8f8cSSatish Balay     if (im[i] < 0) continue;
389*a30f8f8cSSatish Balay #if defined(PETSC_USE_BOPT_g)
390*a30f8f8cSSatish Balay     if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
391*a30f8f8cSSatish Balay #endif
392*a30f8f8cSSatish Balay     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
393*a30f8f8cSSatish Balay       row = im[i] - rstart_orig;              /* local row index */
394*a30f8f8cSSatish Balay       for (j=0; j<n; j++) {
395*a30f8f8cSSatish Balay         if (im[i]/bs > in[j]/bs) continue;    /* ignore low triangular blocks */
396*a30f8f8cSSatish Balay         if (in[j] >= cstart_orig && in[j] < cend_orig){  /* diag entry (A) */
397*a30f8f8cSSatish Balay           col = in[j] - cstart_orig;          /* local col index */
398*a30f8f8cSSatish Balay           brow = row/bs; bcol = col/bs;
399*a30f8f8cSSatish Balay           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
400*a30f8f8cSSatish Balay           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
401*a30f8f8cSSatish Balay           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
402*a30f8f8cSSatish Balay           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
403*a30f8f8cSSatish Balay         } else if (in[j] < 0) continue;
404*a30f8f8cSSatish Balay #if defined(PETSC_USE_BOPT_g)
405*a30f8f8cSSatish Balay         else if (in[j] >= baij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Col too large");}
406*a30f8f8cSSatish Balay #endif
407*a30f8f8cSSatish Balay         else {  /* off-diag entry (B) */
408*a30f8f8cSSatish Balay           if (mat->was_assembled) {
409*a30f8f8cSSatish Balay             if (!baij->colmap) {
410*a30f8f8cSSatish Balay               ierr = CreateColmap_MPISBAIJ_Private(mat);CHKERRQ(ierr);
411*a30f8f8cSSatish Balay             }
412*a30f8f8cSSatish Balay #if defined (PETSC_USE_CTABLE)
413*a30f8f8cSSatish Balay             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
414*a30f8f8cSSatish Balay             col  = col - 1 + in[j]%bs;
415*a30f8f8cSSatish Balay #else
416*a30f8f8cSSatish Balay             col = baij->colmap[in[j]/bs] - 1 + in[j]%bs;
417*a30f8f8cSSatish Balay #endif
418*a30f8f8cSSatish Balay             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
419*a30f8f8cSSatish Balay               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
420*a30f8f8cSSatish Balay               col =  in[j];
421*a30f8f8cSSatish Balay               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
422*a30f8f8cSSatish Balay               B = baij->B;
423*a30f8f8cSSatish Balay               b = (Mat_SeqBAIJ*)(B)->data;
424*a30f8f8cSSatish Balay               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
425*a30f8f8cSSatish Balay               ba=b->a;
426*a30f8f8cSSatish Balay             }
427*a30f8f8cSSatish Balay           } else col = in[j];
428*a30f8f8cSSatish Balay           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
429*a30f8f8cSSatish Balay           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
430*a30f8f8cSSatish Balay           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
431*a30f8f8cSSatish Balay         }
432*a30f8f8cSSatish Balay       }
433*a30f8f8cSSatish Balay     } else {  /* off processor entry */
434*a30f8f8cSSatish Balay       if (!baij->donotstash) {
435*a30f8f8cSSatish Balay         n_loc = 0;
436*a30f8f8cSSatish Balay         for (j=0; j<n; j++){
437*a30f8f8cSSatish Balay           if (im[i]/bs > in[j]/bs) continue; /* ignore low triangular blocks */
438*a30f8f8cSSatish Balay           in_loc[n_loc] = in[j];
439*a30f8f8cSSatish Balay           if (roworiented) {
440*a30f8f8cSSatish Balay             v_loc[n_loc] = v[i*n+j];
441*a30f8f8cSSatish Balay           } else {
442*a30f8f8cSSatish Balay             v_loc[n_loc] = v[j*m+i];
443*a30f8f8cSSatish Balay           }
444*a30f8f8cSSatish Balay           n_loc++;
445*a30f8f8cSSatish Balay         }
446*a30f8f8cSSatish Balay         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr);
447*a30f8f8cSSatish Balay       }
448*a30f8f8cSSatish Balay     }
449*a30f8f8cSSatish Balay   }
450*a30f8f8cSSatish Balay 
451*a30f8f8cSSatish Balay   if(!baij->donotstash){
452*a30f8f8cSSatish Balay     ierr = PetscFree(in_loc);CHKERRQ(ierr);
453*a30f8f8cSSatish Balay     ierr = PetscFree(v_loc);CHKERRQ(ierr);
454*a30f8f8cSSatish Balay   }
455*a30f8f8cSSatish Balay   PetscFunctionReturn(0);
456*a30f8f8cSSatish Balay }
457*a30f8f8cSSatish Balay 
458*a30f8f8cSSatish Balay #undef __FUNC__
459*a30f8f8cSSatish Balay #define __FUNC__ /*<a name="MatSetValuesBlocked_MPISBAIJ"></a>*/"MatSetValuesBlocked_MPISBAIJ"
460*a30f8f8cSSatish Balay int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
461*a30f8f8cSSatish Balay {
462*a30f8f8cSSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
463*a30f8f8cSSatish Balay   MatScalar   *value,*barray=baij->barray;
464*a30f8f8cSSatish Balay   int         ierr,i,j,ii,jj,row,col;
465*a30f8f8cSSatish Balay   int         roworiented = baij->roworiented,rstart=baij->rstart ;
466*a30f8f8cSSatish Balay   int         rend=baij->rend,cstart=baij->cstart,stepval;
467*a30f8f8cSSatish Balay   int         cend=baij->cend,bs=baij->bs,bs2=baij->bs2;
468*a30f8f8cSSatish Balay 
469*a30f8f8cSSatish Balay   PetscFunctionBegin;
470*a30f8f8cSSatish Balay   if(!barray) {
471*a30f8f8cSSatish Balay     baij->barray = barray = (MatScalar*)PetscMalloc(bs2*sizeof(MatScalar));CHKPTRQ(barray);
472*a30f8f8cSSatish Balay   }
473*a30f8f8cSSatish Balay 
474*a30f8f8cSSatish Balay   if (roworiented) {
475*a30f8f8cSSatish Balay     stepval = (n-1)*bs;
476*a30f8f8cSSatish Balay   } else {
477*a30f8f8cSSatish Balay     stepval = (m-1)*bs;
478*a30f8f8cSSatish Balay   }
479*a30f8f8cSSatish Balay   for (i=0; i<m; i++) {
480*a30f8f8cSSatish Balay     if (im[i] < 0) continue;
481*a30f8f8cSSatish Balay #if defined(PETSC_USE_BOPT_g)
482*a30f8f8cSSatish Balay     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large, row %d max %d",im[i],baij->Mbs);
483*a30f8f8cSSatish Balay #endif
484*a30f8f8cSSatish Balay     if (im[i] >= rstart && im[i] < rend) {
485*a30f8f8cSSatish Balay       row = im[i] - rstart;
486*a30f8f8cSSatish Balay       for (j=0; j<n; j++) {
487*a30f8f8cSSatish Balay         /* If NumCol = 1 then a copy is not required */
488*a30f8f8cSSatish Balay         if ((roworiented) && (n == 1)) {
489*a30f8f8cSSatish Balay           barray = v + i*bs2;
490*a30f8f8cSSatish Balay         } else if((!roworiented) && (m == 1)) {
491*a30f8f8cSSatish Balay           barray = v + j*bs2;
492*a30f8f8cSSatish Balay         } else { /* Here a copy is required */
493*a30f8f8cSSatish Balay           if (roworiented) {
494*a30f8f8cSSatish Balay             value = v + i*(stepval+bs)*bs + j*bs;
495*a30f8f8cSSatish Balay           } else {
496*a30f8f8cSSatish Balay             value = v + j*(stepval+bs)*bs + i*bs;
497*a30f8f8cSSatish Balay           }
498*a30f8f8cSSatish Balay           for (ii=0; ii<bs; ii++,value+=stepval) {
499*a30f8f8cSSatish Balay             for (jj=0; jj<bs; jj++) {
500*a30f8f8cSSatish Balay               *barray++  = *value++;
501*a30f8f8cSSatish Balay             }
502*a30f8f8cSSatish Balay           }
503*a30f8f8cSSatish Balay           barray -=bs2;
504*a30f8f8cSSatish Balay         }
505*a30f8f8cSSatish Balay 
506*a30f8f8cSSatish Balay         if (in[j] >= cstart && in[j] < cend){
507*a30f8f8cSSatish Balay           col  = in[j] - cstart;
508*a30f8f8cSSatish Balay           ierr = MatSetValuesBlocked_SeqSBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
509*a30f8f8cSSatish Balay         }
510*a30f8f8cSSatish Balay         else if (in[j] < 0) continue;
511*a30f8f8cSSatish Balay #if defined(PETSC_USE_BOPT_g)
512*a30f8f8cSSatish Balay         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large, col %d max %d",in[j],baij->Nbs);}
513*a30f8f8cSSatish Balay #endif
514*a30f8f8cSSatish Balay         else {
515*a30f8f8cSSatish Balay           if (mat->was_assembled) {
516*a30f8f8cSSatish Balay             if (!baij->colmap) {
517*a30f8f8cSSatish Balay               ierr = CreateColmap_MPISBAIJ_Private(mat);CHKERRQ(ierr);
518*a30f8f8cSSatish Balay             }
519*a30f8f8cSSatish Balay 
520*a30f8f8cSSatish Balay #if defined(PETSC_USE_BOPT_g)
521*a30f8f8cSSatish Balay #if defined (PETSC_USE_CTABLE)
522*a30f8f8cSSatish Balay             { int data;
523*a30f8f8cSSatish Balay               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
524*a30f8f8cSSatish Balay               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");
525*a30f8f8cSSatish Balay             }
526*a30f8f8cSSatish Balay #else
527*a30f8f8cSSatish Balay             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");
528*a30f8f8cSSatish Balay #endif
529*a30f8f8cSSatish Balay #endif
530*a30f8f8cSSatish Balay #if defined (PETSC_USE_CTABLE)
531*a30f8f8cSSatish Balay 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
532*a30f8f8cSSatish Balay             col  = (col - 1)/bs;
533*a30f8f8cSSatish Balay #else
534*a30f8f8cSSatish Balay             col = (baij->colmap[in[j]] - 1)/bs;
535*a30f8f8cSSatish Balay #endif
536*a30f8f8cSSatish Balay             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
537*a30f8f8cSSatish Balay               ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
538*a30f8f8cSSatish Balay               col =  in[j];
539*a30f8f8cSSatish Balay             }
540*a30f8f8cSSatish Balay           }
541*a30f8f8cSSatish Balay           else col = in[j];
542*a30f8f8cSSatish Balay           ierr = MatSetValuesBlocked_SeqSBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
543*a30f8f8cSSatish Balay         }
544*a30f8f8cSSatish Balay       }
545*a30f8f8cSSatish Balay     } else {
546*a30f8f8cSSatish Balay       if (!baij->donotstash) {
547*a30f8f8cSSatish Balay         if (roworiented) {
548*a30f8f8cSSatish Balay           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
549*a30f8f8cSSatish Balay         } else {
550*a30f8f8cSSatish Balay           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
551*a30f8f8cSSatish Balay         }
552*a30f8f8cSSatish Balay       }
553*a30f8f8cSSatish Balay     }
554*a30f8f8cSSatish Balay   }
555*a30f8f8cSSatish Balay   PetscFunctionReturn(0);
556*a30f8f8cSSatish Balay }
557*a30f8f8cSSatish Balay 
558*a30f8f8cSSatish Balay #define HASH_KEY 0.6180339887
559*a30f8f8cSSatish Balay #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
560*a30f8f8cSSatish Balay /* #define HASH(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
561*a30f8f8cSSatish Balay /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
562*a30f8f8cSSatish Balay #undef __FUNC__
563*a30f8f8cSSatish Balay #define __FUNC__ /*<a name="MatSetValues_MPISBAIJ_HT_MatScalar"></a>*/"MatSetValues_MPISBAIJ_HT_MatScalar"
564*a30f8f8cSSatish Balay int MatSetValues_MPISBAIJ_HT_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
565*a30f8f8cSSatish Balay {
566*a30f8f8cSSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
567*a30f8f8cSSatish Balay   int         ierr,i,j,row,col;
568*a30f8f8cSSatish Balay   int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
569*a30f8f8cSSatish Balay   int         rend_orig=baij->rend_bs,Nbs=baij->Nbs;
570*a30f8f8cSSatish Balay   int         h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx;
571*a30f8f8cSSatish Balay   PetscReal   tmp;
572*a30f8f8cSSatish Balay   MatScalar   **HD = baij->hd,value;
573*a30f8f8cSSatish Balay #if defined(PETSC_USE_BOPT_g)
574*a30f8f8cSSatish Balay   int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
575*a30f8f8cSSatish Balay #endif
576*a30f8f8cSSatish Balay 
577*a30f8f8cSSatish Balay   PetscFunctionBegin;
578*a30f8f8cSSatish Balay 
579*a30f8f8cSSatish Balay   for (i=0; i<m; i++) {
580*a30f8f8cSSatish Balay #if defined(PETSC_USE_BOPT_g)
581*a30f8f8cSSatish Balay     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
582*a30f8f8cSSatish Balay     if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
583*a30f8f8cSSatish Balay #endif
584*a30f8f8cSSatish Balay       row = im[i];
585*a30f8f8cSSatish Balay     if (row >= rstart_orig && row < rend_orig) {
586*a30f8f8cSSatish Balay       for (j=0; j<n; j++) {
587*a30f8f8cSSatish Balay         col = in[j];
588*a30f8f8cSSatish Balay         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
589*a30f8f8cSSatish Balay         /* Look up into the Hash Table */
590*a30f8f8cSSatish Balay         key = (row/bs)*Nbs+(col/bs)+1;
591*a30f8f8cSSatish Balay         h1  = HASH(size,key,tmp);
592*a30f8f8cSSatish Balay 
593*a30f8f8cSSatish Balay 
594*a30f8f8cSSatish Balay         idx = h1;
595*a30f8f8cSSatish Balay #if defined(PETSC_USE_BOPT_g)
596*a30f8f8cSSatish Balay         insert_ct++;
597*a30f8f8cSSatish Balay         total_ct++;
598*a30f8f8cSSatish Balay         if (HT[idx] != key) {
599*a30f8f8cSSatish Balay           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
600*a30f8f8cSSatish Balay           if (idx == size) {
601*a30f8f8cSSatish Balay             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
602*a30f8f8cSSatish Balay             if (idx == h1) {
603*a30f8f8cSSatish Balay               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
604*a30f8f8cSSatish Balay             }
605*a30f8f8cSSatish Balay           }
606*a30f8f8cSSatish Balay         }
607*a30f8f8cSSatish Balay #else
608*a30f8f8cSSatish Balay         if (HT[idx] != key) {
609*a30f8f8cSSatish Balay           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
610*a30f8f8cSSatish Balay           if (idx == size) {
611*a30f8f8cSSatish Balay             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
612*a30f8f8cSSatish Balay             if (idx == h1) {
613*a30f8f8cSSatish Balay               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
614*a30f8f8cSSatish Balay             }
615*a30f8f8cSSatish Balay           }
616*a30f8f8cSSatish Balay         }
617*a30f8f8cSSatish Balay #endif
618*a30f8f8cSSatish Balay         /* A HASH table entry is found, so insert the values at the correct address */
619*a30f8f8cSSatish Balay         if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
620*a30f8f8cSSatish Balay         else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
621*a30f8f8cSSatish Balay       }
622*a30f8f8cSSatish Balay     } else {
623*a30f8f8cSSatish Balay       if (!baij->donotstash) {
624*a30f8f8cSSatish Balay         if (roworiented) {
625*a30f8f8cSSatish Balay           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
626*a30f8f8cSSatish Balay         } else {
627*a30f8f8cSSatish Balay           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
628*a30f8f8cSSatish Balay         }
629*a30f8f8cSSatish Balay       }
630*a30f8f8cSSatish Balay     }
631*a30f8f8cSSatish Balay   }
632*a30f8f8cSSatish Balay #if defined(PETSC_USE_BOPT_g)
633*a30f8f8cSSatish Balay   baij->ht_total_ct = total_ct;
634*a30f8f8cSSatish Balay   baij->ht_insert_ct = insert_ct;
635*a30f8f8cSSatish Balay #endif
636*a30f8f8cSSatish Balay   PetscFunctionReturn(0);
637*a30f8f8cSSatish Balay }
638*a30f8f8cSSatish Balay 
639*a30f8f8cSSatish Balay #undef __FUNC__
640*a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatSetValuesBlocked_MPISBAIJ_HT_MatScalar"
641*a30f8f8cSSatish Balay int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
642*a30f8f8cSSatish Balay {
643*a30f8f8cSSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
644*a30f8f8cSSatish Balay   int         ierr,i,j,ii,jj,row,col;
645*a30f8f8cSSatish Balay   int         roworiented = baij->roworiented,rstart=baij->rstart ;
646*a30f8f8cSSatish Balay   int         rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2;
647*a30f8f8cSSatish Balay   int         h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
648*a30f8f8cSSatish Balay   PetscReal   tmp;
649*a30f8f8cSSatish Balay   MatScalar   **HD = baij->hd,*baij_a;
650*a30f8f8cSSatish Balay   MatScalar   *v_t,*value;
651*a30f8f8cSSatish Balay #if defined(PETSC_USE_BOPT_g)
652*a30f8f8cSSatish Balay   int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
653*a30f8f8cSSatish Balay #endif
654*a30f8f8cSSatish Balay 
655*a30f8f8cSSatish Balay   PetscFunctionBegin;
656*a30f8f8cSSatish Balay 
657*a30f8f8cSSatish Balay   if (roworiented) {
658*a30f8f8cSSatish Balay     stepval = (n-1)*bs;
659*a30f8f8cSSatish Balay   } else {
660*a30f8f8cSSatish Balay     stepval = (m-1)*bs;
661*a30f8f8cSSatish Balay   }
662*a30f8f8cSSatish Balay   for (i=0; i<m; i++) {
663*a30f8f8cSSatish Balay #if defined(PETSC_USE_BOPT_g)
664*a30f8f8cSSatish Balay     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
665*a30f8f8cSSatish Balay     if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
666*a30f8f8cSSatish Balay #endif
667*a30f8f8cSSatish Balay     row   = im[i];
668*a30f8f8cSSatish Balay     v_t   = v + i*bs2;
669*a30f8f8cSSatish Balay     if (row >= rstart && row < rend) {
670*a30f8f8cSSatish Balay       for (j=0; j<n; j++) {
671*a30f8f8cSSatish Balay         col = in[j];
672*a30f8f8cSSatish Balay 
673*a30f8f8cSSatish Balay         /* Look up into the Hash Table */
674*a30f8f8cSSatish Balay         key = row*Nbs+col+1;
675*a30f8f8cSSatish Balay         h1  = HASH(size,key,tmp);
676*a30f8f8cSSatish Balay 
677*a30f8f8cSSatish Balay         idx = h1;
678*a30f8f8cSSatish Balay #if defined(PETSC_USE_BOPT_g)
679*a30f8f8cSSatish Balay         total_ct++;
680*a30f8f8cSSatish Balay         insert_ct++;
681*a30f8f8cSSatish Balay        if (HT[idx] != key) {
682*a30f8f8cSSatish Balay           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
683*a30f8f8cSSatish Balay           if (idx == size) {
684*a30f8f8cSSatish Balay             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
685*a30f8f8cSSatish Balay             if (idx == h1) {
686*a30f8f8cSSatish Balay               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
687*a30f8f8cSSatish Balay             }
688*a30f8f8cSSatish Balay           }
689*a30f8f8cSSatish Balay         }
690*a30f8f8cSSatish Balay #else
691*a30f8f8cSSatish Balay         if (HT[idx] != key) {
692*a30f8f8cSSatish Balay           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
693*a30f8f8cSSatish Balay           if (idx == size) {
694*a30f8f8cSSatish Balay             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
695*a30f8f8cSSatish Balay             if (idx == h1) {
696*a30f8f8cSSatish Balay               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
697*a30f8f8cSSatish Balay             }
698*a30f8f8cSSatish Balay           }
699*a30f8f8cSSatish Balay         }
700*a30f8f8cSSatish Balay #endif
701*a30f8f8cSSatish Balay         baij_a = HD[idx];
702*a30f8f8cSSatish Balay         if (roworiented) {
703*a30f8f8cSSatish Balay           /*value = v + i*(stepval+bs)*bs + j*bs;*/
704*a30f8f8cSSatish Balay           /* value = v + (i*(stepval+bs)+j)*bs; */
705*a30f8f8cSSatish Balay           value = v_t;
706*a30f8f8cSSatish Balay           v_t  += bs;
707*a30f8f8cSSatish Balay           if (addv == ADD_VALUES) {
708*a30f8f8cSSatish Balay             for (ii=0; ii<bs; ii++,value+=stepval) {
709*a30f8f8cSSatish Balay               for (jj=ii; jj<bs2; jj+=bs) {
710*a30f8f8cSSatish Balay                 baij_a[jj]  += *value++;
711*a30f8f8cSSatish Balay               }
712*a30f8f8cSSatish Balay             }
713*a30f8f8cSSatish Balay           } else {
714*a30f8f8cSSatish Balay             for (ii=0; ii<bs; ii++,value+=stepval) {
715*a30f8f8cSSatish Balay               for (jj=ii; jj<bs2; jj+=bs) {
716*a30f8f8cSSatish Balay                 baij_a[jj]  = *value++;
717*a30f8f8cSSatish Balay               }
718*a30f8f8cSSatish Balay             }
719*a30f8f8cSSatish Balay           }
720*a30f8f8cSSatish Balay         } else {
721*a30f8f8cSSatish Balay           value = v + j*(stepval+bs)*bs + i*bs;
722*a30f8f8cSSatish Balay           if (addv == ADD_VALUES) {
723*a30f8f8cSSatish Balay             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
724*a30f8f8cSSatish Balay               for (jj=0; jj<bs; jj++) {
725*a30f8f8cSSatish Balay                 baij_a[jj]  += *value++;
726*a30f8f8cSSatish Balay               }
727*a30f8f8cSSatish Balay             }
728*a30f8f8cSSatish Balay           } else {
729*a30f8f8cSSatish Balay             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
730*a30f8f8cSSatish Balay               for (jj=0; jj<bs; jj++) {
731*a30f8f8cSSatish Balay                 baij_a[jj]  = *value++;
732*a30f8f8cSSatish Balay               }
733*a30f8f8cSSatish Balay             }
734*a30f8f8cSSatish Balay           }
735*a30f8f8cSSatish Balay         }
736*a30f8f8cSSatish Balay       }
737*a30f8f8cSSatish Balay     } else {
738*a30f8f8cSSatish Balay       if (!baij->donotstash) {
739*a30f8f8cSSatish Balay         if (roworiented) {
740*a30f8f8cSSatish Balay           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
741*a30f8f8cSSatish Balay         } else {
742*a30f8f8cSSatish Balay           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
743*a30f8f8cSSatish Balay         }
744*a30f8f8cSSatish Balay       }
745*a30f8f8cSSatish Balay     }
746*a30f8f8cSSatish Balay   }
747*a30f8f8cSSatish Balay #if defined(PETSC_USE_BOPT_g)
748*a30f8f8cSSatish Balay   baij->ht_total_ct = total_ct;
749*a30f8f8cSSatish Balay   baij->ht_insert_ct = insert_ct;
750*a30f8f8cSSatish Balay #endif
751*a30f8f8cSSatish Balay   PetscFunctionReturn(0);
752*a30f8f8cSSatish Balay }
753*a30f8f8cSSatish Balay 
754*a30f8f8cSSatish Balay #undef __FUNC__
755*a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatGetValues_MPISBAIJ"
756*a30f8f8cSSatish Balay int MatGetValues_MPISBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
757*a30f8f8cSSatish Balay {
758*a30f8f8cSSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
759*a30f8f8cSSatish Balay   int        bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
760*a30f8f8cSSatish Balay   int        bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;
761*a30f8f8cSSatish Balay 
762*a30f8f8cSSatish Balay   PetscFunctionBegin;
763*a30f8f8cSSatish Balay   for (i=0; i<m; i++) {
764*a30f8f8cSSatish Balay     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
765*a30f8f8cSSatish Balay     if (idxm[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
766*a30f8f8cSSatish Balay     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
767*a30f8f8cSSatish Balay       row = idxm[i] - bsrstart;
768*a30f8f8cSSatish Balay       for (j=0; j<n; j++) {
769*a30f8f8cSSatish Balay         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
770*a30f8f8cSSatish Balay         if (idxn[j] >= baij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
771*a30f8f8cSSatish Balay         if (idxn[j] >= bscstart && idxn[j] < bscend){
772*a30f8f8cSSatish Balay           col = idxn[j] - bscstart;
773*a30f8f8cSSatish Balay           ierr = MatGetValues_SeqBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
774*a30f8f8cSSatish Balay         } else {
775*a30f8f8cSSatish Balay           if (!baij->colmap) {
776*a30f8f8cSSatish Balay             ierr = CreateColmap_MPISBAIJ_Private(mat);CHKERRQ(ierr);
777*a30f8f8cSSatish Balay           }
778*a30f8f8cSSatish Balay #if defined (PETSC_USE_CTABLE)
779*a30f8f8cSSatish Balay           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
780*a30f8f8cSSatish Balay           data --;
781*a30f8f8cSSatish Balay #else
782*a30f8f8cSSatish Balay           data = baij->colmap[idxn[j]/bs]-1;
783*a30f8f8cSSatish Balay #endif
784*a30f8f8cSSatish Balay           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
785*a30f8f8cSSatish Balay           else {
786*a30f8f8cSSatish Balay             col  = data + idxn[j]%bs;
787*a30f8f8cSSatish Balay             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
788*a30f8f8cSSatish Balay           }
789*a30f8f8cSSatish Balay         }
790*a30f8f8cSSatish Balay       }
791*a30f8f8cSSatish Balay     } else {
792*a30f8f8cSSatish Balay       SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
793*a30f8f8cSSatish Balay     }
794*a30f8f8cSSatish Balay   }
795*a30f8f8cSSatish Balay  PetscFunctionReturn(0);
796*a30f8f8cSSatish Balay }
797*a30f8f8cSSatish Balay 
798*a30f8f8cSSatish Balay #undef __FUNC__
799*a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatNorm_MPISBAIJ"
800*a30f8f8cSSatish Balay int MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
801*a30f8f8cSSatish Balay {
802*a30f8f8cSSatish Balay   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
803*a30f8f8cSSatish Balay   Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ*)baij->A->data;
804*a30f8f8cSSatish Balay   Mat_SeqBAIJ  *bmat = (Mat_SeqBAIJ*)baij->B->data;
805*a30f8f8cSSatish Balay   int        ierr;
806*a30f8f8cSSatish Balay   PetscReal  sum[2],*lnorm2;
807*a30f8f8cSSatish Balay 
808*a30f8f8cSSatish Balay   PetscFunctionBegin;
809*a30f8f8cSSatish Balay   if (baij->size == 1) {
810*a30f8f8cSSatish Balay     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
811*a30f8f8cSSatish Balay   } else {
812*a30f8f8cSSatish Balay     if (type == NORM_FROBENIUS) {
813*a30f8f8cSSatish Balay       lnorm2 = (Scalar*)PetscMalloc(2*sizeof(Scalar));CHKPTRQ(lnorm2);
814*a30f8f8cSSatish Balay       ierr =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
815*a30f8f8cSSatish Balay       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* squar power of norm(A) */
816*a30f8f8cSSatish Balay       ierr =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
817*a30f8f8cSSatish Balay       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* squar power of norm(B) */
818*a30f8f8cSSatish Balay       /*
819*a30f8f8cSSatish Balay       ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
820*a30f8f8cSSatish Balay       PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d], lnorm2=%g, %g\n",rank,lnorm2[0],lnorm2[1]);
821*a30f8f8cSSatish Balay       */
822*a30f8f8cSSatish Balay       ierr = MPI_Allreduce(lnorm2,&sum,2,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
823*a30f8f8cSSatish Balay       /*
824*a30f8f8cSSatish Balay       PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d], sum=%g, %g\n",rank,sum[0],sum[1]);
825*a30f8f8cSSatish Balay       PetscSynchronizedFlush(PETSC_COMM_WORLD); */
826*a30f8f8cSSatish Balay 
827*a30f8f8cSSatish Balay       *norm = sqrt(sum[0] + 2*sum[1]);
828*a30f8f8cSSatish Balay       ierr = PetscFree(lnorm2);CHKERRQ(ierr);
829*a30f8f8cSSatish Balay     } else {
830*a30f8f8cSSatish Balay       SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet");
831*a30f8f8cSSatish Balay     }
832*a30f8f8cSSatish Balay   }
833*a30f8f8cSSatish Balay   PetscFunctionReturn(0);
834*a30f8f8cSSatish Balay }
835*a30f8f8cSSatish Balay 
836*a30f8f8cSSatish Balay /*
837*a30f8f8cSSatish Balay   Creates the hash table, and sets the table
838*a30f8f8cSSatish Balay   This table is created only once.
839*a30f8f8cSSatish Balay   If new entried need to be added to the matrix
840*a30f8f8cSSatish Balay   then the hash table has to be destroyed and
841*a30f8f8cSSatish Balay   recreated.
842*a30f8f8cSSatish Balay */
843*a30f8f8cSSatish Balay #undef __FUNC__
844*a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatCreateHashTable_MPISBAIJ_Private"
845*a30f8f8cSSatish Balay int MatCreateHashTable_MPISBAIJ_Private(Mat mat,PetscReal factor)
846*a30f8f8cSSatish Balay {
847*a30f8f8cSSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
848*a30f8f8cSSatish Balay   Mat         A = baij->A,B=baij->B;
849*a30f8f8cSSatish Balay   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
850*a30f8f8cSSatish Balay   int         i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
851*a30f8f8cSSatish Balay   int         size,bs2=baij->bs2,rstart=baij->rstart,ierr;
852*a30f8f8cSSatish Balay   int         cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs;
853*a30f8f8cSSatish Balay   int         *HT,key;
854*a30f8f8cSSatish Balay   MatScalar   **HD;
855*a30f8f8cSSatish Balay   PetscReal   tmp;
856*a30f8f8cSSatish Balay #if defined(PETSC_USE_BOPT_g)
857*a30f8f8cSSatish Balay   int         ct=0,max=0;
858*a30f8f8cSSatish Balay #endif
859*a30f8f8cSSatish Balay 
860*a30f8f8cSSatish Balay   PetscFunctionBegin;
861*a30f8f8cSSatish Balay   baij->ht_size=(int)(factor*nz);
862*a30f8f8cSSatish Balay   size = baij->ht_size;
863*a30f8f8cSSatish Balay 
864*a30f8f8cSSatish Balay   if (baij->ht) {
865*a30f8f8cSSatish Balay     PetscFunctionReturn(0);
866*a30f8f8cSSatish Balay   }
867*a30f8f8cSSatish Balay 
868*a30f8f8cSSatish Balay   /* Allocate Memory for Hash Table */
869*a30f8f8cSSatish Balay   baij->hd = (MatScalar**)PetscMalloc((size)*(sizeof(int)+sizeof(MatScalar*))+1);CHKPTRQ(baij->hd);
870*a30f8f8cSSatish Balay   baij->ht = (int*)(baij->hd + size);
871*a30f8f8cSSatish Balay   HD = baij->hd;
872*a30f8f8cSSatish Balay   HT = baij->ht;
873*a30f8f8cSSatish Balay 
874*a30f8f8cSSatish Balay 
875*a30f8f8cSSatish Balay   ierr = PetscMemzero(HD,size*(sizeof(int)+sizeof(Scalar*)));CHKERRQ(ierr);
876*a30f8f8cSSatish Balay 
877*a30f8f8cSSatish Balay 
878*a30f8f8cSSatish Balay   /* Loop Over A */
879*a30f8f8cSSatish Balay   for (i=0; i<a->mbs; i++) {
880*a30f8f8cSSatish Balay     for (j=ai[i]; j<ai[i+1]; j++) {
881*a30f8f8cSSatish Balay       row = i+rstart;
882*a30f8f8cSSatish Balay       col = aj[j]+cstart;
883*a30f8f8cSSatish Balay 
884*a30f8f8cSSatish Balay       key = row*Nbs + col + 1;
885*a30f8f8cSSatish Balay       h1  = HASH(size,key,tmp);
886*a30f8f8cSSatish Balay       for (k=0; k<size; k++){
887*a30f8f8cSSatish Balay         if (HT[(h1+k)%size] == 0.0) {
888*a30f8f8cSSatish Balay           HT[(h1+k)%size] = key;
889*a30f8f8cSSatish Balay           HD[(h1+k)%size] = a->a + j*bs2;
890*a30f8f8cSSatish Balay           break;
891*a30f8f8cSSatish Balay #if defined(PETSC_USE_BOPT_g)
892*a30f8f8cSSatish Balay         } else {
893*a30f8f8cSSatish Balay           ct++;
894*a30f8f8cSSatish Balay #endif
895*a30f8f8cSSatish Balay         }
896*a30f8f8cSSatish Balay       }
897*a30f8f8cSSatish Balay #if defined(PETSC_USE_BOPT_g)
898*a30f8f8cSSatish Balay       if (k> max) max = k;
899*a30f8f8cSSatish Balay #endif
900*a30f8f8cSSatish Balay     }
901*a30f8f8cSSatish Balay   }
902*a30f8f8cSSatish Balay   /* Loop Over B */
903*a30f8f8cSSatish Balay   for (i=0; i<b->mbs; i++) {
904*a30f8f8cSSatish Balay     for (j=bi[i]; j<bi[i+1]; j++) {
905*a30f8f8cSSatish Balay       row = i+rstart;
906*a30f8f8cSSatish Balay       col = garray[bj[j]];
907*a30f8f8cSSatish Balay       key = row*Nbs + col + 1;
908*a30f8f8cSSatish Balay       h1  = HASH(size,key,tmp);
909*a30f8f8cSSatish Balay       for (k=0; k<size; k++){
910*a30f8f8cSSatish Balay         if (HT[(h1+k)%size] == 0.0) {
911*a30f8f8cSSatish Balay           HT[(h1+k)%size] = key;
912*a30f8f8cSSatish Balay           HD[(h1+k)%size] = b->a + j*bs2;
913*a30f8f8cSSatish Balay           break;
914*a30f8f8cSSatish Balay #if defined(PETSC_USE_BOPT_g)
915*a30f8f8cSSatish Balay         } else {
916*a30f8f8cSSatish Balay           ct++;
917*a30f8f8cSSatish Balay #endif
918*a30f8f8cSSatish Balay         }
919*a30f8f8cSSatish Balay       }
920*a30f8f8cSSatish Balay #if defined(PETSC_USE_BOPT_g)
921*a30f8f8cSSatish Balay       if (k> max) max = k;
922*a30f8f8cSSatish Balay #endif
923*a30f8f8cSSatish Balay     }
924*a30f8f8cSSatish Balay   }
925*a30f8f8cSSatish Balay 
926*a30f8f8cSSatish Balay   /* Print Summary */
927*a30f8f8cSSatish Balay #if defined(PETSC_USE_BOPT_g)
928*a30f8f8cSSatish Balay   for (i=0,j=0; i<size; i++) {
929*a30f8f8cSSatish Balay     if (HT[i]) {j++;}
930*a30f8f8cSSatish Balay   }
931*a30f8f8cSSatish Balay   PLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n",(j== 0)? 0.0:((PetscReal)(ct+j))/j,max);
932*a30f8f8cSSatish Balay #endif
933*a30f8f8cSSatish Balay   PetscFunctionReturn(0);
934*a30f8f8cSSatish Balay }
935*a30f8f8cSSatish Balay 
936*a30f8f8cSSatish Balay #undef __FUNC__
937*a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatAssemblyBegin_MPISBAIJ"
938*a30f8f8cSSatish Balay int MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
939*a30f8f8cSSatish Balay {
940*a30f8f8cSSatish Balay   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
941*a30f8f8cSSatish Balay   int         ierr,nstash,reallocs;
942*a30f8f8cSSatish Balay   InsertMode  addv;
943*a30f8f8cSSatish Balay 
944*a30f8f8cSSatish Balay   PetscFunctionBegin;
945*a30f8f8cSSatish Balay   if (baij->donotstash) {
946*a30f8f8cSSatish Balay     PetscFunctionReturn(0);
947*a30f8f8cSSatish Balay   }
948*a30f8f8cSSatish Balay 
949*a30f8f8cSSatish Balay   /* make sure all processors are either in INSERTMODE or ADDMODE */
950*a30f8f8cSSatish Balay   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
951*a30f8f8cSSatish Balay   if (addv == (ADD_VALUES|INSERT_VALUES)) {
952*a30f8f8cSSatish Balay     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added");
953*a30f8f8cSSatish Balay   }
954*a30f8f8cSSatish Balay   mat->insertmode = addv; /* in case this processor had no cache */
955*a30f8f8cSSatish Balay 
956*a30f8f8cSSatish Balay   ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
957*a30f8f8cSSatish Balay   ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
958*a30f8f8cSSatish Balay   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
959*a30f8f8cSSatish Balay   PLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Stash has %d entries,uses %d mallocs.\n",nstash,reallocs);
960*a30f8f8cSSatish Balay   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
961*a30f8f8cSSatish Balay   PLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
962*a30f8f8cSSatish Balay   PetscFunctionReturn(0);
963*a30f8f8cSSatish Balay }
964*a30f8f8cSSatish Balay 
965*a30f8f8cSSatish Balay #undef __FUNC__
966*a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatAssemblyEnd_MPISBAIJ"
967*a30f8f8cSSatish Balay int MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
968*a30f8f8cSSatish Balay {
969*a30f8f8cSSatish Balay   Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data;
970*a30f8f8cSSatish Balay   Mat_SeqSBAIJ  *a=(Mat_SeqSBAIJ*)baij->A->data;
971*a30f8f8cSSatish Balay   Mat_SeqBAIJ  *b=(Mat_SeqBAIJ*)baij->B->data;
972*a30f8f8cSSatish Balay   int         i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2;
973*a30f8f8cSSatish Balay   int         *row,*col,other_disassembled;
974*a30f8f8cSSatish Balay   PetscTruth  r1,r2,r3;
975*a30f8f8cSSatish Balay   MatScalar   *val;
976*a30f8f8cSSatish Balay   InsertMode  addv = mat->insertmode;
977*a30f8f8cSSatish Balay   int         rank;
978*a30f8f8cSSatish Balay 
979*a30f8f8cSSatish Balay   PetscFunctionBegin;
980*a30f8f8cSSatish Balay   /* remove 2 line below later */
981*a30f8f8cSSatish Balay   ierr = MPI_Comm_rank(PETSC_COMM_WORLD, &rank);CHKERRQ(ierr);
982*a30f8f8cSSatish Balay 
983*a30f8f8cSSatish Balay   if (!baij->donotstash) {
984*a30f8f8cSSatish Balay     while (1) {
985*a30f8f8cSSatish Balay       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
986*a30f8f8cSSatish Balay       /*
987*a30f8f8cSSatish Balay       PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d]: in AssemblyEnd, stash, flg=%d\n",rank,flg);
988*a30f8f8cSSatish Balay       PetscSynchronizedFlush(PETSC_COMM_WORLD);
989*a30f8f8cSSatish Balay       */
990*a30f8f8cSSatish Balay       if (!flg) break;
991*a30f8f8cSSatish Balay 
992*a30f8f8cSSatish Balay       for (i=0; i<n;) {
993*a30f8f8cSSatish Balay         /* Now identify the consecutive vals belonging to the same row */
994*a30f8f8cSSatish Balay         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
995*a30f8f8cSSatish Balay         if (j < n) ncols = j-i;
996*a30f8f8cSSatish Balay         else       ncols = n-i;
997*a30f8f8cSSatish Balay         /* Now assemble all these values with a single function call */
998*a30f8f8cSSatish Balay         ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
999*a30f8f8cSSatish Balay         i = j;
1000*a30f8f8cSSatish Balay       }
1001*a30f8f8cSSatish Balay     }
1002*a30f8f8cSSatish Balay     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
1003*a30f8f8cSSatish Balay     /* Now process the block-stash. Since the values are stashed column-oriented,
1004*a30f8f8cSSatish Balay        set the roworiented flag to column oriented, and after MatSetValues()
1005*a30f8f8cSSatish Balay        restore the original flags */
1006*a30f8f8cSSatish Balay     r1 = baij->roworiented;
1007*a30f8f8cSSatish Balay     r2 = a->roworiented;
1008*a30f8f8cSSatish Balay     r3 = b->roworiented;
1009*a30f8f8cSSatish Balay     baij->roworiented = PETSC_FALSE;
1010*a30f8f8cSSatish Balay     a->roworiented    = PETSC_FALSE;
1011*a30f8f8cSSatish Balay     b->roworiented    = PETSC_FALSE;
1012*a30f8f8cSSatish Balay     while (1) {
1013*a30f8f8cSSatish Balay       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
1014*a30f8f8cSSatish Balay       if (!flg) break;
1015*a30f8f8cSSatish Balay 
1016*a30f8f8cSSatish Balay       for (i=0; i<n;) {
1017*a30f8f8cSSatish Balay         /* Now identify the consecutive vals belonging to the same row */
1018*a30f8f8cSSatish Balay         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
1019*a30f8f8cSSatish Balay         if (j < n) ncols = j-i;
1020*a30f8f8cSSatish Balay         else       ncols = n-i;
1021*a30f8f8cSSatish Balay         ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
1022*a30f8f8cSSatish Balay         i = j;
1023*a30f8f8cSSatish Balay       }
1024*a30f8f8cSSatish Balay     }
1025*a30f8f8cSSatish Balay     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
1026*a30f8f8cSSatish Balay     baij->roworiented = r1;
1027*a30f8f8cSSatish Balay     a->roworiented    = r2;
1028*a30f8f8cSSatish Balay     b->roworiented    = r3;
1029*a30f8f8cSSatish Balay   }
1030*a30f8f8cSSatish Balay 
1031*a30f8f8cSSatish Balay   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
1032*a30f8f8cSSatish Balay   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
1033*a30f8f8cSSatish Balay 
1034*a30f8f8cSSatish Balay   /* determine if any processor has disassembled, if so we must
1035*a30f8f8cSSatish Balay      also disassemble ourselfs, in order that we may reassemble. */
1036*a30f8f8cSSatish Balay   /*
1037*a30f8f8cSSatish Balay      if nonzero structure of submatrix B cannot change then we know that
1038*a30f8f8cSSatish Balay      no processor disassembled thus we can skip this stuff
1039*a30f8f8cSSatish Balay   */
1040*a30f8f8cSSatish Balay   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
1041*a30f8f8cSSatish Balay     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
1042*a30f8f8cSSatish Balay     if (mat->was_assembled && !other_disassembled) {
1043*a30f8f8cSSatish Balay       ierr = DisAssemble_MPIBAIJ(mat);CHKERRQ(ierr);
1044*a30f8f8cSSatish Balay     }
1045*a30f8f8cSSatish Balay   }
1046*a30f8f8cSSatish Balay 
1047*a30f8f8cSSatish Balay   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
1048*a30f8f8cSSatish Balay     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr);
1049*a30f8f8cSSatish Balay   }
1050*a30f8f8cSSatish Balay   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
1051*a30f8f8cSSatish Balay   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
1052*a30f8f8cSSatish Balay 
1053*a30f8f8cSSatish Balay #if defined(PETSC_USE_BOPT_g)
1054*a30f8f8cSSatish Balay   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
1055*a30f8f8cSSatish Balay     PLogInfo(0,"MatAssemblyEnd_MPISBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((double)baij->ht_total_ct)/baij->ht_insert_ct);
1056*a30f8f8cSSatish Balay     baij->ht_total_ct  = 0;
1057*a30f8f8cSSatish Balay     baij->ht_insert_ct = 0;
1058*a30f8f8cSSatish Balay   }
1059*a30f8f8cSSatish Balay #endif
1060*a30f8f8cSSatish Balay   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
1061*a30f8f8cSSatish Balay     ierr = MatCreateHashTable_MPIBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
1062*a30f8f8cSSatish Balay     mat->ops->setvalues        = MatSetValues_MPISBAIJ_HT;
1063*a30f8f8cSSatish Balay     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPISBAIJ_HT;
1064*a30f8f8cSSatish Balay   }
1065*a30f8f8cSSatish Balay 
1066*a30f8f8cSSatish Balay   if (baij->rowvalues) {
1067*a30f8f8cSSatish Balay     ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
1068*a30f8f8cSSatish Balay     baij->rowvalues = 0;
1069*a30f8f8cSSatish Balay   }
1070*a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1071*a30f8f8cSSatish Balay }
1072*a30f8f8cSSatish Balay 
1073*a30f8f8cSSatish Balay #undef __FUNC__
1074*a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatView_MPISBAIJ_ASCIIorDraworSocket"
1075*a30f8f8cSSatish Balay static int MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer)
1076*a30f8f8cSSatish Balay {
1077*a30f8f8cSSatish Balay   Mat_MPISBAIJ  *baij = (Mat_MPISBAIJ*)mat->data;
1078*a30f8f8cSSatish Balay   int          ierr,format,bs = baij->bs,size = baij->size,rank = baij->rank;
1079*a30f8f8cSSatish Balay   PetscTruth   isascii,isdraw;
1080*a30f8f8cSSatish Balay 
1081*a30f8f8cSSatish Balay   PetscFunctionBegin;
1082*a30f8f8cSSatish Balay   ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
1083*a30f8f8cSSatish Balay   ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
1084*a30f8f8cSSatish Balay   if (isascii) {
1085*a30f8f8cSSatish Balay     ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1086*a30f8f8cSSatish Balay     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
1087*a30f8f8cSSatish Balay       MatInfo info;
1088*a30f8f8cSSatish Balay       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
1089*a30f8f8cSSatish Balay       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1090*a30f8f8cSSatish Balay       ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
1091*a30f8f8cSSatish Balay               rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
1092*a30f8f8cSSatish Balay               baij->bs,(int)info.memory);CHKERRQ(ierr);
1093*a30f8f8cSSatish Balay       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1094*a30f8f8cSSatish Balay       ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
1095*a30f8f8cSSatish Balay       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1096*a30f8f8cSSatish Balay       ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
1097*a30f8f8cSSatish Balay       ierr = ViewerFlush(viewer);CHKERRQ(ierr);
1098*a30f8f8cSSatish Balay       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
1099*a30f8f8cSSatish Balay       PetscFunctionReturn(0);
1100*a30f8f8cSSatish Balay     } else if (format == VIEWER_FORMAT_ASCII_INFO) {
1101*a30f8f8cSSatish Balay       ierr = ViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
1102*a30f8f8cSSatish Balay       PetscFunctionReturn(0);
1103*a30f8f8cSSatish Balay     }
1104*a30f8f8cSSatish Balay   }
1105*a30f8f8cSSatish Balay 
1106*a30f8f8cSSatish Balay   if (isdraw) {
1107*a30f8f8cSSatish Balay     Draw       draw;
1108*a30f8f8cSSatish Balay     PetscTruth isnull;
1109*a30f8f8cSSatish Balay     ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1110*a30f8f8cSSatish Balay     ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1111*a30f8f8cSSatish Balay   }
1112*a30f8f8cSSatish Balay 
1113*a30f8f8cSSatish Balay   if (size == 1) {
1114*a30f8f8cSSatish Balay     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
1115*a30f8f8cSSatish Balay   } else {
1116*a30f8f8cSSatish Balay     /* assemble the entire matrix onto first processor. */
1117*a30f8f8cSSatish Balay     Mat         A;
1118*a30f8f8cSSatish Balay     Mat_SeqBAIJ *Aloc;
1119*a30f8f8cSSatish Balay     int         M = baij->M,N = baij->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
1120*a30f8f8cSSatish Balay     MatScalar   *a;
1121*a30f8f8cSSatish Balay 
1122*a30f8f8cSSatish Balay     if (!rank) {
1123*a30f8f8cSSatish Balay       ierr = MatCreateMPISBAIJ(mat->comm,baij->bs,M,M,M,M,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
1124*a30f8f8cSSatish Balay     } else {
1125*a30f8f8cSSatish Balay       ierr = MatCreateMPISBAIJ(mat->comm,baij->bs,0,0,M,M,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
1126*a30f8f8cSSatish Balay     }
1127*a30f8f8cSSatish Balay     PLogObjectParent(mat,A);
1128*a30f8f8cSSatish Balay 
1129*a30f8f8cSSatish Balay     /* copy over the A part */
1130*a30f8f8cSSatish Balay     Aloc  = (Mat_SeqBAIJ*)baij->A->data;
1131*a30f8f8cSSatish Balay     ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
1132*a30f8f8cSSatish Balay     rvals = (int*)PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals);
1133*a30f8f8cSSatish Balay 
1134*a30f8f8cSSatish Balay     for (i=0; i<mbs; i++) {
1135*a30f8f8cSSatish Balay       rvals[0] = bs*(baij->rstart + i);
1136*a30f8f8cSSatish Balay       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1137*a30f8f8cSSatish Balay       for (j=ai[i]; j<ai[i+1]; j++) {
1138*a30f8f8cSSatish Balay         col = (baij->cstart+aj[j])*bs;
1139*a30f8f8cSSatish Balay         for (k=0; k<bs; k++) {
1140*a30f8f8cSSatish Balay           ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1141*a30f8f8cSSatish Balay           col++; a += bs;
1142*a30f8f8cSSatish Balay         }
1143*a30f8f8cSSatish Balay       }
1144*a30f8f8cSSatish Balay     }
1145*a30f8f8cSSatish Balay     /* copy over the B part */
1146*a30f8f8cSSatish Balay     Aloc = (Mat_SeqBAIJ*)baij->B->data;
1147*a30f8f8cSSatish Balay     ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1148*a30f8f8cSSatish Balay     for (i=0; i<mbs; i++) {
1149*a30f8f8cSSatish Balay       rvals[0] = bs*(baij->rstart + i);
1150*a30f8f8cSSatish Balay       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1151*a30f8f8cSSatish Balay       for (j=ai[i]; j<ai[i+1]; j++) {
1152*a30f8f8cSSatish Balay         col = baij->garray[aj[j]]*bs;
1153*a30f8f8cSSatish Balay         for (k=0; k<bs; k++) {
1154*a30f8f8cSSatish Balay           ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1155*a30f8f8cSSatish Balay           col++; a += bs;
1156*a30f8f8cSSatish Balay         }
1157*a30f8f8cSSatish Balay       }
1158*a30f8f8cSSatish Balay     }
1159*a30f8f8cSSatish Balay     ierr = PetscFree(rvals);CHKERRQ(ierr);
1160*a30f8f8cSSatish Balay     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1161*a30f8f8cSSatish Balay     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1162*a30f8f8cSSatish Balay     /*
1163*a30f8f8cSSatish Balay        Everyone has to call to draw the matrix since the graphics waits are
1164*a30f8f8cSSatish Balay        synchronized across all processors that share the Draw object
1165*a30f8f8cSSatish Balay     */
1166*a30f8f8cSSatish Balay     if (!rank) {
1167*a30f8f8cSSatish Balay       Viewer sviewer;
1168*a30f8f8cSSatish Balay       ierr = ViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1169*a30f8f8cSSatish Balay       ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1170*a30f8f8cSSatish Balay       ierr = ViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1171*a30f8f8cSSatish Balay     }
1172*a30f8f8cSSatish Balay     ierr = MatDestroy(A);CHKERRQ(ierr);
1173*a30f8f8cSSatish Balay   }
1174*a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1175*a30f8f8cSSatish Balay }
1176*a30f8f8cSSatish Balay 
1177*a30f8f8cSSatish Balay #undef __FUNC__
1178*a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatView_MPISBAIJ"
1179*a30f8f8cSSatish Balay int MatView_MPISBAIJ(Mat mat,Viewer viewer)
1180*a30f8f8cSSatish Balay {
1181*a30f8f8cSSatish Balay   int        ierr;
1182*a30f8f8cSSatish Balay   PetscTruth isascii,isdraw,issocket,isbinary;
1183*a30f8f8cSSatish Balay 
1184*a30f8f8cSSatish Balay   PetscFunctionBegin;
1185*a30f8f8cSSatish Balay   ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
1186*a30f8f8cSSatish Balay   ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
1187*a30f8f8cSSatish Balay   ierr = PetscTypeCompare((PetscObject)viewer,SOCKET_VIEWER,&issocket);CHKERRQ(ierr);
1188*a30f8f8cSSatish Balay   ierr = PetscTypeCompare((PetscObject)viewer,BINARY_VIEWER,&isbinary);CHKERRQ(ierr);
1189*a30f8f8cSSatish Balay   if (isascii || isdraw || issocket || isbinary) {
1190*a30f8f8cSSatish Balay     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1191*a30f8f8cSSatish Balay   } else {
1192*a30f8f8cSSatish Balay     SETERRQ1(1,1,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name);
1193*a30f8f8cSSatish Balay   }
1194*a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1195*a30f8f8cSSatish Balay }
1196*a30f8f8cSSatish Balay 
1197*a30f8f8cSSatish Balay #undef __FUNC__
1198*a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatDestroy_MPISBAIJ"
1199*a30f8f8cSSatish Balay int MatDestroy_MPISBAIJ(Mat mat)
1200*a30f8f8cSSatish Balay {
1201*a30f8f8cSSatish Balay   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1202*a30f8f8cSSatish Balay   int         ierr;
1203*a30f8f8cSSatish Balay 
1204*a30f8f8cSSatish Balay   PetscFunctionBegin;
1205*a30f8f8cSSatish Balay 
1206*a30f8f8cSSatish Balay   if (mat->mapping) {
1207*a30f8f8cSSatish Balay     ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr);
1208*a30f8f8cSSatish Balay   }
1209*a30f8f8cSSatish Balay   if (mat->bmapping) {
1210*a30f8f8cSSatish Balay     ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr);
1211*a30f8f8cSSatish Balay   }
1212*a30f8f8cSSatish Balay   if (mat->rmap) {
1213*a30f8f8cSSatish Balay     ierr = MapDestroy(mat->rmap);CHKERRQ(ierr);
1214*a30f8f8cSSatish Balay   }
1215*a30f8f8cSSatish Balay   if (mat->cmap) {
1216*a30f8f8cSSatish Balay     ierr = MapDestroy(mat->cmap);CHKERRQ(ierr);
1217*a30f8f8cSSatish Balay   }
1218*a30f8f8cSSatish Balay #if defined(PETSC_USE_LOG)
1219*a30f8f8cSSatish Balay   PLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",baij->M,baij->N);
1220*a30f8f8cSSatish Balay #endif
1221*a30f8f8cSSatish Balay 
1222*a30f8f8cSSatish Balay   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1223*a30f8f8cSSatish Balay   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
1224*a30f8f8cSSatish Balay 
1225*a30f8f8cSSatish Balay   ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
1226*a30f8f8cSSatish Balay   ierr = MatDestroy(baij->A);CHKERRQ(ierr);
1227*a30f8f8cSSatish Balay   ierr = MatDestroy(baij->B);CHKERRQ(ierr);
1228*a30f8f8cSSatish Balay #if defined (PETSC_USE_CTABLE)
1229*a30f8f8cSSatish Balay   if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
1230*a30f8f8cSSatish Balay #else
1231*a30f8f8cSSatish Balay   if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
1232*a30f8f8cSSatish Balay #endif
1233*a30f8f8cSSatish Balay   if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
1234*a30f8f8cSSatish Balay   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
1235*a30f8f8cSSatish Balay   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
1236*a30f8f8cSSatish Balay   if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
1237*a30f8f8cSSatish Balay   if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
1238*a30f8f8cSSatish Balay   if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
1239*a30f8f8cSSatish Balay #if defined(PETSC_USE_MAT_SINGLE)
1240*a30f8f8cSSatish Balay   if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);}
1241*a30f8f8cSSatish Balay #endif
1242*a30f8f8cSSatish Balay   ierr = PetscFree(baij);CHKERRQ(ierr);
1243*a30f8f8cSSatish Balay   PLogObjectDestroy(mat);
1244*a30f8f8cSSatish Balay   PetscHeaderDestroy(mat);
1245*a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1246*a30f8f8cSSatish Balay }
1247*a30f8f8cSSatish Balay 
1248*a30f8f8cSSatish Balay #undef __FUNC__
1249*a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatMult_MPISBAIJ"
1250*a30f8f8cSSatish Balay int MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
1251*a30f8f8cSSatish Balay {
1252*a30f8f8cSSatish Balay   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1253*a30f8f8cSSatish Balay   int         ierr,nt;
1254*a30f8f8cSSatish Balay 
1255*a30f8f8cSSatish Balay   PetscFunctionBegin;
1256*a30f8f8cSSatish Balay   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1257*a30f8f8cSSatish Balay   if (nt != a->n) {
1258*a30f8f8cSSatish Balay     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx");
1259*a30f8f8cSSatish Balay   }
1260*a30f8f8cSSatish Balay   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1261*a30f8f8cSSatish Balay   if (nt != a->m) {
1262*a30f8f8cSSatish Balay     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible parition of A and yy");
1263*a30f8f8cSSatish Balay   }
1264*a30f8f8cSSatish Balay   /* do subdiagonal part */
1265*a30f8f8cSSatish Balay   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1266*a30f8f8cSSatish Balay   /* send it on its way */
1267*a30f8f8cSSatish Balay   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1268*a30f8f8cSSatish Balay   /* do local part */
1269*a30f8f8cSSatish Balay   ierr = (*a->B->ops->mult)(a->B,xx,yy);CHKERRQ(ierr);
1270*a30f8f8cSSatish Balay   ierr = (*a->A->ops->multadd)(a->A,xx,yy,yy);CHKERRQ(ierr);
1271*a30f8f8cSSatish Balay   /* receive remote parts */
1272*a30f8f8cSSatish Balay   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1273*a30f8f8cSSatish Balay 
1274*a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1275*a30f8f8cSSatish Balay }
1276*a30f8f8cSSatish Balay 
1277*a30f8f8cSSatish Balay #undef __FUNC__
1278*a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatMultAdd_MPISBAIJ"
1279*a30f8f8cSSatish Balay int MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1280*a30f8f8cSSatish Balay {
1281*a30f8f8cSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1282*a30f8f8cSSatish Balay   int        ierr;
1283*a30f8f8cSSatish Balay 
1284*a30f8f8cSSatish Balay   PetscFunctionBegin;
1285*a30f8f8cSSatish Balay   /* do subdiagonal part */
1286*a30f8f8cSSatish Balay   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1287*a30f8f8cSSatish Balay   /* send it on its way */
1288*a30f8f8cSSatish Balay   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1289*a30f8f8cSSatish Balay   /* do local part */
1290*a30f8f8cSSatish Balay   ierr = (*a->B->ops->multadd)(a->B,xx,yy,zz);CHKERRQ(ierr);
1291*a30f8f8cSSatish Balay   ierr = (*a->A->ops->multadd)(a->A,xx,zz,zz);CHKERRQ(ierr);
1292*a30f8f8cSSatish Balay   /* receive remote parts */
1293*a30f8f8cSSatish Balay   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1294*a30f8f8cSSatish Balay 
1295*a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1296*a30f8f8cSSatish Balay }
1297*a30f8f8cSSatish Balay 
1298*a30f8f8cSSatish Balay #undef __FUNC__
1299*a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatMultTranspose_MPISBAIJ"
1300*a30f8f8cSSatish Balay int MatMultTranspose_MPISBAIJ(Mat A,Vec xx,Vec yy)
1301*a30f8f8cSSatish Balay {
1302*a30f8f8cSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1303*a30f8f8cSSatish Balay   int         ierr;
1304*a30f8f8cSSatish Balay 
1305*a30f8f8cSSatish Balay   PetscFunctionBegin;
1306*a30f8f8cSSatish Balay   /* do nondiagonal part */
1307*a30f8f8cSSatish Balay   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1308*a30f8f8cSSatish Balay   /* send it on its way */
1309*a30f8f8cSSatish Balay   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1310*a30f8f8cSSatish Balay   /* do local part */
1311*a30f8f8cSSatish Balay   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1312*a30f8f8cSSatish Balay   /* receive remote parts: note this assumes the values are not actually */
1313*a30f8f8cSSatish Balay   /* inserted in yy until the next line, which is true for my implementation*/
1314*a30f8f8cSSatish Balay   /* but is not perhaps always true. */
1315*a30f8f8cSSatish Balay   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1316*a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1317*a30f8f8cSSatish Balay }
1318*a30f8f8cSSatish Balay 
1319*a30f8f8cSSatish Balay #undef __FUNC__
1320*a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatMultTransposeAdd_MPISBAIJ"
1321*a30f8f8cSSatish Balay int MatMultTransposeAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1322*a30f8f8cSSatish Balay {
1323*a30f8f8cSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1324*a30f8f8cSSatish Balay   int         ierr;
1325*a30f8f8cSSatish Balay 
1326*a30f8f8cSSatish Balay   PetscFunctionBegin;
1327*a30f8f8cSSatish Balay   /* do nondiagonal part */
1328*a30f8f8cSSatish Balay   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1329*a30f8f8cSSatish Balay   /* send it on its way */
1330*a30f8f8cSSatish Balay   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1331*a30f8f8cSSatish Balay   /* do local part */
1332*a30f8f8cSSatish Balay   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1333*a30f8f8cSSatish Balay   /* receive remote parts: note this assumes the values are not actually */
1334*a30f8f8cSSatish Balay   /* inserted in yy until the next line, which is true for my implementation*/
1335*a30f8f8cSSatish Balay   /* but is not perhaps always true. */
1336*a30f8f8cSSatish Balay   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1337*a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1338*a30f8f8cSSatish Balay }
1339*a30f8f8cSSatish Balay 
1340*a30f8f8cSSatish Balay /*
1341*a30f8f8cSSatish Balay   This only works correctly for square matrices where the subblock A->A is the
1342*a30f8f8cSSatish Balay    diagonal block
1343*a30f8f8cSSatish Balay */
1344*a30f8f8cSSatish Balay #undef __FUNC__
1345*a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatGetDiagonal_MPISBAIJ"
1346*a30f8f8cSSatish Balay int MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
1347*a30f8f8cSSatish Balay {
1348*a30f8f8cSSatish Balay   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1349*a30f8f8cSSatish Balay   int         ierr;
1350*a30f8f8cSSatish Balay 
1351*a30f8f8cSSatish Balay   PetscFunctionBegin;
1352*a30f8f8cSSatish Balay   /* if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block"); */
1353*a30f8f8cSSatish Balay   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1354*a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1355*a30f8f8cSSatish Balay }
1356*a30f8f8cSSatish Balay 
1357*a30f8f8cSSatish Balay #undef __FUNC__
1358*a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatScale_MPISBAIJ"
1359*a30f8f8cSSatish Balay int MatScale_MPISBAIJ(Scalar *aa,Mat A)
1360*a30f8f8cSSatish Balay {
1361*a30f8f8cSSatish Balay   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1362*a30f8f8cSSatish Balay   int         ierr;
1363*a30f8f8cSSatish Balay 
1364*a30f8f8cSSatish Balay   PetscFunctionBegin;
1365*a30f8f8cSSatish Balay   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
1366*a30f8f8cSSatish Balay   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
1367*a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1368*a30f8f8cSSatish Balay }
1369*a30f8f8cSSatish Balay 
1370*a30f8f8cSSatish Balay #undef __FUNC__
1371*a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatGetSize_MPISBAIJ"
1372*a30f8f8cSSatish Balay int MatGetSize_MPISBAIJ(Mat matin,int *m,int *n)
1373*a30f8f8cSSatish Balay {
1374*a30f8f8cSSatish Balay   Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data;
1375*a30f8f8cSSatish Balay 
1376*a30f8f8cSSatish Balay   PetscFunctionBegin;
1377*a30f8f8cSSatish Balay   if (m) *m = mat->M;
1378*a30f8f8cSSatish Balay   if (n) *n = mat->N;
1379*a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1380*a30f8f8cSSatish Balay }
1381*a30f8f8cSSatish Balay 
1382*a30f8f8cSSatish Balay #undef __FUNC__
1383*a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatGetLocalSize_MPISBAIJ"
1384*a30f8f8cSSatish Balay int MatGetLocalSize_MPISBAIJ(Mat matin,int *m,int *n)
1385*a30f8f8cSSatish Balay {
1386*a30f8f8cSSatish Balay   Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data;
1387*a30f8f8cSSatish Balay 
1388*a30f8f8cSSatish Balay   PetscFunctionBegin;
1389*a30f8f8cSSatish Balay   *m = mat->m; *n = mat->n;
1390*a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1391*a30f8f8cSSatish Balay }
1392*a30f8f8cSSatish Balay 
1393*a30f8f8cSSatish Balay #undef __FUNC__
1394*a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatGetOwnershipRange_MPISBAIJ"
1395*a30f8f8cSSatish Balay int MatGetOwnershipRange_MPISBAIJ(Mat matin,int *m,int *n)
1396*a30f8f8cSSatish Balay {
1397*a30f8f8cSSatish Balay   Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data;
1398*a30f8f8cSSatish Balay 
1399*a30f8f8cSSatish Balay   PetscFunctionBegin;
1400*a30f8f8cSSatish Balay   if (m) *m = mat->rstart*mat->bs;
1401*a30f8f8cSSatish Balay   if (n) *n = mat->rend*mat->bs;
1402*a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1403*a30f8f8cSSatish Balay }
1404*a30f8f8cSSatish Balay 
1405*a30f8f8cSSatish Balay #undef __FUNC__
1406*a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatGetRow_MPISBAIJ"
1407*a30f8f8cSSatish Balay int MatGetRow_MPISBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
1408*a30f8f8cSSatish Balay {
1409*a30f8f8cSSatish Balay   Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data;
1410*a30f8f8cSSatish Balay   Scalar     *vworkA,*vworkB,**pvA,**pvB,*v_p;
1411*a30f8f8cSSatish Balay   int        bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB;
1412*a30f8f8cSSatish Balay   int        nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs;
1413*a30f8f8cSSatish Balay   int        *cmap,*idx_p,cstart = mat->cstart;
1414*a30f8f8cSSatish Balay 
1415*a30f8f8cSSatish Balay   PetscFunctionBegin;
1416*a30f8f8cSSatish Balay   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active");
1417*a30f8f8cSSatish Balay   mat->getrowactive = PETSC_TRUE;
1418*a30f8f8cSSatish Balay 
1419*a30f8f8cSSatish Balay   if (!mat->rowvalues && (idx || v)) {
1420*a30f8f8cSSatish Balay     /*
1421*a30f8f8cSSatish Balay         allocate enough space to hold information from the longest row.
1422*a30f8f8cSSatish Balay     */
1423*a30f8f8cSSatish Balay     Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
1424*a30f8f8cSSatish Balay     Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
1425*a30f8f8cSSatish Balay     int     max = 1,mbs = mat->mbs,tmp;
1426*a30f8f8cSSatish Balay     for (i=0; i<mbs; i++) {
1427*a30f8f8cSSatish Balay       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
1428*a30f8f8cSSatish Balay       if (max < tmp) { max = tmp; }
1429*a30f8f8cSSatish Balay     }
1430*a30f8f8cSSatish Balay     mat->rowvalues = (Scalar*)PetscMalloc(max*bs2*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues);
1431*a30f8f8cSSatish Balay     mat->rowindices = (int*)(mat->rowvalues + max*bs2);
1432*a30f8f8cSSatish Balay   }
1433*a30f8f8cSSatish Balay 
1434*a30f8f8cSSatish Balay   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,0,"Only local rows")
1435*a30f8f8cSSatish Balay   lrow = row - brstart;  /* local row index */
1436*a30f8f8cSSatish Balay 
1437*a30f8f8cSSatish Balay   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1438*a30f8f8cSSatish Balay   if (!v)   {pvA = 0; pvB = 0;}
1439*a30f8f8cSSatish Balay   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1440*a30f8f8cSSatish Balay   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1441*a30f8f8cSSatish Balay   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1442*a30f8f8cSSatish Balay   nztot = nzA + nzB;
1443*a30f8f8cSSatish Balay 
1444*a30f8f8cSSatish Balay   cmap  = mat->garray;
1445*a30f8f8cSSatish Balay   if (v  || idx) {
1446*a30f8f8cSSatish Balay     if (nztot) {
1447*a30f8f8cSSatish Balay       /* Sort by increasing column numbers, assuming A and B already sorted */
1448*a30f8f8cSSatish Balay       int imark = -1;
1449*a30f8f8cSSatish Balay       if (v) {
1450*a30f8f8cSSatish Balay         *v = v_p = mat->rowvalues;
1451*a30f8f8cSSatish Balay         for (i=0; i<nzB; i++) {
1452*a30f8f8cSSatish Balay           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1453*a30f8f8cSSatish Balay           else break;
1454*a30f8f8cSSatish Balay         }
1455*a30f8f8cSSatish Balay         imark = i;
1456*a30f8f8cSSatish Balay         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1457*a30f8f8cSSatish Balay         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1458*a30f8f8cSSatish Balay       }
1459*a30f8f8cSSatish Balay       if (idx) {
1460*a30f8f8cSSatish Balay         *idx = idx_p = mat->rowindices;
1461*a30f8f8cSSatish Balay         if (imark > -1) {
1462*a30f8f8cSSatish Balay           for (i=0; i<imark; i++) {
1463*a30f8f8cSSatish Balay             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1464*a30f8f8cSSatish Balay           }
1465*a30f8f8cSSatish Balay         } else {
1466*a30f8f8cSSatish Balay           for (i=0; i<nzB; i++) {
1467*a30f8f8cSSatish Balay             if (cmap[cworkB[i]/bs] < cstart)
1468*a30f8f8cSSatish Balay               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1469*a30f8f8cSSatish Balay             else break;
1470*a30f8f8cSSatish Balay           }
1471*a30f8f8cSSatish Balay           imark = i;
1472*a30f8f8cSSatish Balay         }
1473*a30f8f8cSSatish Balay         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1474*a30f8f8cSSatish Balay         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1475*a30f8f8cSSatish Balay       }
1476*a30f8f8cSSatish Balay     } else {
1477*a30f8f8cSSatish Balay       if (idx) *idx = 0;
1478*a30f8f8cSSatish Balay       if (v)   *v   = 0;
1479*a30f8f8cSSatish Balay     }
1480*a30f8f8cSSatish Balay   }
1481*a30f8f8cSSatish Balay   *nz = nztot;
1482*a30f8f8cSSatish Balay   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1483*a30f8f8cSSatish Balay   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1484*a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1485*a30f8f8cSSatish Balay }
1486*a30f8f8cSSatish Balay 
1487*a30f8f8cSSatish Balay #undef __FUNC__
1488*a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatRestoreRow_MPISBAIJ"
1489*a30f8f8cSSatish Balay int MatRestoreRow_MPISBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1490*a30f8f8cSSatish Balay {
1491*a30f8f8cSSatish Balay   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1492*a30f8f8cSSatish Balay 
1493*a30f8f8cSSatish Balay   PetscFunctionBegin;
1494*a30f8f8cSSatish Balay   if (baij->getrowactive == PETSC_FALSE) {
1495*a30f8f8cSSatish Balay     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called");
1496*a30f8f8cSSatish Balay   }
1497*a30f8f8cSSatish Balay   baij->getrowactive = PETSC_FALSE;
1498*a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1499*a30f8f8cSSatish Balay }
1500*a30f8f8cSSatish Balay 
1501*a30f8f8cSSatish Balay #undef __FUNC__
1502*a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatGetBlockSize_MPISBAIJ"
1503*a30f8f8cSSatish Balay int MatGetBlockSize_MPISBAIJ(Mat mat,int *bs)
1504*a30f8f8cSSatish Balay {
1505*a30f8f8cSSatish Balay   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1506*a30f8f8cSSatish Balay 
1507*a30f8f8cSSatish Balay   PetscFunctionBegin;
1508*a30f8f8cSSatish Balay   *bs = baij->bs;
1509*a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1510*a30f8f8cSSatish Balay }
1511*a30f8f8cSSatish Balay 
1512*a30f8f8cSSatish Balay #undef __FUNC__
1513*a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatZeroEntries_MPISBAIJ"
1514*a30f8f8cSSatish Balay int MatZeroEntries_MPISBAIJ(Mat A)
1515*a30f8f8cSSatish Balay {
1516*a30f8f8cSSatish Balay   Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data;
1517*a30f8f8cSSatish Balay   int         ierr;
1518*a30f8f8cSSatish Balay 
1519*a30f8f8cSSatish Balay   PetscFunctionBegin;
1520*a30f8f8cSSatish Balay   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1521*a30f8f8cSSatish Balay   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1522*a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1523*a30f8f8cSSatish Balay }
1524*a30f8f8cSSatish Balay 
1525*a30f8f8cSSatish Balay #undef __FUNC__
1526*a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatGetInfo_MPISBAIJ"
1527*a30f8f8cSSatish Balay int MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1528*a30f8f8cSSatish Balay {
1529*a30f8f8cSSatish Balay   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data;
1530*a30f8f8cSSatish Balay   Mat         A = a->A,B = a->B;
1531*a30f8f8cSSatish Balay   int         ierr;
1532*a30f8f8cSSatish Balay   PetscReal   isend[5],irecv[5];
1533*a30f8f8cSSatish Balay 
1534*a30f8f8cSSatish Balay   PetscFunctionBegin;
1535*a30f8f8cSSatish Balay   info->block_size     = (double)a->bs;
1536*a30f8f8cSSatish Balay   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1537*a30f8f8cSSatish Balay   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1538*a30f8f8cSSatish Balay   isend[3] = info->memory;  isend[4] = info->mallocs;
1539*a30f8f8cSSatish Balay   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1540*a30f8f8cSSatish Balay   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1541*a30f8f8cSSatish Balay   isend[3] += info->memory;  isend[4] += info->mallocs;
1542*a30f8f8cSSatish Balay   if (flag == MAT_LOCAL) {
1543*a30f8f8cSSatish Balay     info->nz_used      = isend[0];
1544*a30f8f8cSSatish Balay     info->nz_allocated = isend[1];
1545*a30f8f8cSSatish Balay     info->nz_unneeded  = isend[2];
1546*a30f8f8cSSatish Balay     info->memory       = isend[3];
1547*a30f8f8cSSatish Balay     info->mallocs      = isend[4];
1548*a30f8f8cSSatish Balay   } else if (flag == MAT_GLOBAL_MAX) {
1549*a30f8f8cSSatish Balay     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr);
1550*a30f8f8cSSatish Balay     info->nz_used      = irecv[0];
1551*a30f8f8cSSatish Balay     info->nz_allocated = irecv[1];
1552*a30f8f8cSSatish Balay     info->nz_unneeded  = irecv[2];
1553*a30f8f8cSSatish Balay     info->memory       = irecv[3];
1554*a30f8f8cSSatish Balay     info->mallocs      = irecv[4];
1555*a30f8f8cSSatish Balay   } else if (flag == MAT_GLOBAL_SUM) {
1556*a30f8f8cSSatish Balay     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr);
1557*a30f8f8cSSatish Balay     info->nz_used      = irecv[0];
1558*a30f8f8cSSatish Balay     info->nz_allocated = irecv[1];
1559*a30f8f8cSSatish Balay     info->nz_unneeded  = irecv[2];
1560*a30f8f8cSSatish Balay     info->memory       = irecv[3];
1561*a30f8f8cSSatish Balay     info->mallocs      = irecv[4];
1562*a30f8f8cSSatish Balay   } else {
1563*a30f8f8cSSatish Balay     SETERRQ1(1,1,"Unknown MatInfoType argument %d",flag);
1564*a30f8f8cSSatish Balay   }
1565*a30f8f8cSSatish Balay   info->rows_global       = (double)a->M;
1566*a30f8f8cSSatish Balay   info->columns_global    = (double)a->N;
1567*a30f8f8cSSatish Balay   info->rows_local        = (double)a->m;
1568*a30f8f8cSSatish Balay   info->columns_local     = (double)a->N;
1569*a30f8f8cSSatish Balay   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1570*a30f8f8cSSatish Balay   info->fill_ratio_needed = 0;
1571*a30f8f8cSSatish Balay   info->factor_mallocs    = 0;
1572*a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1573*a30f8f8cSSatish Balay }
1574*a30f8f8cSSatish Balay 
1575*a30f8f8cSSatish Balay #undef __FUNC__
1576*a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatSetOption_MPISBAIJ"
1577*a30f8f8cSSatish Balay int MatSetOption_MPISBAIJ(Mat A,MatOption op)
1578*a30f8f8cSSatish Balay {
1579*a30f8f8cSSatish Balay   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1580*a30f8f8cSSatish Balay   int         ierr;
1581*a30f8f8cSSatish Balay 
1582*a30f8f8cSSatish Balay   PetscFunctionBegin;
1583*a30f8f8cSSatish Balay   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
1584*a30f8f8cSSatish Balay       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
1585*a30f8f8cSSatish Balay       op == MAT_COLUMNS_UNSORTED ||
1586*a30f8f8cSSatish Balay       op == MAT_COLUMNS_SORTED ||
1587*a30f8f8cSSatish Balay       op == MAT_NEW_NONZERO_ALLOCATION_ERR ||
1588*a30f8f8cSSatish Balay       op == MAT_KEEP_ZEROED_ROWS ||
1589*a30f8f8cSSatish Balay       op == MAT_NEW_NONZERO_LOCATION_ERR) {
1590*a30f8f8cSSatish Balay         ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1591*a30f8f8cSSatish Balay         ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1592*a30f8f8cSSatish Balay   } else if (op == MAT_ROW_ORIENTED) {
1593*a30f8f8cSSatish Balay         a->roworiented = PETSC_TRUE;
1594*a30f8f8cSSatish Balay         ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1595*a30f8f8cSSatish Balay         ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1596*a30f8f8cSSatish Balay   } else if (op == MAT_ROWS_SORTED ||
1597*a30f8f8cSSatish Balay              op == MAT_ROWS_UNSORTED ||
1598*a30f8f8cSSatish Balay              op == MAT_SYMMETRIC ||
1599*a30f8f8cSSatish Balay              op == MAT_STRUCTURALLY_SYMMETRIC ||
1600*a30f8f8cSSatish Balay              op == MAT_YES_NEW_DIAGONALS ||
1601*a30f8f8cSSatish Balay              op == MAT_USE_HASH_TABLE) {
1602*a30f8f8cSSatish Balay     PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
1603*a30f8f8cSSatish Balay   } else if (op == MAT_COLUMN_ORIENTED) {
1604*a30f8f8cSSatish Balay     a->roworiented = PETSC_FALSE;
1605*a30f8f8cSSatish Balay     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1606*a30f8f8cSSatish Balay     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1607*a30f8f8cSSatish Balay   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
1608*a30f8f8cSSatish Balay     a->donotstash = PETSC_TRUE;
1609*a30f8f8cSSatish Balay   } else if (op == MAT_NO_NEW_DIAGONALS) {
1610*a30f8f8cSSatish Balay     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
1611*a30f8f8cSSatish Balay   } else if (op == MAT_USE_HASH_TABLE) {
1612*a30f8f8cSSatish Balay     a->ht_flag = PETSC_TRUE;
1613*a30f8f8cSSatish Balay   } else {
1614*a30f8f8cSSatish Balay     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
1615*a30f8f8cSSatish Balay   }
1616*a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1617*a30f8f8cSSatish Balay }
1618*a30f8f8cSSatish Balay 
1619*a30f8f8cSSatish Balay #undef __FUNC__
1620*a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatTranspose_MPISBAIJ("
1621*a30f8f8cSSatish Balay int MatTranspose_MPISBAIJ(Mat A,Mat *matout)
1622*a30f8f8cSSatish Balay {
1623*a30f8f8cSSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data;
1624*a30f8f8cSSatish Balay   Mat_SeqBAIJ *Aloc;
1625*a30f8f8cSSatish Balay   Mat         B;
1626*a30f8f8cSSatish Balay   int         ierr,M=baij->M,N=baij->N,*ai,*aj,i,*rvals,j,k,col;
1627*a30f8f8cSSatish Balay   int         bs=baij->bs,mbs=baij->mbs;
1628*a30f8f8cSSatish Balay   MatScalar   *a;
1629*a30f8f8cSSatish Balay 
1630*a30f8f8cSSatish Balay   PetscFunctionBegin;
1631*a30f8f8cSSatish Balay   if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place");
1632*a30f8f8cSSatish Balay   ierr = MatCreateMPIBAIJ(A->comm,baij->bs,baij->n,baij->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);
1633*a30f8f8cSSatish Balay 
1634*a30f8f8cSSatish Balay   /* copy over the A part */
1635*a30f8f8cSSatish Balay   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1636*a30f8f8cSSatish Balay   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1637*a30f8f8cSSatish Balay   rvals = (int*)PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals);
1638*a30f8f8cSSatish Balay 
1639*a30f8f8cSSatish Balay   for (i=0; i<mbs; i++) {
1640*a30f8f8cSSatish Balay     rvals[0] = bs*(baij->rstart + i);
1641*a30f8f8cSSatish Balay     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1642*a30f8f8cSSatish Balay     for (j=ai[i]; j<ai[i+1]; j++) {
1643*a30f8f8cSSatish Balay       col = (baij->cstart+aj[j])*bs;
1644*a30f8f8cSSatish Balay       for (k=0; k<bs; k++) {
1645*a30f8f8cSSatish Balay         ierr = MatSetValues_MPISBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1646*a30f8f8cSSatish Balay         col++; a += bs;
1647*a30f8f8cSSatish Balay       }
1648*a30f8f8cSSatish Balay     }
1649*a30f8f8cSSatish Balay   }
1650*a30f8f8cSSatish Balay   /* copy over the B part */
1651*a30f8f8cSSatish Balay   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1652*a30f8f8cSSatish Balay   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1653*a30f8f8cSSatish Balay   for (i=0; i<mbs; i++) {
1654*a30f8f8cSSatish Balay     rvals[0] = bs*(baij->rstart + i);
1655*a30f8f8cSSatish Balay     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1656*a30f8f8cSSatish Balay     for (j=ai[i]; j<ai[i+1]; j++) {
1657*a30f8f8cSSatish Balay       col = baij->garray[aj[j]]*bs;
1658*a30f8f8cSSatish Balay       for (k=0; k<bs; k++) {
1659*a30f8f8cSSatish Balay         ierr = MatSetValues_MPISBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1660*a30f8f8cSSatish Balay         col++; a += bs;
1661*a30f8f8cSSatish Balay       }
1662*a30f8f8cSSatish Balay     }
1663*a30f8f8cSSatish Balay   }
1664*a30f8f8cSSatish Balay   ierr = PetscFree(rvals);CHKERRQ(ierr);
1665*a30f8f8cSSatish Balay   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1666*a30f8f8cSSatish Balay   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1667*a30f8f8cSSatish Balay 
1668*a30f8f8cSSatish Balay   if (matout) {
1669*a30f8f8cSSatish Balay     *matout = B;
1670*a30f8f8cSSatish Balay   } else {
1671*a30f8f8cSSatish Balay     PetscOps *Abops;
1672*a30f8f8cSSatish Balay     MatOps   Aops;
1673*a30f8f8cSSatish Balay 
1674*a30f8f8cSSatish Balay     /* This isn't really an in-place transpose .... but free data structures from baij */
1675*a30f8f8cSSatish Balay     ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
1676*a30f8f8cSSatish Balay     ierr = MatDestroy(baij->A);CHKERRQ(ierr);
1677*a30f8f8cSSatish Balay     ierr = MatDestroy(baij->B);CHKERRQ(ierr);
1678*a30f8f8cSSatish Balay #if defined (PETSC_USE_CTABLE)
1679*a30f8f8cSSatish Balay     if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
1680*a30f8f8cSSatish Balay #else
1681*a30f8f8cSSatish Balay     if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
1682*a30f8f8cSSatish Balay #endif
1683*a30f8f8cSSatish Balay     if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
1684*a30f8f8cSSatish Balay     if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
1685*a30f8f8cSSatish Balay     if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
1686*a30f8f8cSSatish Balay     ierr = PetscFree(baij);CHKERRQ(ierr);
1687*a30f8f8cSSatish Balay 
1688*a30f8f8cSSatish Balay     /*
1689*a30f8f8cSSatish Balay        This is horrible, horrible code. We need to keep the
1690*a30f8f8cSSatish Balay       A pointers for the bops and ops but copy everything
1691*a30f8f8cSSatish Balay       else from C.
1692*a30f8f8cSSatish Balay     */
1693*a30f8f8cSSatish Balay     Abops   = A->bops;
1694*a30f8f8cSSatish Balay     Aops    = A->ops;
1695*a30f8f8cSSatish Balay     ierr    = PetscMemcpy(A,B,sizeof(struct _p_Mat));CHKERRQ(ierr);
1696*a30f8f8cSSatish Balay     A->bops = Abops;
1697*a30f8f8cSSatish Balay     A->ops  = Aops;
1698*a30f8f8cSSatish Balay 
1699*a30f8f8cSSatish Balay     PetscHeaderDestroy(B);
1700*a30f8f8cSSatish Balay   }
1701*a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1702*a30f8f8cSSatish Balay }
1703*a30f8f8cSSatish Balay 
1704*a30f8f8cSSatish Balay #undef __FUNC__
1705*a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatDiagonalScale_MPISBAIJ"
1706*a30f8f8cSSatish Balay int MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1707*a30f8f8cSSatish Balay {
1708*a30f8f8cSSatish Balay   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1709*a30f8f8cSSatish Balay   Mat         a = baij->A,b = baij->B;
1710*a30f8f8cSSatish Balay   int         ierr,s1,s2,s3;
1711*a30f8f8cSSatish Balay 
1712*a30f8f8cSSatish Balay   PetscFunctionBegin;
1713*a30f8f8cSSatish Balay   if (ll != rr) {
1714*a30f8f8cSSatish Balay     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"For symmetric format, left and right scaling vectors must be same\n");
1715*a30f8f8cSSatish Balay   }
1716*a30f8f8cSSatish Balay   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1717*a30f8f8cSSatish Balay   if (rr) {
1718*a30f8f8cSSatish Balay     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1719*a30f8f8cSSatish Balay     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"right vector non-conforming local size");
1720*a30f8f8cSSatish Balay     /* Overlap communication with computation. */
1721*a30f8f8cSSatish Balay     ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1722*a30f8f8cSSatish Balay     /*} if (ll) { */
1723*a30f8f8cSSatish Balay     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1724*a30f8f8cSSatish Balay     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"left vector non-conforming local size");
1725*a30f8f8cSSatish Balay     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1726*a30f8f8cSSatish Balay     /* } */
1727*a30f8f8cSSatish Balay   /* scale  the diagonal block */
1728*a30f8f8cSSatish Balay   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1729*a30f8f8cSSatish Balay 
1730*a30f8f8cSSatish Balay   /* if (rr) { */
1731*a30f8f8cSSatish Balay     /* Do a scatter end and then right scale the off-diagonal block */
1732*a30f8f8cSSatish Balay     ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1733*a30f8f8cSSatish Balay     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1734*a30f8f8cSSatish Balay   }
1735*a30f8f8cSSatish Balay 
1736*a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1737*a30f8f8cSSatish Balay }
1738*a30f8f8cSSatish Balay 
1739*a30f8f8cSSatish Balay #undef __FUNC__
1740*a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatZeroRows_MPISBAIJ"
1741*a30f8f8cSSatish Balay int MatZeroRows_MPISBAIJ(Mat A,IS is,Scalar *diag)
1742*a30f8f8cSSatish Balay {
1743*a30f8f8cSSatish Balay   Mat_MPISBAIJ    *l = (Mat_MPISBAIJ*)A->data;
1744*a30f8f8cSSatish Balay   int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
1745*a30f8f8cSSatish Balay   int            *procs,*nprocs,j,found,idx,nsends,*work,row;
1746*a30f8f8cSSatish Balay   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
1747*a30f8f8cSSatish Balay   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
1748*a30f8f8cSSatish Balay   int            *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs;
1749*a30f8f8cSSatish Balay   MPI_Comm       comm = A->comm;
1750*a30f8f8cSSatish Balay   MPI_Request    *send_waits,*recv_waits;
1751*a30f8f8cSSatish Balay   MPI_Status     recv_status,*send_status;
1752*a30f8f8cSSatish Balay   IS             istmp;
1753*a30f8f8cSSatish Balay 
1754*a30f8f8cSSatish Balay   PetscFunctionBegin;
1755*a30f8f8cSSatish Balay   ierr = ISGetSize(is,&N);CHKERRQ(ierr);
1756*a30f8f8cSSatish Balay   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
1757*a30f8f8cSSatish Balay 
1758*a30f8f8cSSatish Balay   /*  first count number of contributors to each processor */
1759*a30f8f8cSSatish Balay   nprocs = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(nprocs);
1760*a30f8f8cSSatish Balay   ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
1761*a30f8f8cSSatish Balay   procs  = nprocs + size;
1762*a30f8f8cSSatish Balay   owner  = (int*)PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note*/
1763*a30f8f8cSSatish Balay   for (i=0; i<N; i++) {
1764*a30f8f8cSSatish Balay     idx   = rows[i];
1765*a30f8f8cSSatish Balay     found = 0;
1766*a30f8f8cSSatish Balay     for (j=0; j<size; j++) {
1767*a30f8f8cSSatish Balay       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
1768*a30f8f8cSSatish Balay         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
1769*a30f8f8cSSatish Balay       }
1770*a30f8f8cSSatish Balay     }
1771*a30f8f8cSSatish Balay     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
1772*a30f8f8cSSatish Balay   }
1773*a30f8f8cSSatish Balay   nsends = 0;  for (i=0; i<size; i++) { nsends += procs[i];}
1774*a30f8f8cSSatish Balay 
1775*a30f8f8cSSatish Balay   /* inform other processors of number of messages and max length*/
1776*a30f8f8cSSatish Balay   work   = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(work);
1777*a30f8f8cSSatish Balay   ierr   = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr);
1778*a30f8f8cSSatish Balay   nmax   = work[rank];
1779*a30f8f8cSSatish Balay   nrecvs = work[size+rank];
1780*a30f8f8cSSatish Balay   ierr = PetscFree(work);CHKERRQ(ierr);
1781*a30f8f8cSSatish Balay 
1782*a30f8f8cSSatish Balay   /* post receives:   */
1783*a30f8f8cSSatish Balay   rvalues = (int*)PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
1784*a30f8f8cSSatish Balay   recv_waits = (MPI_Request*)PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
1785*a30f8f8cSSatish Balay   for (i=0; i<nrecvs; i++) {
1786*a30f8f8cSSatish Balay     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
1787*a30f8f8cSSatish Balay   }
1788*a30f8f8cSSatish Balay 
1789*a30f8f8cSSatish Balay   /* do sends:
1790*a30f8f8cSSatish Balay      1) starts[i] gives the starting index in svalues for stuff going to
1791*a30f8f8cSSatish Balay      the ith processor
1792*a30f8f8cSSatish Balay   */
1793*a30f8f8cSSatish Balay   svalues    = (int*)PetscMalloc((N+1)*sizeof(int));CHKPTRQ(svalues);
1794*a30f8f8cSSatish Balay   send_waits = (MPI_Request*)PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
1795*a30f8f8cSSatish Balay   starts     = (int*)PetscMalloc((size+1)*sizeof(int));CHKPTRQ(starts);
1796*a30f8f8cSSatish Balay   starts[0]  = 0;
1797*a30f8f8cSSatish Balay   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
1798*a30f8f8cSSatish Balay   for (i=0; i<N; i++) {
1799*a30f8f8cSSatish Balay     svalues[starts[owner[i]]++] = rows[i];
1800*a30f8f8cSSatish Balay   }
1801*a30f8f8cSSatish Balay   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
1802*a30f8f8cSSatish Balay 
1803*a30f8f8cSSatish Balay   starts[0] = 0;
1804*a30f8f8cSSatish Balay   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
1805*a30f8f8cSSatish Balay   count = 0;
1806*a30f8f8cSSatish Balay   for (i=0; i<size; i++) {
1807*a30f8f8cSSatish Balay     if (procs[i]) {
1808*a30f8f8cSSatish Balay       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
1809*a30f8f8cSSatish Balay     }
1810*a30f8f8cSSatish Balay   }
1811*a30f8f8cSSatish Balay   ierr = PetscFree(starts);CHKERRQ(ierr);
1812*a30f8f8cSSatish Balay 
1813*a30f8f8cSSatish Balay   base = owners[rank]*bs;
1814*a30f8f8cSSatish Balay 
1815*a30f8f8cSSatish Balay   /*  wait on receives */
1816*a30f8f8cSSatish Balay   lens   = (int*)PetscMalloc(2*(nrecvs+1)*sizeof(int));CHKPTRQ(lens);
1817*a30f8f8cSSatish Balay   source = lens + nrecvs;
1818*a30f8f8cSSatish Balay   count  = nrecvs; slen = 0;
1819*a30f8f8cSSatish Balay   while (count) {
1820*a30f8f8cSSatish Balay     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
1821*a30f8f8cSSatish Balay     /* unpack receives into our local space */
1822*a30f8f8cSSatish Balay     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
1823*a30f8f8cSSatish Balay     source[imdex]  = recv_status.MPI_SOURCE;
1824*a30f8f8cSSatish Balay     lens[imdex]    = n;
1825*a30f8f8cSSatish Balay     slen          += n;
1826*a30f8f8cSSatish Balay     count--;
1827*a30f8f8cSSatish Balay   }
1828*a30f8f8cSSatish Balay   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1829*a30f8f8cSSatish Balay 
1830*a30f8f8cSSatish Balay   /* move the data into the send scatter */
1831*a30f8f8cSSatish Balay   lrows = (int*)PetscMalloc((slen+1)*sizeof(int));CHKPTRQ(lrows);
1832*a30f8f8cSSatish Balay   count = 0;
1833*a30f8f8cSSatish Balay   for (i=0; i<nrecvs; i++) {
1834*a30f8f8cSSatish Balay     values = rvalues + i*nmax;
1835*a30f8f8cSSatish Balay     for (j=0; j<lens[i]; j++) {
1836*a30f8f8cSSatish Balay       lrows[count++] = values[j] - base;
1837*a30f8f8cSSatish Balay     }
1838*a30f8f8cSSatish Balay   }
1839*a30f8f8cSSatish Balay   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1840*a30f8f8cSSatish Balay   ierr = PetscFree(lens);CHKERRQ(ierr);
1841*a30f8f8cSSatish Balay   ierr = PetscFree(owner);CHKERRQ(ierr);
1842*a30f8f8cSSatish Balay   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1843*a30f8f8cSSatish Balay 
1844*a30f8f8cSSatish Balay   /* actually zap the local rows */
1845*a30f8f8cSSatish Balay   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
1846*a30f8f8cSSatish Balay   PLogObjectParent(A,istmp);
1847*a30f8f8cSSatish Balay 
1848*a30f8f8cSSatish Balay   /*
1849*a30f8f8cSSatish Balay         Zero the required rows. If the "diagonal block" of the matrix
1850*a30f8f8cSSatish Balay      is square and the user wishes to set the diagonal we use seperate
1851*a30f8f8cSSatish Balay      code so that MatSetValues() is not called for each diagonal allocating
1852*a30f8f8cSSatish Balay      new memory, thus calling lots of mallocs and slowing things down.
1853*a30f8f8cSSatish Balay 
1854*a30f8f8cSSatish Balay        Contributed by: Mathew Knepley
1855*a30f8f8cSSatish Balay   */
1856*a30f8f8cSSatish Balay   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1857*a30f8f8cSSatish Balay   ierr = MatZeroRows_SeqBAIJ(l->B,istmp,0);CHKERRQ(ierr);
1858*a30f8f8cSSatish Balay   if (diag && (l->A->M == l->A->N)) {
1859*a30f8f8cSSatish Balay     ierr = MatZeroRows_SeqSBAIJ(l->A,istmp,diag);CHKERRQ(ierr);
1860*a30f8f8cSSatish Balay   } else if (diag) {
1861*a30f8f8cSSatish Balay     ierr = MatZeroRows_SeqSBAIJ(l->A,istmp,0);CHKERRQ(ierr);
1862*a30f8f8cSSatish Balay     if (((Mat_SeqSBAIJ*)l->A->data)->nonew) {
1863*a30f8f8cSSatish Balay       SETERRQ(PETSC_ERR_SUP,0,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1864*a30f8f8cSSatish Balay MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1865*a30f8f8cSSatish Balay     }
1866*a30f8f8cSSatish Balay     for (i=0; i<slen; i++) {
1867*a30f8f8cSSatish Balay       row  = lrows[i] + rstart_bs;
1868*a30f8f8cSSatish Balay       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
1869*a30f8f8cSSatish Balay     }
1870*a30f8f8cSSatish Balay     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1871*a30f8f8cSSatish Balay     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1872*a30f8f8cSSatish Balay   } else {
1873*a30f8f8cSSatish Balay     ierr = MatZeroRows_SeqSBAIJ(l->A,istmp,0);CHKERRQ(ierr);
1874*a30f8f8cSSatish Balay   }
1875*a30f8f8cSSatish Balay 
1876*a30f8f8cSSatish Balay   ierr = ISDestroy(istmp);CHKERRQ(ierr);
1877*a30f8f8cSSatish Balay   ierr = PetscFree(lrows);CHKERRQ(ierr);
1878*a30f8f8cSSatish Balay 
1879*a30f8f8cSSatish Balay   /* wait on sends */
1880*a30f8f8cSSatish Balay   if (nsends) {
1881*a30f8f8cSSatish Balay     send_status = (MPI_Status*)PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
1882*a30f8f8cSSatish Balay     ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1883*a30f8f8cSSatish Balay     ierr        = PetscFree(send_status);CHKERRQ(ierr);
1884*a30f8f8cSSatish Balay   }
1885*a30f8f8cSSatish Balay   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1886*a30f8f8cSSatish Balay   ierr = PetscFree(svalues);CHKERRQ(ierr);
1887*a30f8f8cSSatish Balay 
1888*a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1889*a30f8f8cSSatish Balay }
1890*a30f8f8cSSatish Balay 
1891*a30f8f8cSSatish Balay #undef __FUNC__
1892*a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatPrintHelp_MPISBAIJ"
1893*a30f8f8cSSatish Balay int MatPrintHelp_MPISBAIJ(Mat A)
1894*a30f8f8cSSatish Balay {
1895*a30f8f8cSSatish Balay   Mat_MPISBAIJ *a   = (Mat_MPISBAIJ*)A->data;
1896*a30f8f8cSSatish Balay   MPI_Comm    comm = A->comm;
1897*a30f8f8cSSatish Balay   static int  called = 0;
1898*a30f8f8cSSatish Balay   int         ierr;
1899*a30f8f8cSSatish Balay 
1900*a30f8f8cSSatish Balay   PetscFunctionBegin;
1901*a30f8f8cSSatish Balay   if (!a->rank) {
1902*a30f8f8cSSatish Balay     ierr = MatPrintHelp_SeqSBAIJ(a->A);CHKERRQ(ierr);
1903*a30f8f8cSSatish Balay   }
1904*a30f8f8cSSatish Balay   if (called) {PetscFunctionReturn(0);} else called = 1;
1905*a30f8f8cSSatish Balay   ierr = (*PetscHelpPrintf)(comm," Options for MATMPISBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
1906*a30f8f8cSSatish Balay   ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
1907*a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1908*a30f8f8cSSatish Balay }
1909*a30f8f8cSSatish Balay 
1910*a30f8f8cSSatish Balay #undef __FUNC__
1911*a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatSetUnfactored_MPISBAIJ"
1912*a30f8f8cSSatish Balay int MatSetUnfactored_MPISBAIJ(Mat A)
1913*a30f8f8cSSatish Balay {
1914*a30f8f8cSSatish Balay   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*)A->data;
1915*a30f8f8cSSatish Balay   int         ierr;
1916*a30f8f8cSSatish Balay 
1917*a30f8f8cSSatish Balay   PetscFunctionBegin;
1918*a30f8f8cSSatish Balay   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1919*a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1920*a30f8f8cSSatish Balay }
1921*a30f8f8cSSatish Balay 
1922*a30f8f8cSSatish Balay static int MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *);
1923*a30f8f8cSSatish Balay 
1924*a30f8f8cSSatish Balay #undef __FUNC__
1925*a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatEqual_MPISBAIJ"
1926*a30f8f8cSSatish Balay int MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag)
1927*a30f8f8cSSatish Balay {
1928*a30f8f8cSSatish Balay   Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1929*a30f8f8cSSatish Balay   Mat         a,b,c,d;
1930*a30f8f8cSSatish Balay   PetscTruth  flg;
1931*a30f8f8cSSatish Balay   int         ierr;
1932*a30f8f8cSSatish Balay 
1933*a30f8f8cSSatish Balay   PetscFunctionBegin;
1934*a30f8f8cSSatish Balay   if (B->type != MATMPISBAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type");
1935*a30f8f8cSSatish Balay   a = matA->A; b = matA->B;
1936*a30f8f8cSSatish Balay   c = matB->A; d = matB->B;
1937*a30f8f8cSSatish Balay 
1938*a30f8f8cSSatish Balay   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1939*a30f8f8cSSatish Balay   if (flg == PETSC_TRUE) {
1940*a30f8f8cSSatish Balay     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1941*a30f8f8cSSatish Balay   }
1942*a30f8f8cSSatish Balay   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1943*a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1944*a30f8f8cSSatish Balay }
1945*a30f8f8cSSatish Balay 
1946*a30f8f8cSSatish Balay /* -------------------------------------------------------------------*/
1947*a30f8f8cSSatish Balay static struct _MatOps MatOps_Values = {
1948*a30f8f8cSSatish Balay   MatSetValues_MPISBAIJ,
1949*a30f8f8cSSatish Balay   MatGetRow_MPISBAIJ,
1950*a30f8f8cSSatish Balay   MatRestoreRow_MPISBAIJ,
1951*a30f8f8cSSatish Balay   MatMult_MPISBAIJ,
1952*a30f8f8cSSatish Balay   MatMultAdd_MPISBAIJ,
1953*a30f8f8cSSatish Balay   MatMultTranspose_MPISBAIJ,
1954*a30f8f8cSSatish Balay   MatMultTransposeAdd_MPISBAIJ,
1955*a30f8f8cSSatish Balay   0,
1956*a30f8f8cSSatish Balay   0,
1957*a30f8f8cSSatish Balay   0,
1958*a30f8f8cSSatish Balay   0,
1959*a30f8f8cSSatish Balay   0,
1960*a30f8f8cSSatish Balay   0,
1961*a30f8f8cSSatish Balay   0,
1962*a30f8f8cSSatish Balay   MatTranspose_MPISBAIJ,
1963*a30f8f8cSSatish Balay   MatGetInfo_MPISBAIJ,
1964*a30f8f8cSSatish Balay   MatEqual_MPISBAIJ,
1965*a30f8f8cSSatish Balay   MatGetDiagonal_MPISBAIJ,
1966*a30f8f8cSSatish Balay   MatDiagonalScale_MPISBAIJ,
1967*a30f8f8cSSatish Balay   MatNorm_MPISBAIJ,
1968*a30f8f8cSSatish Balay   MatAssemblyBegin_MPISBAIJ,
1969*a30f8f8cSSatish Balay   MatAssemblyEnd_MPISBAIJ,
1970*a30f8f8cSSatish Balay   0,
1971*a30f8f8cSSatish Balay   MatSetOption_MPISBAIJ,
1972*a30f8f8cSSatish Balay   MatZeroEntries_MPISBAIJ,
1973*a30f8f8cSSatish Balay   MatZeroRows_MPISBAIJ,
1974*a30f8f8cSSatish Balay   0,
1975*a30f8f8cSSatish Balay   0,
1976*a30f8f8cSSatish Balay   0,
1977*a30f8f8cSSatish Balay   0,
1978*a30f8f8cSSatish Balay   MatGetSize_MPISBAIJ,
1979*a30f8f8cSSatish Balay   MatGetLocalSize_MPISBAIJ,
1980*a30f8f8cSSatish Balay   MatGetOwnershipRange_MPISBAIJ,
1981*a30f8f8cSSatish Balay   0,
1982*a30f8f8cSSatish Balay   0,
1983*a30f8f8cSSatish Balay   0,
1984*a30f8f8cSSatish Balay   0,
1985*a30f8f8cSSatish Balay   MatDuplicate_MPISBAIJ,
1986*a30f8f8cSSatish Balay   0,
1987*a30f8f8cSSatish Balay   0,
1988*a30f8f8cSSatish Balay   0,
1989*a30f8f8cSSatish Balay   0,
1990*a30f8f8cSSatish Balay   0,
1991*a30f8f8cSSatish Balay   MatGetSubMatrices_MPISBAIJ,
1992*a30f8f8cSSatish Balay   MatIncreaseOverlap_MPISBAIJ,
1993*a30f8f8cSSatish Balay   MatGetValues_MPISBAIJ,
1994*a30f8f8cSSatish Balay   0,
1995*a30f8f8cSSatish Balay   MatPrintHelp_MPISBAIJ,
1996*a30f8f8cSSatish Balay   MatScale_MPISBAIJ,
1997*a30f8f8cSSatish Balay   0,
1998*a30f8f8cSSatish Balay   0,
1999*a30f8f8cSSatish Balay   0,
2000*a30f8f8cSSatish Balay   MatGetBlockSize_MPISBAIJ,
2001*a30f8f8cSSatish Balay   0,
2002*a30f8f8cSSatish Balay   0,
2003*a30f8f8cSSatish Balay   0,
2004*a30f8f8cSSatish Balay   0,
2005*a30f8f8cSSatish Balay   0,
2006*a30f8f8cSSatish Balay   0,
2007*a30f8f8cSSatish Balay   MatSetUnfactored_MPISBAIJ,
2008*a30f8f8cSSatish Balay   0,
2009*a30f8f8cSSatish Balay   MatSetValuesBlocked_MPISBAIJ,
2010*a30f8f8cSSatish Balay   0,
2011*a30f8f8cSSatish Balay   0,
2012*a30f8f8cSSatish Balay   0,
2013*a30f8f8cSSatish Balay   MatGetMaps_Petsc};
2014*a30f8f8cSSatish Balay 
2015*a30f8f8cSSatish Balay 
2016*a30f8f8cSSatish Balay EXTERN_C_BEGIN
2017*a30f8f8cSSatish Balay #undef __FUNC__
2018*a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatGetDiagonalBlock_MPISBAIJ"
2019*a30f8f8cSSatish Balay int MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
2020*a30f8f8cSSatish Balay {
2021*a30f8f8cSSatish Balay   PetscFunctionBegin;
2022*a30f8f8cSSatish Balay   *a      = ((Mat_MPISBAIJ *)A->data)->A;
2023*a30f8f8cSSatish Balay   *iscopy = PETSC_FALSE;
2024*a30f8f8cSSatish Balay   PetscFunctionReturn(0);
2025*a30f8f8cSSatish Balay }
2026*a30f8f8cSSatish Balay EXTERN_C_END
2027*a30f8f8cSSatish Balay 
2028*a30f8f8cSSatish Balay #undef __FUNC__
2029*a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatCreateMPISBAIJ"
2030*a30f8f8cSSatish Balay /*@C
2031*a30f8f8cSSatish Balay    MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
2032*a30f8f8cSSatish Balay    (block compressed row).  For good matrix assembly performance
2033*a30f8f8cSSatish Balay    the user should preallocate the matrix storage by setting the parameters
2034*a30f8f8cSSatish Balay    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2035*a30f8f8cSSatish Balay    performance can be increased by more than a factor of 50.
2036*a30f8f8cSSatish Balay 
2037*a30f8f8cSSatish Balay    Collective on MPI_Comm
2038*a30f8f8cSSatish Balay 
2039*a30f8f8cSSatish Balay    Input Parameters:
2040*a30f8f8cSSatish Balay +  comm - MPI communicator
2041*a30f8f8cSSatish Balay .  bs   - size of blockk
2042*a30f8f8cSSatish Balay .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2043*a30f8f8cSSatish Balay            This value should be the same as the local size used in creating the
2044*a30f8f8cSSatish Balay            y vector for the matrix-vector product y = Ax.
2045*a30f8f8cSSatish Balay .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2046*a30f8f8cSSatish Balay            This value should be the same as the local size used in creating the
2047*a30f8f8cSSatish Balay            x vector for the matrix-vector product y = Ax.
2048*a30f8f8cSSatish Balay .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2049*a30f8f8cSSatish Balay .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2050*a30f8f8cSSatish Balay .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2051*a30f8f8cSSatish Balay            submatrix  (same for all local rows)
2052*a30f8f8cSSatish Balay .  d_nnz - array containing the number of block nonzeros in the various block rows
2053*a30f8f8cSSatish Balay            of the in diagonal portion of the local (possibly different for each block
2054*a30f8f8cSSatish Balay            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
2055*a30f8f8cSSatish Balay .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2056*a30f8f8cSSatish Balay            submatrix (same for all local rows).
2057*a30f8f8cSSatish Balay -  o_nnz - array containing the number of nonzeros in the various block rows of the
2058*a30f8f8cSSatish Balay            off-diagonal portion of the local submatrix (possibly different for
2059*a30f8f8cSSatish Balay            each block row) or PETSC_NULL.
2060*a30f8f8cSSatish Balay 
2061*a30f8f8cSSatish Balay    Output Parameter:
2062*a30f8f8cSSatish Balay .  A - the matrix
2063*a30f8f8cSSatish Balay 
2064*a30f8f8cSSatish Balay    Options Database Keys:
2065*a30f8f8cSSatish Balay .   -mat_no_unroll - uses code that does not unroll the loops in the
2066*a30f8f8cSSatish Balay                      block calculations (much slower)
2067*a30f8f8cSSatish Balay .   -mat_block_size - size of the blocks to use
2068*a30f8f8cSSatish Balay .   -mat_mpi - use the parallel matrix data structures even on one processor
2069*a30f8f8cSSatish Balay                (defaults to using SeqBAIJ format on one processor)
2070*a30f8f8cSSatish Balay 
2071*a30f8f8cSSatish Balay    Notes:
2072*a30f8f8cSSatish Balay    The user MUST specify either the local or global matrix dimensions
2073*a30f8f8cSSatish Balay    (possibly both).
2074*a30f8f8cSSatish Balay 
2075*a30f8f8cSSatish Balay    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
2076*a30f8f8cSSatish Balay    than it must be used on all processors that share the object for that argument.
2077*a30f8f8cSSatish Balay 
2078*a30f8f8cSSatish Balay    Storage Information:
2079*a30f8f8cSSatish Balay    For a square global matrix we define each processor's diagonal portion
2080*a30f8f8cSSatish Balay    to be its local rows and the corresponding columns (a square submatrix);
2081*a30f8f8cSSatish Balay    each processor's off-diagonal portion encompasses the remainder of the
2082*a30f8f8cSSatish Balay    local matrix (a rectangular submatrix).
2083*a30f8f8cSSatish Balay 
2084*a30f8f8cSSatish Balay    The user can specify preallocated storage for the diagonal part of
2085*a30f8f8cSSatish Balay    the local submatrix with either d_nz or d_nnz (not both).  Set
2086*a30f8f8cSSatish Balay    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2087*a30f8f8cSSatish Balay    memory allocation.  Likewise, specify preallocated storage for the
2088*a30f8f8cSSatish Balay    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2089*a30f8f8cSSatish Balay 
2090*a30f8f8cSSatish Balay    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2091*a30f8f8cSSatish Balay    the figure below we depict these three local rows and all columns (0-11).
2092*a30f8f8cSSatish Balay 
2093*a30f8f8cSSatish Balay .vb
2094*a30f8f8cSSatish Balay            0 1 2 3 4 5 6 7 8 9 10 11
2095*a30f8f8cSSatish Balay           -------------------
2096*a30f8f8cSSatish Balay    row 3  |  o o o d d d o o o o o o
2097*a30f8f8cSSatish Balay    row 4  |  o o o d d d o o o o o o
2098*a30f8f8cSSatish Balay    row 5  |  o o o d d d o o o o o o
2099*a30f8f8cSSatish Balay           -------------------
2100*a30f8f8cSSatish Balay .ve
2101*a30f8f8cSSatish Balay 
2102*a30f8f8cSSatish Balay    Thus, any entries in the d locations are stored in the d (diagonal)
2103*a30f8f8cSSatish Balay    submatrix, and any entries in the o locations are stored in the
2104*a30f8f8cSSatish Balay    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
2105*a30f8f8cSSatish Balay    stored simply in the MATSEQBAIJ format for compressed row storage.
2106*a30f8f8cSSatish Balay 
2107*a30f8f8cSSatish Balay    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2108*a30f8f8cSSatish Balay    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2109*a30f8f8cSSatish Balay    In general, for PDE problems in which most nonzeros are near the diagonal,
2110*a30f8f8cSSatish Balay    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2111*a30f8f8cSSatish Balay    or you will get TERRIBLE performance; see the users' manual chapter on
2112*a30f8f8cSSatish Balay    matrices.
2113*a30f8f8cSSatish Balay 
2114*a30f8f8cSSatish Balay    Level: intermediate
2115*a30f8f8cSSatish Balay 
2116*a30f8f8cSSatish Balay .keywords: matrix, block, aij, compressed row, sparse, parallel
2117*a30f8f8cSSatish Balay 
2118*a30f8f8cSSatish Balay .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPISBAIJ()
2119*a30f8f8cSSatish Balay @*/
2120*a30f8f8cSSatish Balay 
2121*a30f8f8cSSatish Balay int MatCreateMPISBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
2122*a30f8f8cSSatish Balay {
2123*a30f8f8cSSatish Balay   Mat          B;
2124*a30f8f8cSSatish Balay   Mat_MPISBAIJ  *b;
2125*a30f8f8cSSatish Balay   int          ierr,i,sum[1],work[1],mbs,nbs,Mbs=PETSC_DECIDE,size;
2126*a30f8f8cSSatish Balay   PetscTruth   flag1,flag2,flg;
2127*a30f8f8cSSatish Balay 
2128*a30f8f8cSSatish Balay   PetscFunctionBegin;
2129*a30f8f8cSSatish Balay   if (M != N || m != n){ /* N and n are not used after this */
2130*a30f8f8cSSatish Balay     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"For symmetric format, set M=N and m=n");
2131*a30f8f8cSSatish Balay   }
2132*a30f8f8cSSatish Balay   ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2133*a30f8f8cSSatish Balay 
2134*a30f8f8cSSatish Balay   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Invalid block size specified, must be positive");
2135*a30f8f8cSSatish Balay   if (d_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nz cannot be less than -2: value %d",d_nz);
2136*a30f8f8cSSatish Balay   if (o_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nz cannot be less than -2: value %d",o_nz);
2137*a30f8f8cSSatish Balay   if (d_nnz) {
2138*a30f8f8cSSatish Balay     for (i=0; i<m/bs; i++) {
2139*a30f8f8cSSatish Balay       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nnz cannot be less than -1: local row %d value %d",i,d_nnz[i]);
2140*a30f8f8cSSatish Balay     }
2141*a30f8f8cSSatish Balay   }
2142*a30f8f8cSSatish Balay   if (o_nnz) {
2143*a30f8f8cSSatish Balay     for (i=0; i<m/bs; i++) {
2144*a30f8f8cSSatish Balay       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nnz cannot be less than -1: local row %d value %d",i,o_nnz[i]);
2145*a30f8f8cSSatish Balay     }
2146*a30f8f8cSSatish Balay   }
2147*a30f8f8cSSatish Balay 
2148*a30f8f8cSSatish Balay   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2149*a30f8f8cSSatish Balay   ierr = OptionsHasName(PETSC_NULL,"-mat_mpisbaij",&flag1);CHKERRQ(ierr);
2150*a30f8f8cSSatish Balay   ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2);CHKERRQ(ierr);
2151*a30f8f8cSSatish Balay   if (!flag1 && !flag2 && size == 1) {
2152*a30f8f8cSSatish Balay     if (M == PETSC_DECIDE) M = m;
2153*a30f8f8cSSatish Balay     ierr = MatCreateSeqSBAIJ(comm,bs,M,M,d_nz,d_nnz,A);CHKERRQ(ierr);
2154*a30f8f8cSSatish Balay     PetscFunctionReturn(0);
2155*a30f8f8cSSatish Balay   }
2156*a30f8f8cSSatish Balay 
2157*a30f8f8cSSatish Balay   *A = 0;
2158*a30f8f8cSSatish Balay   PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPISBAIJ,"Mat",comm,MatDestroy,MatView);
2159*a30f8f8cSSatish Balay   PLogObjectCreate(B);
2160*a30f8f8cSSatish Balay   B->data = (void*)(b = PetscNew(Mat_MPISBAIJ));CHKPTRQ(b);
2161*a30f8f8cSSatish Balay   ierr    = PetscMemzero(b,sizeof(Mat_MPISBAIJ));CHKERRQ(ierr);
2162*a30f8f8cSSatish Balay   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2163*a30f8f8cSSatish Balay 
2164*a30f8f8cSSatish Balay   B->ops->destroy    = MatDestroy_MPISBAIJ;
2165*a30f8f8cSSatish Balay   B->ops->view       = MatView_MPISBAIJ;
2166*a30f8f8cSSatish Balay   B->mapping    = 0;
2167*a30f8f8cSSatish Balay   B->factor     = 0;
2168*a30f8f8cSSatish Balay   B->assembled  = PETSC_FALSE;
2169*a30f8f8cSSatish Balay 
2170*a30f8f8cSSatish Balay   B->insertmode = NOT_SET_VALUES;
2171*a30f8f8cSSatish Balay   ierr = MPI_Comm_rank(comm,&b->rank);CHKERRQ(ierr);
2172*a30f8f8cSSatish Balay   ierr = MPI_Comm_size(comm,&b->size);CHKERRQ(ierr);
2173*a30f8f8cSSatish Balay 
2174*a30f8f8cSSatish Balay   if (m == PETSC_DECIDE && (d_nnz || o_nnz)) {
2175*a30f8f8cSSatish Balay     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
2176*a30f8f8cSSatish Balay   }
2177*a30f8f8cSSatish Balay   if (M == PETSC_DECIDE && m == PETSC_DECIDE) {
2178*a30f8f8cSSatish Balay     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either M or m should be specified");
2179*a30f8f8cSSatish Balay   }
2180*a30f8f8cSSatish Balay   if (M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE;
2181*a30f8f8cSSatish Balay 
2182*a30f8f8cSSatish Balay   if (M == PETSC_DECIDE) {
2183*a30f8f8cSSatish Balay     work[0] = m; mbs = m/bs;
2184*a30f8f8cSSatish Balay     ierr = MPI_Allreduce(work,sum,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
2185*a30f8f8cSSatish Balay     M = sum[0]; Mbs = M/bs;
2186*a30f8f8cSSatish Balay   } else { /* M is specified */
2187*a30f8f8cSSatish Balay     Mbs = M/bs;
2188*a30f8f8cSSatish Balay     if (Mbs*bs != M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global rows must be divisible by blocksize");
2189*a30f8f8cSSatish Balay     mbs = Mbs/b->size + ((Mbs % b->size) > b->rank);
2190*a30f8f8cSSatish Balay     m   = mbs*bs;
2191*a30f8f8cSSatish Balay   }
2192*a30f8f8cSSatish Balay 
2193*a30f8f8cSSatish Balay   if (mbs*bs != m) {
2194*a30f8f8cSSatish Balay     SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of local rows/cols must be divisible by blocksize");
2195*a30f8f8cSSatish Balay   }
2196*a30f8f8cSSatish Balay 
2197*a30f8f8cSSatish Balay   b->m = m; B->m = m;
2198*a30f8f8cSSatish Balay   b->n = m; B->n = m;
2199*a30f8f8cSSatish Balay   b->N = M; B->N = M;
2200*a30f8f8cSSatish Balay   b->M = M;
2201*a30f8f8cSSatish Balay   B->M = M;
2202*a30f8f8cSSatish Balay   b->bs  = bs;
2203*a30f8f8cSSatish Balay   b->bs2 = bs*bs;
2204*a30f8f8cSSatish Balay   b->mbs = mbs;
2205*a30f8f8cSSatish Balay   b->nbs = mbs;
2206*a30f8f8cSSatish Balay   b->Mbs = Mbs;
2207*a30f8f8cSSatish Balay   b->Nbs = Mbs;
2208*a30f8f8cSSatish Balay 
2209*a30f8f8cSSatish Balay   /* the information in the maps duplicates the information computed below, eventually
2210*a30f8f8cSSatish Balay      we should remove the duplicate information that is not contained in the maps */
2211*a30f8f8cSSatish Balay   ierr = MapCreateMPI(B->comm,m,M,&B->rmap);CHKERRQ(ierr);
2212*a30f8f8cSSatish Balay   ierr = MapCreateMPI(B->comm,m,M,&B->cmap);CHKERRQ(ierr);
2213*a30f8f8cSSatish Balay 
2214*a30f8f8cSSatish Balay   /* build local table of row and column ownerships */
2215*a30f8f8cSSatish Balay   b->rowners = (int*)PetscMalloc(3*(b->size+2)*sizeof(int));CHKPTRQ(b->rowners);
2216*a30f8f8cSSatish Balay   PLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ));
2217*a30f8f8cSSatish Balay   b->cowners    = b->rowners + b->size + 2;
2218*a30f8f8cSSatish Balay   b->rowners_bs = b->cowners + b->size + 2;
2219*a30f8f8cSSatish Balay   ierr = MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2220*a30f8f8cSSatish Balay   b->rowners[0]    = 0;
2221*a30f8f8cSSatish Balay   for (i=2; i<=b->size; i++) {
2222*a30f8f8cSSatish Balay     b->rowners[i] += b->rowners[i-1];
2223*a30f8f8cSSatish Balay   }
2224*a30f8f8cSSatish Balay   for (i=0; i<=b->size; i++) {
2225*a30f8f8cSSatish Balay     b->rowners_bs[i] = b->rowners[i]*bs;
2226*a30f8f8cSSatish Balay   }
2227*a30f8f8cSSatish Balay   b->rstart    = b->rowners[b->rank];
2228*a30f8f8cSSatish Balay   b->rend      = b->rowners[b->rank+1];
2229*a30f8f8cSSatish Balay   b->rstart_bs = b->rstart * bs;
2230*a30f8f8cSSatish Balay   b->rend_bs   = b->rend * bs;
2231*a30f8f8cSSatish Balay 
2232*a30f8f8cSSatish Balay   b->cstart    = b->rstart;
2233*a30f8f8cSSatish Balay   b->cend      = b->rend;
2234*a30f8f8cSSatish Balay   b->cstart_bs = b->cstart * bs;
2235*a30f8f8cSSatish Balay   b->cend_bs   = b->cend * bs;
2236*a30f8f8cSSatish Balay 
2237*a30f8f8cSSatish Balay 
2238*a30f8f8cSSatish Balay   if (d_nz == PETSC_DEFAULT) d_nz = 5;
2239*a30f8f8cSSatish Balay   ierr = MatCreateSeqSBAIJ(PETSC_COMM_SELF,bs,m,m,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
2240*a30f8f8cSSatish Balay   PLogObjectParent(B,b->A);
2241*a30f8f8cSSatish Balay   if (o_nz == PETSC_DEFAULT) o_nz = 0;
2242*a30f8f8cSSatish Balay   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,M,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
2243*a30f8f8cSSatish Balay   PLogObjectParent(B,b->B);
2244*a30f8f8cSSatish Balay 
2245*a30f8f8cSSatish Balay   /* build cache for off array entries formed */
2246*a30f8f8cSSatish Balay   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
2247*a30f8f8cSSatish Balay   ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);
2248*a30f8f8cSSatish Balay   b->donotstash  = PETSC_FALSE;
2249*a30f8f8cSSatish Balay   b->colmap      = PETSC_NULL;
2250*a30f8f8cSSatish Balay   b->garray      = PETSC_NULL;
2251*a30f8f8cSSatish Balay   b->roworiented = PETSC_TRUE;
2252*a30f8f8cSSatish Balay 
2253*a30f8f8cSSatish Balay #if defined(PEYSC_USE_MAT_SINGLE)
2254*a30f8f8cSSatish Balay   /* stuff for MatSetValues_XXX in single precision */
2255*a30f8f8cSSatish Balay   b->lensetvalues     = 0;
2256*a30f8f8cSSatish Balay   b->setvaluescopy    = PETSC_NULL;
2257*a30f8f8cSSatish Balay #endif
2258*a30f8f8cSSatish Balay 
2259*a30f8f8cSSatish Balay   /* stuff used in block assembly */
2260*a30f8f8cSSatish Balay   b->barray       = 0;
2261*a30f8f8cSSatish Balay 
2262*a30f8f8cSSatish Balay   /* stuff used for matrix vector multiply */
2263*a30f8f8cSSatish Balay   b->lvec         = 0;
2264*a30f8f8cSSatish Balay   b->Mvctx        = 0;
2265*a30f8f8cSSatish Balay 
2266*a30f8f8cSSatish Balay   /* stuff for MatGetRow() */
2267*a30f8f8cSSatish Balay   b->rowindices   = 0;
2268*a30f8f8cSSatish Balay   b->rowvalues    = 0;
2269*a30f8f8cSSatish Balay   b->getrowactive = PETSC_FALSE;
2270*a30f8f8cSSatish Balay 
2271*a30f8f8cSSatish Balay   /* hash table stuff */
2272*a30f8f8cSSatish Balay   b->ht           = 0;
2273*a30f8f8cSSatish Balay   b->hd           = 0;
2274*a30f8f8cSSatish Balay   b->ht_size      = 0;
2275*a30f8f8cSSatish Balay   b->ht_flag      = PETSC_FALSE;
2276*a30f8f8cSSatish Balay   b->ht_fact      = 0;
2277*a30f8f8cSSatish Balay   b->ht_total_ct  = 0;
2278*a30f8f8cSSatish Balay   b->ht_insert_ct = 0;
2279*a30f8f8cSSatish Balay 
2280*a30f8f8cSSatish Balay   *A = B;
2281*a30f8f8cSSatish Balay   ierr = OptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
2282*a30f8f8cSSatish Balay   if (flg) {
2283*a30f8f8cSSatish Balay     double fact = 1.39;
2284*a30f8f8cSSatish Balay     ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
2285*a30f8f8cSSatish Balay     ierr = OptionsGetDouble(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
2286*a30f8f8cSSatish Balay     if (fact <= 1.0) fact = 1.39;
2287*a30f8f8cSSatish Balay     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
2288*a30f8f8cSSatish Balay     PLogInfo(0,"MatCreateMPISBAIJ:Hash table Factor used %5.2f\n",fact);
2289*a30f8f8cSSatish Balay   }
2290*a30f8f8cSSatish Balay   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2291*a30f8f8cSSatish Balay                                      "MatStoreValues_MPISBAIJ",
2292*a30f8f8cSSatish Balay                                      MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
2293*a30f8f8cSSatish Balay   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2294*a30f8f8cSSatish Balay                                      "MatRetrieveValues_MPISBAIJ",
2295*a30f8f8cSSatish Balay                                      MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
2296*a30f8f8cSSatish Balay   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
2297*a30f8f8cSSatish Balay                                      "MatGetDiagonalBlock_MPISBAIJ",
2298*a30f8f8cSSatish Balay                                      MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
2299*a30f8f8cSSatish Balay   PetscFunctionReturn(0);
2300*a30f8f8cSSatish Balay }
2301*a30f8f8cSSatish Balay 
2302*a30f8f8cSSatish Balay 
2303*a30f8f8cSSatish Balay #undef __FUNC__
2304*a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatDuplicate_MPISBAIJ"
2305*a30f8f8cSSatish Balay static int MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2306*a30f8f8cSSatish Balay {
2307*a30f8f8cSSatish Balay   Mat         mat;
2308*a30f8f8cSSatish Balay   Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2309*a30f8f8cSSatish Balay   int         ierr,len=0;
2310*a30f8f8cSSatish Balay   PetscTruth  flg;
2311*a30f8f8cSSatish Balay 
2312*a30f8f8cSSatish Balay   PetscFunctionBegin;
2313*a30f8f8cSSatish Balay   *newmat       = 0;
2314*a30f8f8cSSatish Balay   PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPISBAIJ,"Mat",matin->comm,MatDestroy,MatView);
2315*a30f8f8cSSatish Balay   PLogObjectCreate(mat);
2316*a30f8f8cSSatish Balay   mat->data         = (void*)(a = PetscNew(Mat_MPISBAIJ));CHKPTRQ(a);
2317*a30f8f8cSSatish Balay   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2318*a30f8f8cSSatish Balay   mat->ops->destroy = MatDestroy_MPISBAIJ;
2319*a30f8f8cSSatish Balay   mat->ops->view    = MatView_MPISBAIJ;
2320*a30f8f8cSSatish Balay   mat->factor       = matin->factor;
2321*a30f8f8cSSatish Balay   mat->assembled    = PETSC_TRUE;
2322*a30f8f8cSSatish Balay   mat->insertmode   = NOT_SET_VALUES;
2323*a30f8f8cSSatish Balay 
2324*a30f8f8cSSatish Balay   a->m = mat->m   = oldmat->m;
2325*a30f8f8cSSatish Balay   a->n = mat->n   = oldmat->n;
2326*a30f8f8cSSatish Balay   a->M = mat->M   = oldmat->M;
2327*a30f8f8cSSatish Balay   a->N = mat->N   = oldmat->N;
2328*a30f8f8cSSatish Balay 
2329*a30f8f8cSSatish Balay   a->bs  = oldmat->bs;
2330*a30f8f8cSSatish Balay   a->bs2 = oldmat->bs2;
2331*a30f8f8cSSatish Balay   a->mbs = oldmat->mbs;
2332*a30f8f8cSSatish Balay   a->nbs = oldmat->nbs;
2333*a30f8f8cSSatish Balay   a->Mbs = oldmat->Mbs;
2334*a30f8f8cSSatish Balay   a->Nbs = oldmat->Nbs;
2335*a30f8f8cSSatish Balay 
2336*a30f8f8cSSatish Balay   a->rstart       = oldmat->rstart;
2337*a30f8f8cSSatish Balay   a->rend         = oldmat->rend;
2338*a30f8f8cSSatish Balay   a->cstart       = oldmat->cstart;
2339*a30f8f8cSSatish Balay   a->cend         = oldmat->cend;
2340*a30f8f8cSSatish Balay   a->size         = oldmat->size;
2341*a30f8f8cSSatish Balay   a->rank         = oldmat->rank;
2342*a30f8f8cSSatish Balay   a->donotstash   = oldmat->donotstash;
2343*a30f8f8cSSatish Balay   a->roworiented  = oldmat->roworiented;
2344*a30f8f8cSSatish Balay   a->rowindices   = 0;
2345*a30f8f8cSSatish Balay   a->rowvalues    = 0;
2346*a30f8f8cSSatish Balay   a->getrowactive = PETSC_FALSE;
2347*a30f8f8cSSatish Balay   a->barray       = 0;
2348*a30f8f8cSSatish Balay   a->rstart_bs    = oldmat->rstart_bs;
2349*a30f8f8cSSatish Balay   a->rend_bs      = oldmat->rend_bs;
2350*a30f8f8cSSatish Balay   a->cstart_bs    = oldmat->cstart_bs;
2351*a30f8f8cSSatish Balay   a->cend_bs      = oldmat->cend_bs;
2352*a30f8f8cSSatish Balay 
2353*a30f8f8cSSatish Balay   /* hash table stuff */
2354*a30f8f8cSSatish Balay   a->ht           = 0;
2355*a30f8f8cSSatish Balay   a->hd           = 0;
2356*a30f8f8cSSatish Balay   a->ht_size      = 0;
2357*a30f8f8cSSatish Balay   a->ht_flag      = oldmat->ht_flag;
2358*a30f8f8cSSatish Balay   a->ht_fact      = oldmat->ht_fact;
2359*a30f8f8cSSatish Balay   a->ht_total_ct  = 0;
2360*a30f8f8cSSatish Balay   a->ht_insert_ct = 0;
2361*a30f8f8cSSatish Balay 
2362*a30f8f8cSSatish Balay 
2363*a30f8f8cSSatish Balay   a->rowners = (int*)PetscMalloc(3*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners);
2364*a30f8f8cSSatish Balay   PLogObjectMemory(mat,3*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ));
2365*a30f8f8cSSatish Balay   a->cowners    = a->rowners + a->size + 2;
2366*a30f8f8cSSatish Balay   a->rowners_bs = a->cowners + a->size + 2;
2367*a30f8f8cSSatish Balay   ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr);
2368*a30f8f8cSSatish Balay   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
2369*a30f8f8cSSatish Balay   ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr);
2370*a30f8f8cSSatish Balay   if (oldmat->colmap) {
2371*a30f8f8cSSatish Balay #if defined (PETSC_USE_CTABLE)
2372*a30f8f8cSSatish Balay   ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2373*a30f8f8cSSatish Balay #else
2374*a30f8f8cSSatish Balay     a->colmap = (int*)PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap);
2375*a30f8f8cSSatish Balay     PLogObjectMemory(mat,(a->Nbs)*sizeof(int));
2376*a30f8f8cSSatish Balay     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr);
2377*a30f8f8cSSatish Balay #endif
2378*a30f8f8cSSatish Balay   } else a->colmap = 0;
2379*a30f8f8cSSatish Balay   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2380*a30f8f8cSSatish Balay     a->garray = (int*)PetscMalloc(len*sizeof(int));CHKPTRQ(a->garray);
2381*a30f8f8cSSatish Balay     PLogObjectMemory(mat,len*sizeof(int));
2382*a30f8f8cSSatish Balay     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr);
2383*a30f8f8cSSatish Balay   } else a->garray = 0;
2384*a30f8f8cSSatish Balay 
2385*a30f8f8cSSatish Balay   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2386*a30f8f8cSSatish Balay   PLogObjectParent(mat,a->lvec);
2387*a30f8f8cSSatish Balay   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2388*a30f8f8cSSatish Balay 
2389*a30f8f8cSSatish Balay   PLogObjectParent(mat,a->Mvctx);
2390*a30f8f8cSSatish Balay   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2391*a30f8f8cSSatish Balay   PLogObjectParent(mat,a->A);
2392*a30f8f8cSSatish Balay   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2393*a30f8f8cSSatish Balay   PLogObjectParent(mat,a->B);
2394*a30f8f8cSSatish Balay   ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr);
2395*a30f8f8cSSatish Balay   ierr = FListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr);
2396*a30f8f8cSSatish Balay   if (flg) {
2397*a30f8f8cSSatish Balay     ierr = MatPrintHelp(mat);CHKERRQ(ierr);
2398*a30f8f8cSSatish Balay   }
2399*a30f8f8cSSatish Balay   *newmat = mat;
2400*a30f8f8cSSatish Balay   PetscFunctionReturn(0);
2401*a30f8f8cSSatish Balay }
2402*a30f8f8cSSatish Balay 
2403*a30f8f8cSSatish Balay #include "petscsys.h"
2404*a30f8f8cSSatish Balay 
2405*a30f8f8cSSatish Balay #undef __FUNC__
2406*a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatLoad_MPISBAIJ"
2407*a30f8f8cSSatish Balay int MatLoad_MPISBAIJ(Viewer viewer,MatType type,Mat *newmat)
2408*a30f8f8cSSatish Balay {
2409*a30f8f8cSSatish Balay   Mat          A;
2410*a30f8f8cSSatish Balay   int          i,nz,ierr,j,rstart,rend,fd;
2411*a30f8f8cSSatish Balay   Scalar       *vals,*buf;
2412*a30f8f8cSSatish Balay   MPI_Comm     comm = ((PetscObject)viewer)->comm;
2413*a30f8f8cSSatish Balay   MPI_Status   status;
2414*a30f8f8cSSatish Balay   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
2415*a30f8f8cSSatish Balay   int          *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf;
2416*a30f8f8cSSatish Balay   int          tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows;
2417*a30f8f8cSSatish Balay   int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2418*a30f8f8cSSatish Balay   int          dcount,kmax,k,nzcount,tmp;
2419*a30f8f8cSSatish Balay 
2420*a30f8f8cSSatish Balay   PetscFunctionBegin;
2421*a30f8f8cSSatish Balay   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2422*a30f8f8cSSatish Balay 
2423*a30f8f8cSSatish Balay   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2424*a30f8f8cSSatish Balay   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2425*a30f8f8cSSatish Balay   if (!rank) {
2426*a30f8f8cSSatish Balay     ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2427*a30f8f8cSSatish Balay     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2428*a30f8f8cSSatish Balay     if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
2429*a30f8f8cSSatish Balay     if (header[3] < 0) {
2430*a30f8f8cSSatish Balay       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as MPISBAIJ");
2431*a30f8f8cSSatish Balay     }
2432*a30f8f8cSSatish Balay   }
2433*a30f8f8cSSatish Balay 
2434*a30f8f8cSSatish Balay   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
2435*a30f8f8cSSatish Balay   M = header[1]; N = header[2];
2436*a30f8f8cSSatish Balay 
2437*a30f8f8cSSatish Balay   if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices");
2438*a30f8f8cSSatish Balay 
2439*a30f8f8cSSatish Balay   /*
2440*a30f8f8cSSatish Balay      This code adds extra rows to make sure the number of rows is
2441*a30f8f8cSSatish Balay      divisible by the blocksize
2442*a30f8f8cSSatish Balay   */
2443*a30f8f8cSSatish Balay   Mbs        = M/bs;
2444*a30f8f8cSSatish Balay   extra_rows = bs - M + bs*(Mbs);
2445*a30f8f8cSSatish Balay   if (extra_rows == bs) extra_rows = 0;
2446*a30f8f8cSSatish Balay   else                  Mbs++;
2447*a30f8f8cSSatish Balay   if (extra_rows &&!rank) {
2448*a30f8f8cSSatish Balay     PLogInfo(0,"MatLoad_MPISBAIJ:Padding loaded matrix to match blocksize\n");
2449*a30f8f8cSSatish Balay   }
2450*a30f8f8cSSatish Balay 
2451*a30f8f8cSSatish Balay   /* determine ownership of all rows */
2452*a30f8f8cSSatish Balay   mbs = Mbs/size + ((Mbs % size) > rank);
2453*a30f8f8cSSatish Balay   m   = mbs * bs;
2454*a30f8f8cSSatish Balay   rowners = (int*)PetscMalloc(2*(size+2)*sizeof(int));CHKPTRQ(rowners);
2455*a30f8f8cSSatish Balay   browners = rowners + size + 1;
2456*a30f8f8cSSatish Balay   ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2457*a30f8f8cSSatish Balay   rowners[0] = 0;
2458*a30f8f8cSSatish Balay   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2459*a30f8f8cSSatish Balay   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2460*a30f8f8cSSatish Balay   rstart = rowners[rank];
2461*a30f8f8cSSatish Balay   rend   = rowners[rank+1];
2462*a30f8f8cSSatish Balay 
2463*a30f8f8cSSatish Balay   /* distribute row lengths to all processors */
2464*a30f8f8cSSatish Balay   locrowlens = (int*)PetscMalloc((rend-rstart)*bs*sizeof(int));CHKPTRQ(locrowlens);
2465*a30f8f8cSSatish Balay   if (!rank) {
2466*a30f8f8cSSatish Balay     rowlengths = (int*)PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths);
2467*a30f8f8cSSatish Balay     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2468*a30f8f8cSSatish Balay     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2469*a30f8f8cSSatish Balay     sndcounts = (int*)PetscMalloc(size*sizeof(int));CHKPTRQ(sndcounts);
2470*a30f8f8cSSatish Balay     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2471*a30f8f8cSSatish Balay     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2472*a30f8f8cSSatish Balay     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2473*a30f8f8cSSatish Balay   } else {
2474*a30f8f8cSSatish Balay     ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2475*a30f8f8cSSatish Balay   }
2476*a30f8f8cSSatish Balay 
2477*a30f8f8cSSatish Balay   if (!rank) {
2478*a30f8f8cSSatish Balay     /* calculate the number of nonzeros on each processor */
2479*a30f8f8cSSatish Balay     procsnz = (int*)PetscMalloc(size*sizeof(int));CHKPTRQ(procsnz);
2480*a30f8f8cSSatish Balay     ierr    = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
2481*a30f8f8cSSatish Balay     for (i=0; i<size; i++) {
2482*a30f8f8cSSatish Balay       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2483*a30f8f8cSSatish Balay         procsnz[i] += rowlengths[j];
2484*a30f8f8cSSatish Balay       }
2485*a30f8f8cSSatish Balay     }
2486*a30f8f8cSSatish Balay     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2487*a30f8f8cSSatish Balay 
2488*a30f8f8cSSatish Balay     /* determine max buffer needed and allocate it */
2489*a30f8f8cSSatish Balay     maxnz = 0;
2490*a30f8f8cSSatish Balay     for (i=0; i<size; i++) {
2491*a30f8f8cSSatish Balay       maxnz = PetscMax(maxnz,procsnz[i]);
2492*a30f8f8cSSatish Balay     }
2493*a30f8f8cSSatish Balay     cols = (int*)PetscMalloc(maxnz*sizeof(int));CHKPTRQ(cols);
2494*a30f8f8cSSatish Balay 
2495*a30f8f8cSSatish Balay     /* read in my part of the matrix column indices  */
2496*a30f8f8cSSatish Balay     nz = procsnz[0];
2497*a30f8f8cSSatish Balay     ibuf = (int*)PetscMalloc(nz*sizeof(int));CHKPTRQ(ibuf);
2498*a30f8f8cSSatish Balay     mycols = ibuf;
2499*a30f8f8cSSatish Balay     if (size == 1)  nz -= extra_rows;
2500*a30f8f8cSSatish Balay     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2501*a30f8f8cSSatish Balay     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2502*a30f8f8cSSatish Balay 
2503*a30f8f8cSSatish Balay     /* read in every ones (except the last) and ship off */
2504*a30f8f8cSSatish Balay     for (i=1; i<size-1; i++) {
2505*a30f8f8cSSatish Balay       nz   = procsnz[i];
2506*a30f8f8cSSatish Balay       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2507*a30f8f8cSSatish Balay       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
2508*a30f8f8cSSatish Balay     }
2509*a30f8f8cSSatish Balay     /* read in the stuff for the last proc */
2510*a30f8f8cSSatish Balay     if (size != 1) {
2511*a30f8f8cSSatish Balay       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2512*a30f8f8cSSatish Balay       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2513*a30f8f8cSSatish Balay       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2514*a30f8f8cSSatish Balay       ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
2515*a30f8f8cSSatish Balay     }
2516*a30f8f8cSSatish Balay     ierr = PetscFree(cols);CHKERRQ(ierr);
2517*a30f8f8cSSatish Balay   } else {
2518*a30f8f8cSSatish Balay     /* determine buffer space needed for message */
2519*a30f8f8cSSatish Balay     nz = 0;
2520*a30f8f8cSSatish Balay     for (i=0; i<m; i++) {
2521*a30f8f8cSSatish Balay       nz += locrowlens[i];
2522*a30f8f8cSSatish Balay     }
2523*a30f8f8cSSatish Balay     ibuf   = (int*)PetscMalloc(nz*sizeof(int));CHKPTRQ(ibuf);
2524*a30f8f8cSSatish Balay     mycols = ibuf;
2525*a30f8f8cSSatish Balay     /* receive message of column indices*/
2526*a30f8f8cSSatish Balay     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2527*a30f8f8cSSatish Balay     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2528*a30f8f8cSSatish Balay     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
2529*a30f8f8cSSatish Balay   }
2530*a30f8f8cSSatish Balay 
2531*a30f8f8cSSatish Balay   /* loop over local rows, determining number of off diagonal entries */
2532*a30f8f8cSSatish Balay   dlens  = (int*)PetscMalloc(2*(rend-rstart+1)*sizeof(int));CHKPTRQ(dlens);
2533*a30f8f8cSSatish Balay   odlens = dlens + (rend-rstart);
2534*a30f8f8cSSatish Balay   mask   = (int*)PetscMalloc(3*Mbs*sizeof(int));CHKPTRQ(mask);
2535*a30f8f8cSSatish Balay   ierr   = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr);
2536*a30f8f8cSSatish Balay   masked1 = mask    + Mbs;
2537*a30f8f8cSSatish Balay   masked2 = masked1 + Mbs;
2538*a30f8f8cSSatish Balay   rowcount = 0; nzcount = 0;
2539*a30f8f8cSSatish Balay   for (i=0; i<mbs; i++) {
2540*a30f8f8cSSatish Balay     dcount  = 0;
2541*a30f8f8cSSatish Balay     odcount = 0;
2542*a30f8f8cSSatish Balay     for (j=0; j<bs; j++) {
2543*a30f8f8cSSatish Balay       kmax = locrowlens[rowcount];
2544*a30f8f8cSSatish Balay       for (k=0; k<kmax; k++) {
2545*a30f8f8cSSatish Balay         tmp = mycols[nzcount++]/bs;
2546*a30f8f8cSSatish Balay         if (!mask[tmp]) {
2547*a30f8f8cSSatish Balay           mask[tmp] = 1;
2548*a30f8f8cSSatish Balay           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
2549*a30f8f8cSSatish Balay           else masked1[dcount++] = tmp;
2550*a30f8f8cSSatish Balay         }
2551*a30f8f8cSSatish Balay       }
2552*a30f8f8cSSatish Balay       rowcount++;
2553*a30f8f8cSSatish Balay     }
2554*a30f8f8cSSatish Balay 
2555*a30f8f8cSSatish Balay     dlens[i]  = dcount;
2556*a30f8f8cSSatish Balay     odlens[i] = odcount;
2557*a30f8f8cSSatish Balay 
2558*a30f8f8cSSatish Balay     /* zero out the mask elements we set */
2559*a30f8f8cSSatish Balay     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2560*a30f8f8cSSatish Balay     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2561*a30f8f8cSSatish Balay   }
2562*a30f8f8cSSatish Balay 
2563*a30f8f8cSSatish Balay   /* create our matrix */
2564*a30f8f8cSSatish Balay   ierr = MatCreateMPISBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);CHKERRQ(ierr);
2565*a30f8f8cSSatish Balay   A = *newmat;
2566*a30f8f8cSSatish Balay   MatSetOption(A,MAT_COLUMNS_SORTED);
2567*a30f8f8cSSatish Balay 
2568*a30f8f8cSSatish Balay   if (!rank) {
2569*a30f8f8cSSatish Balay     buf = (Scalar*)PetscMalloc(maxnz*sizeof(Scalar));CHKPTRQ(buf);
2570*a30f8f8cSSatish Balay     /* read in my part of the matrix numerical values  */
2571*a30f8f8cSSatish Balay     nz = procsnz[0];
2572*a30f8f8cSSatish Balay     vals = buf;
2573*a30f8f8cSSatish Balay     mycols = ibuf;
2574*a30f8f8cSSatish Balay     if (size == 1)  nz -= extra_rows;
2575*a30f8f8cSSatish Balay     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2576*a30f8f8cSSatish Balay     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2577*a30f8f8cSSatish Balay 
2578*a30f8f8cSSatish Balay     /* insert into matrix */
2579*a30f8f8cSSatish Balay     jj      = rstart*bs;
2580*a30f8f8cSSatish Balay     for (i=0; i<m; i++) {
2581*a30f8f8cSSatish Balay       ierr = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2582*a30f8f8cSSatish Balay       mycols += locrowlens[i];
2583*a30f8f8cSSatish Balay       vals   += locrowlens[i];
2584*a30f8f8cSSatish Balay       jj++;
2585*a30f8f8cSSatish Balay     }
2586*a30f8f8cSSatish Balay     /* read in other processors (except the last one) and ship out */
2587*a30f8f8cSSatish Balay     for (i=1; i<size-1; i++) {
2588*a30f8f8cSSatish Balay       nz   = procsnz[i];
2589*a30f8f8cSSatish Balay       vals = buf;
2590*a30f8f8cSSatish Balay       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2591*a30f8f8cSSatish Balay       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2592*a30f8f8cSSatish Balay     }
2593*a30f8f8cSSatish Balay     /* the last proc */
2594*a30f8f8cSSatish Balay     if (size != 1){
2595*a30f8f8cSSatish Balay       nz   = procsnz[i] - extra_rows;
2596*a30f8f8cSSatish Balay       vals = buf;
2597*a30f8f8cSSatish Balay       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2598*a30f8f8cSSatish Balay       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2599*a30f8f8cSSatish Balay       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
2600*a30f8f8cSSatish Balay     }
2601*a30f8f8cSSatish Balay     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2602*a30f8f8cSSatish Balay   } else {
2603*a30f8f8cSSatish Balay     /* receive numeric values */
2604*a30f8f8cSSatish Balay     buf = (Scalar*)PetscMalloc(nz*sizeof(Scalar));CHKPTRQ(buf);
2605*a30f8f8cSSatish Balay 
2606*a30f8f8cSSatish Balay     /* receive message of values*/
2607*a30f8f8cSSatish Balay     vals   = buf;
2608*a30f8f8cSSatish Balay     mycols = ibuf;
2609*a30f8f8cSSatish Balay     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2610*a30f8f8cSSatish Balay     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2611*a30f8f8cSSatish Balay     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
2612*a30f8f8cSSatish Balay 
2613*a30f8f8cSSatish Balay     /* insert into matrix */
2614*a30f8f8cSSatish Balay     jj      = rstart*bs;
2615*a30f8f8cSSatish Balay     for (i=0; i<m; i++) {
2616*a30f8f8cSSatish Balay       ierr    = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2617*a30f8f8cSSatish Balay       mycols += locrowlens[i];
2618*a30f8f8cSSatish Balay       vals   += locrowlens[i];
2619*a30f8f8cSSatish Balay       jj++;
2620*a30f8f8cSSatish Balay     }
2621*a30f8f8cSSatish Balay   }
2622*a30f8f8cSSatish Balay   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2623*a30f8f8cSSatish Balay   ierr = PetscFree(buf);CHKERRQ(ierr);
2624*a30f8f8cSSatish Balay   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2625*a30f8f8cSSatish Balay   ierr = PetscFree(rowners);CHKERRQ(ierr);
2626*a30f8f8cSSatish Balay   ierr = PetscFree(dlens);CHKERRQ(ierr);
2627*a30f8f8cSSatish Balay   ierr = PetscFree(mask);CHKERRQ(ierr);
2628*a30f8f8cSSatish Balay   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2629*a30f8f8cSSatish Balay   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2630*a30f8f8cSSatish Balay   PetscFunctionReturn(0);
2631*a30f8f8cSSatish Balay }
2632*a30f8f8cSSatish Balay 
2633*a30f8f8cSSatish Balay #undef __FUNC__
2634*a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatMPISBAIJSetHashTableFactor"
2635*a30f8f8cSSatish Balay /*@
2636*a30f8f8cSSatish Balay    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2637*a30f8f8cSSatish Balay 
2638*a30f8f8cSSatish Balay    Input Parameters:
2639*a30f8f8cSSatish Balay .  mat  - the matrix
2640*a30f8f8cSSatish Balay .  fact - factor
2641*a30f8f8cSSatish Balay 
2642*a30f8f8cSSatish Balay    Collective on Mat
2643*a30f8f8cSSatish Balay 
2644*a30f8f8cSSatish Balay    Level: advanced
2645*a30f8f8cSSatish Balay 
2646*a30f8f8cSSatish Balay   Notes:
2647*a30f8f8cSSatish Balay    This can also be set by the command line option: -mat_use_hash_table fact
2648*a30f8f8cSSatish Balay 
2649*a30f8f8cSSatish Balay .keywords: matrix, hashtable, factor, HT
2650*a30f8f8cSSatish Balay 
2651*a30f8f8cSSatish Balay .seealso: MatSetOption()
2652*a30f8f8cSSatish Balay @*/
2653*a30f8f8cSSatish Balay int MatMPISBAIJSetHashTableFactor(Mat mat,PetscReal fact)
2654*a30f8f8cSSatish Balay {
2655*a30f8f8cSSatish Balay   Mat_MPIBAIJ *baij;
2656*a30f8f8cSSatish Balay 
2657*a30f8f8cSSatish Balay   PetscFunctionBegin;
2658*a30f8f8cSSatish Balay   PetscValidHeaderSpecific(mat,MAT_COOKIE);
2659*a30f8f8cSSatish Balay   if (mat->type != MATMPIBAIJ) {
2660*a30f8f8cSSatish Balay     SETERRQ(PETSC_ERR_ARG_WRONG,1,"Incorrect matrix type. Use MPIBAIJ only.");
2661*a30f8f8cSSatish Balay   }
2662*a30f8f8cSSatish Balay   baij = (Mat_MPIBAIJ*)mat->data;
2663*a30f8f8cSSatish Balay   baij->ht_fact = fact;
2664*a30f8f8cSSatish Balay   PetscFunctionReturn(0);
2665*a30f8f8cSSatish Balay }
2666