xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision 65d70643ff288764ab28cae5053c4ca08fa568be)
1*65d70643SHong Zhang /*$Id: mpisbaij.c,v 1.5 2000/07/17 18:41:49 hzhang Exp hzhang $*/
2a30f8f8cSSatish Balay 
3a30f8f8cSSatish Balay #include "src/mat/impls/baij/mpi/mpibaij.h"
4a30f8f8cSSatish Balay #include "src/vec/vecimpl.h"
5a30f8f8cSSatish Balay #include "mpisbaij.h"
6a30f8f8cSSatish Balay #include "src/mat/impls/sbaij/seq/sbaij.h"
7a30f8f8cSSatish Balay 
8a30f8f8cSSatish Balay extern int MatSetUpMultiply_MPISBAIJ(Mat);
9a30f8f8cSSatish Balay extern int DisAssemble_MPISBAIJ(Mat);
10a30f8f8cSSatish Balay extern int MatIncreaseOverlap_MPISBAIJ(Mat,int,IS *,int);
11a30f8f8cSSatish Balay extern int MatGetSubMatrices_MPISBAIJ(Mat,int,IS *,IS *,MatReuse,Mat **);
12a30f8f8cSSatish Balay extern int MatGetValues_SeqSBAIJ(Mat,int,int *,int,int *,Scalar *);
13a30f8f8cSSatish Balay extern int MatSetValues_SeqSBAIJ(Mat,int,int *,int,int *,Scalar *,InsertMode);
14a30f8f8cSSatish Balay extern int MatSetValuesBlocked_SeqSBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
15a30f8f8cSSatish Balay extern int MatGetRow_SeqSBAIJ(Mat,int,int*,int**,Scalar**);
16a30f8f8cSSatish Balay extern int MatRestoreRow_SeqSBAIJ(Mat,int,int*,int**,Scalar**);
17a30f8f8cSSatish Balay extern int MatPrintHelp_SeqSBAIJ(Mat);
18a30f8f8cSSatish Balay extern int MatZeroRows_SeqSBAIJ(Mat,IS,Scalar*);
19a30f8f8cSSatish Balay 
20a30f8f8cSSatish Balay /*  UGLY, ugly, ugly
21a30f8f8cSSatish Balay    When MatScalar == Scalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does
22a30f8f8cSSatish Balay    not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and
23a30f8f8cSSatish Balay    inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ()
24a30f8f8cSSatish Balay    converts the entries into single precision and then calls ..._MatScalar() to put them
25a30f8f8cSSatish Balay    into the single precision data structures.
26a30f8f8cSSatish Balay */
27a30f8f8cSSatish Balay #if defined(PETSC_USE_MAT_SINGLE)
28a30f8f8cSSatish Balay extern int MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
29a30f8f8cSSatish Balay extern int MatSetValues_MPISBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
30a30f8f8cSSatish Balay extern int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
31a30f8f8cSSatish Balay extern int MatSetValues_MPISBAIJ_HT_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
32a30f8f8cSSatish Balay extern int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
33a30f8f8cSSatish Balay #else
34a30f8f8cSSatish Balay #define MatSetValuesBlocked_SeqSBAIJ_MatScalar      MatSetValuesBlocked_SeqSBAIJ
35a30f8f8cSSatish Balay #define MatSetValues_MPISBAIJ_MatScalar             MatSetValues_MPISBAIJ
36a30f8f8cSSatish Balay #define MatSetValuesBlocked_MPISBAIJ_MatScalar      MatSetValuesBlocked_MPISBAIJ
37a30f8f8cSSatish Balay #define MatSetValues_MPISBAIJ_HT_MatScalar          MatSetValues_MPISBAIJ_HT
38a30f8f8cSSatish Balay #define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar   MatSetValuesBlocked_MPISBAIJ_HT
39a30f8f8cSSatish Balay #endif
40a30f8f8cSSatish Balay 
41a30f8f8cSSatish Balay EXTERN_C_BEGIN
42a30f8f8cSSatish Balay #undef __FUNC__
43a30f8f8cSSatish Balay #define __FUNC__ /*<a name="MatStoreValues_MPIBAIJ"></a>*/"MatStoreValues_MPIBAIJ"
44a30f8f8cSSatish Balay int MatStoreValues_MPISBAIJ(Mat mat)
45a30f8f8cSSatish Balay {
46a30f8f8cSSatish Balay   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
47a30f8f8cSSatish Balay   int         ierr;
48a30f8f8cSSatish Balay 
49a30f8f8cSSatish Balay   PetscFunctionBegin;
50a30f8f8cSSatish Balay   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
51a30f8f8cSSatish Balay   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
52a30f8f8cSSatish Balay   PetscFunctionReturn(0);
53a30f8f8cSSatish Balay }
54a30f8f8cSSatish Balay EXTERN_C_END
55a30f8f8cSSatish Balay 
56a30f8f8cSSatish Balay EXTERN_C_BEGIN
57a30f8f8cSSatish Balay #undef __FUNC__
58a30f8f8cSSatish Balay #define __FUNC__ /*<a name="MatRetrieveValues_MPIBAIJ"></a>*/"MatRetrieveValues_MPIBAIJ"
59a30f8f8cSSatish Balay int MatRetrieveValues_MPISBAIJ(Mat mat)
60a30f8f8cSSatish Balay {
61a30f8f8cSSatish Balay   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
62a30f8f8cSSatish Balay   int         ierr;
63a30f8f8cSSatish Balay 
64a30f8f8cSSatish Balay   PetscFunctionBegin;
65a30f8f8cSSatish Balay   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
66a30f8f8cSSatish Balay   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
67a30f8f8cSSatish Balay   PetscFunctionReturn(0);
68a30f8f8cSSatish Balay }
69a30f8f8cSSatish Balay EXTERN_C_END
70a30f8f8cSSatish Balay 
71a30f8f8cSSatish Balay /*
72a30f8f8cSSatish Balay      Local utility routine that creates a mapping from the global column
73a30f8f8cSSatish Balay    number to the local number in the off-diagonal part of the local
74a30f8f8cSSatish Balay    storage of the matrix.  This is done in a non scable way since the
75a30f8f8cSSatish Balay    length of colmap equals the global matrix length.
76a30f8f8cSSatish Balay */
77a30f8f8cSSatish Balay #undef __FUNC__
78a30f8f8cSSatish Balay #define __FUNC__ /*<a name="CreateColmap_MPISBAIJ_Private"></a>*/"CreateColmap_MPISBAIJ_Private"
79a30f8f8cSSatish Balay static int CreateColmap_MPISBAIJ_Private(Mat mat)
80a30f8f8cSSatish Balay {
81a30f8f8cSSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
82a30f8f8cSSatish Balay   Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data;
83a30f8f8cSSatish Balay   int         nbs = B->nbs,i,bs=B->bs,ierr;
84a30f8f8cSSatish Balay 
85a30f8f8cSSatish Balay   PetscFunctionBegin;
86a30f8f8cSSatish Balay #if defined (PETSC_USE_CTABLE)
87a30f8f8cSSatish Balay   ierr = PetscTableCreate(baij->nbs/5,&baij->colmap);CHKERRQ(ierr);
88a30f8f8cSSatish Balay   for (i=0; i<nbs; i++){
89a30f8f8cSSatish Balay     ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
90a30f8f8cSSatish Balay   }
91a30f8f8cSSatish Balay #else
92a30f8f8cSSatish Balay   baij->colmap = (int*)PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap);
93a30f8f8cSSatish Balay   PLogObjectMemory(mat,baij->Nbs*sizeof(int));
94a30f8f8cSSatish Balay   ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));CHKERRQ(ierr);
95a30f8f8cSSatish Balay   for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
96a30f8f8cSSatish Balay #endif
97a30f8f8cSSatish Balay   PetscFunctionReturn(0);
98a30f8f8cSSatish Balay }
99a30f8f8cSSatish Balay 
100a30f8f8cSSatish Balay #define CHUNKSIZE  10
101a30f8f8cSSatish Balay 
102a30f8f8cSSatish Balay #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
103a30f8f8cSSatish Balay { \
104a30f8f8cSSatish Balay  \
105a30f8f8cSSatish Balay     brow = row/bs;  \
106a30f8f8cSSatish Balay     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
107a30f8f8cSSatish Balay     rmax = aimax[brow]; nrow = ailen[brow]; \
108a30f8f8cSSatish Balay       bcol = col/bs; \
109a30f8f8cSSatish Balay       ridx = row % bs; cidx = col % bs; \
110a30f8f8cSSatish Balay       low = 0; high = nrow; \
111a30f8f8cSSatish Balay       while (high-low > 3) { \
112a30f8f8cSSatish Balay         t = (low+high)/2; \
113a30f8f8cSSatish Balay         if (rp[t] > bcol) high = t; \
114a30f8f8cSSatish Balay         else              low  = t; \
115a30f8f8cSSatish Balay       } \
116a30f8f8cSSatish Balay       for (_i=low; _i<high; _i++) { \
117a30f8f8cSSatish Balay         if (rp[_i] > bcol) break; \
118a30f8f8cSSatish Balay         if (rp[_i] == bcol) { \
119a30f8f8cSSatish Balay           bap  = ap +  bs2*_i + bs*cidx + ridx; \
120a30f8f8cSSatish Balay           if (addv == ADD_VALUES) *bap += value;  \
121a30f8f8cSSatish Balay           else                    *bap  = value;  \
122a30f8f8cSSatish Balay           goto a_noinsert; \
123a30f8f8cSSatish Balay         } \
124a30f8f8cSSatish Balay       } \
125a30f8f8cSSatish Balay       if (a->nonew == 1) goto a_noinsert; \
126a30f8f8cSSatish Balay       else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
127a30f8f8cSSatish Balay       if (nrow >= rmax) { \
128a30f8f8cSSatish Balay         /* there is no extra room in row, therefore enlarge */ \
129a30f8f8cSSatish Balay         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
130a30f8f8cSSatish Balay         MatScalar *new_a; \
131a30f8f8cSSatish Balay  \
132a30f8f8cSSatish Balay         if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
133a30f8f8cSSatish Balay  \
134a30f8f8cSSatish Balay         /* malloc new storage space */ \
135a30f8f8cSSatish Balay         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \
136a30f8f8cSSatish Balay         new_a   = (MatScalar*)PetscMalloc(len);CHKPTRQ(new_a); \
137a30f8f8cSSatish Balay         new_j   = (int*)(new_a + bs2*new_nz); \
138a30f8f8cSSatish Balay         new_i   = new_j + new_nz; \
139a30f8f8cSSatish Balay  \
140a30f8f8cSSatish Balay         /* copy over old data into new slots */ \
141a30f8f8cSSatish Balay         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \
142a30f8f8cSSatish Balay         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
143a30f8f8cSSatish Balay         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
144a30f8f8cSSatish Balay         len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
145a30f8f8cSSatish Balay         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
146a30f8f8cSSatish Balay         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
147a30f8f8cSSatish Balay         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));CHKERRQ(ierr); \
148a30f8f8cSSatish Balay         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
149a30f8f8cSSatish Balay                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
150a30f8f8cSSatish Balay         /* free up old matrix storage */ \
151a30f8f8cSSatish Balay         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
152a30f8f8cSSatish Balay         if (!a->singlemalloc) { \
153a30f8f8cSSatish Balay           ierr = PetscFree(a->i);CHKERRQ(ierr); \
154a30f8f8cSSatish Balay           ierr = PetscFree(a->j);CHKERRQ(ierr);\
155a30f8f8cSSatish Balay         } \
156a30f8f8cSSatish Balay         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
157a30f8f8cSSatish Balay         a->singlemalloc = PETSC_TRUE; \
158a30f8f8cSSatish Balay  \
159a30f8f8cSSatish Balay         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
160a30f8f8cSSatish Balay         rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
161a30f8f8cSSatish Balay         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
162a30f8f8cSSatish Balay         a->s_maxnz += bs2*CHUNKSIZE; \
163a30f8f8cSSatish Balay         a->reallocs++; \
164a30f8f8cSSatish Balay         a->s_nz++; \
165a30f8f8cSSatish Balay       } \
166a30f8f8cSSatish Balay       N = nrow++ - 1;  \
167a30f8f8cSSatish Balay       /* shift up all the later entries in this row */ \
168a30f8f8cSSatish Balay       for (ii=N; ii>=_i; ii--) { \
169a30f8f8cSSatish Balay         rp[ii+1] = rp[ii]; \
170a30f8f8cSSatish Balay         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
171a30f8f8cSSatish Balay       } \
172a30f8f8cSSatish Balay       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
173a30f8f8cSSatish Balay       rp[_i]                      = bcol;  \
174a30f8f8cSSatish Balay       ap[bs2*_i + bs*cidx + ridx] = value;  \
175a30f8f8cSSatish Balay       a_noinsert:; \
176a30f8f8cSSatish Balay     ailen[brow] = nrow; \
177a30f8f8cSSatish Balay }
178a30f8f8cSSatish Balay #ifndef MatSetValues_SeqBAIJ_B_Private
179a30f8f8cSSatish Balay #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
180a30f8f8cSSatish Balay { \
181a30f8f8cSSatish Balay     brow = row/bs;  \
182a30f8f8cSSatish Balay     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
183a30f8f8cSSatish Balay     rmax = bimax[brow]; nrow = bilen[brow]; \
184a30f8f8cSSatish Balay       bcol = col/bs; \
185a30f8f8cSSatish Balay       ridx = row % bs; cidx = col % bs; \
186a30f8f8cSSatish Balay       low = 0; high = nrow; \
187a30f8f8cSSatish Balay       while (high-low > 3) { \
188a30f8f8cSSatish Balay         t = (low+high)/2; \
189a30f8f8cSSatish Balay         if (rp[t] > bcol) high = t; \
190a30f8f8cSSatish Balay         else              low  = t; \
191a30f8f8cSSatish Balay       } \
192a30f8f8cSSatish Balay       for (_i=low; _i<high; _i++) { \
193a30f8f8cSSatish Balay         if (rp[_i] > bcol) break; \
194a30f8f8cSSatish Balay         if (rp[_i] == bcol) { \
195a30f8f8cSSatish Balay           bap  = ap +  bs2*_i + bs*cidx + ridx; \
196a30f8f8cSSatish Balay           if (addv == ADD_VALUES) *bap += value;  \
197a30f8f8cSSatish Balay           else                    *bap  = value;  \
198a30f8f8cSSatish Balay           goto b_noinsert; \
199a30f8f8cSSatish Balay         } \
200a30f8f8cSSatish Balay       } \
201a30f8f8cSSatish Balay       if (b->nonew == 1) goto b_noinsert; \
202a30f8f8cSSatish Balay       else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
203a30f8f8cSSatish Balay       if (nrow >= rmax) { \
204a30f8f8cSSatish Balay         /* there is no extra room in row, therefore enlarge */ \
205a30f8f8cSSatish Balay         int       new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
206a30f8f8cSSatish Balay         MatScalar *new_a; \
207a30f8f8cSSatish Balay  \
208a30f8f8cSSatish Balay         if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
209a30f8f8cSSatish Balay  \
210a30f8f8cSSatish Balay         /* malloc new storage space */ \
211a30f8f8cSSatish Balay         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \
212a30f8f8cSSatish Balay         new_a   = (MatScalar*)PetscMalloc(len);CHKPTRQ(new_a); \
213a30f8f8cSSatish Balay         new_j   = (int*)(new_a + bs2*new_nz); \
214a30f8f8cSSatish Balay         new_i   = new_j + new_nz; \
215a30f8f8cSSatish Balay  \
216a30f8f8cSSatish Balay         /* copy over old data into new slots */ \
217a30f8f8cSSatish Balay         for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \
218a30f8f8cSSatish Balay         for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
219a30f8f8cSSatish Balay         ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
220a30f8f8cSSatish Balay         len  = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
221a30f8f8cSSatish Balay         ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
222a30f8f8cSSatish Balay         ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
223a30f8f8cSSatish Balay         ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
224a30f8f8cSSatish Balay         ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
225a30f8f8cSSatish Balay                     ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
226a30f8f8cSSatish Balay         /* free up old matrix storage */ \
227a30f8f8cSSatish Balay         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
228a30f8f8cSSatish Balay         if (!b->singlemalloc) { \
229a30f8f8cSSatish Balay           ierr = PetscFree(b->i);CHKERRQ(ierr); \
230a30f8f8cSSatish Balay           ierr = PetscFree(b->j);CHKERRQ(ierr); \
231a30f8f8cSSatish Balay         } \
232a30f8f8cSSatish Balay         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
233a30f8f8cSSatish Balay         b->singlemalloc = PETSC_TRUE; \
234a30f8f8cSSatish Balay  \
235a30f8f8cSSatish Balay         rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
236a30f8f8cSSatish Balay         rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
237a30f8f8cSSatish Balay         PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
238a30f8f8cSSatish Balay         b->maxnz += bs2*CHUNKSIZE; \
239a30f8f8cSSatish Balay         b->reallocs++; \
240a30f8f8cSSatish Balay         b->nz++; \
241a30f8f8cSSatish Balay       } \
242a30f8f8cSSatish Balay       N = nrow++ - 1;  \
243a30f8f8cSSatish Balay       /* shift up all the later entries in this row */ \
244a30f8f8cSSatish Balay       for (ii=N; ii>=_i; ii--) { \
245a30f8f8cSSatish Balay         rp[ii+1] = rp[ii]; \
246a30f8f8cSSatish Balay         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
247a30f8f8cSSatish Balay       } \
248a30f8f8cSSatish Balay       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
249a30f8f8cSSatish Balay       rp[_i]                      = bcol;  \
250a30f8f8cSSatish Balay       ap[bs2*_i + bs*cidx + ridx] = value;  \
251a30f8f8cSSatish Balay       b_noinsert:; \
252a30f8f8cSSatish Balay     bilen[brow] = nrow; \
253a30f8f8cSSatish Balay }
254a30f8f8cSSatish Balay #endif
255a30f8f8cSSatish Balay 
256a30f8f8cSSatish Balay #if defined(PETSC_USE_MAT_SINGLE)
257a30f8f8cSSatish Balay #undef __FUNC__
258a30f8f8cSSatish Balay #define __FUNC__ /*<a name="MatSetValues_MPISBAIJ"></a>*/"MatSetValues_MPISBAIJ"
259a30f8f8cSSatish Balay int MatSetValues_MPISBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
260a30f8f8cSSatish Balay {
261a30f8f8cSSatish Balay   Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)mat->data;
262a30f8f8cSSatish Balay   int         ierr,i,N = m*n;
263a30f8f8cSSatish Balay   MatScalar   *vsingle;
264a30f8f8cSSatish Balay 
265a30f8f8cSSatish Balay   PetscFunctionBegin;
266a30f8f8cSSatish Balay   if (N > b->setvalueslen) {
267a30f8f8cSSatish Balay     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
268a30f8f8cSSatish Balay     b->setvaluescopy = (MatScalar*)PetscMalloc(N*sizeof(MatScalar));CHKPTRQ(b->setvaluescopy);
269a30f8f8cSSatish Balay     b->setvalueslen  = N;
270a30f8f8cSSatish Balay   }
271a30f8f8cSSatish Balay   vsingle = b->setvaluescopy;
272a30f8f8cSSatish Balay 
273a30f8f8cSSatish Balay   for (i=0; i<N; i++) {
274a30f8f8cSSatish Balay     vsingle[i] = v[i];
275a30f8f8cSSatish Balay   }
276a30f8f8cSSatish Balay   ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
277a30f8f8cSSatish Balay   PetscFunctionReturn(0);
278a30f8f8cSSatish Balay }
279a30f8f8cSSatish Balay 
280a30f8f8cSSatish Balay #undef __FUNC__
281a30f8f8cSSatish Balay #define __FUNC__ /*<a name="MatSetValuesBlocked_MPISBAIJ"></a>*/"MatSetValuesBlocked_MPISBAIJ"
282a30f8f8cSSatish Balay int MatSetValuesBlocked_MPISBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
283a30f8f8cSSatish Balay {
284a30f8f8cSSatish Balay   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
285a30f8f8cSSatish Balay   int         ierr,i,N = m*n*b->bs2;
286a30f8f8cSSatish Balay   MatScalar   *vsingle;
287a30f8f8cSSatish Balay 
288a30f8f8cSSatish Balay   PetscFunctionBegin;
289a30f8f8cSSatish Balay   if (N > b->setvalueslen) {
290a30f8f8cSSatish Balay     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
291a30f8f8cSSatish Balay     b->setvaluescopy = (MatScalar*)PetscMalloc(N*sizeof(MatScalar));CHKPTRQ(b->setvaluescopy);
292a30f8f8cSSatish Balay     b->setvalueslen  = N;
293a30f8f8cSSatish Balay   }
294a30f8f8cSSatish Balay   vsingle = b->setvaluescopy;
295a30f8f8cSSatish Balay   for (i=0; i<N; i++) {
296a30f8f8cSSatish Balay     vsingle[i] = v[i];
297a30f8f8cSSatish Balay   }
298a30f8f8cSSatish Balay   ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
299a30f8f8cSSatish Balay   PetscFunctionReturn(0);
300a30f8f8cSSatish Balay }
301a30f8f8cSSatish Balay 
302a30f8f8cSSatish Balay #undef __FUNC__
303a30f8f8cSSatish Balay #define __FUNC__ /*<a name="MatSetValues_MPISBAIJ_HT"></a>*/"MatSetValues_MPISBAIJ_HT"
304a30f8f8cSSatish Balay int MatSetValues_MPISBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
305a30f8f8cSSatish Balay {
306a30f8f8cSSatish Balay   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
307a30f8f8cSSatish Balay   int         ierr,i,N = m*n;
308a30f8f8cSSatish Balay   MatScalar   *vsingle;
309a30f8f8cSSatish Balay 
310a30f8f8cSSatish Balay   PetscFunctionBegin;
311a30f8f8cSSatish Balay   if (N > b->setvalueslen) {
312a30f8f8cSSatish Balay     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
313a30f8f8cSSatish Balay     b->setvaluescopy = (MatScalar*)PetscMalloc(N*sizeof(MatScalar));CHKPTRQ(b->setvaluescopy);
314a30f8f8cSSatish Balay     b->setvalueslen  = N;
315a30f8f8cSSatish Balay   }
316a30f8f8cSSatish Balay   vsingle = b->setvaluescopy;
317a30f8f8cSSatish Balay   for (i=0; i<N; i++) {
318a30f8f8cSSatish Balay     vsingle[i] = v[i];
319a30f8f8cSSatish Balay   }
320a30f8f8cSSatish Balay   ierr = MatSetValues_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
321a30f8f8cSSatish Balay   PetscFunctionReturn(0);
322a30f8f8cSSatish Balay }
323a30f8f8cSSatish Balay 
324a30f8f8cSSatish Balay #undef __FUNC__
325a30f8f8cSSatish Balay #define __FUNC__ /*<a name="MatSetValuesBlocked_MPISBAIJ_HT"></a>*/"MatSetValuesBlocked_MPISBAIJ_HT"
326a30f8f8cSSatish Balay int MatSetValuesBlocked_MPISBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
327a30f8f8cSSatish Balay {
328a30f8f8cSSatish Balay   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
329a30f8f8cSSatish Balay   int         ierr,i,N = m*n*b->bs2;
330a30f8f8cSSatish Balay   MatScalar   *vsingle;
331a30f8f8cSSatish Balay 
332a30f8f8cSSatish Balay   PetscFunctionBegin;
333a30f8f8cSSatish Balay   if (N > b->setvalueslen) {
334a30f8f8cSSatish Balay     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
335a30f8f8cSSatish Balay     b->setvaluescopy = (MatScalar*)PetscMalloc(N*sizeof(MatScalar));CHKPTRQ(b->setvaluescopy);
336a30f8f8cSSatish Balay     b->setvalueslen  = N;
337a30f8f8cSSatish Balay   }
338a30f8f8cSSatish Balay   vsingle = b->setvaluescopy;
339a30f8f8cSSatish Balay   for (i=0; i<N; i++) {
340a30f8f8cSSatish Balay     vsingle[i] = v[i];
341a30f8f8cSSatish Balay   }
342a30f8f8cSSatish Balay   ierr = MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
343a30f8f8cSSatish Balay   PetscFunctionReturn(0);
344a30f8f8cSSatish Balay }
345a30f8f8cSSatish Balay #endif
346a30f8f8cSSatish Balay 
347a30f8f8cSSatish Balay /* Only add/insert a(i,j) with i<=j (blocks).
348a30f8f8cSSatish Balay    Any a(i,j) with i>j input by user is ingored.
349a30f8f8cSSatish Balay */
350a30f8f8cSSatish Balay #undef __FUNC__
351a30f8f8cSSatish Balay #define __FUNC__ /*<a name="MatSetValues_MPIBAIJ"></a>*/"MatSetValues_MPIBAIJ"
352a30f8f8cSSatish Balay int MatSetValues_MPISBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
353a30f8f8cSSatish Balay {
354a30f8f8cSSatish Balay   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
355a30f8f8cSSatish Balay   MatScalar   value;
356a30f8f8cSSatish Balay   int         ierr,i,j,row,col;
357a30f8f8cSSatish Balay   int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
358a30f8f8cSSatish Balay   int         rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
359a30f8f8cSSatish Balay   int         cend_orig=baij->cend_bs,bs=baij->bs;
360a30f8f8cSSatish Balay 
361a30f8f8cSSatish Balay   /* Some Variables required in the macro */
362a30f8f8cSSatish Balay   Mat         A = baij->A;
363a30f8f8cSSatish Balay   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data;
364a30f8f8cSSatish Balay   int         *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
365a30f8f8cSSatish Balay   MatScalar   *aa=a->a;
366a30f8f8cSSatish Balay 
367a30f8f8cSSatish Balay   Mat         B = baij->B;
368a30f8f8cSSatish Balay   Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data;
369a30f8f8cSSatish Balay   int         *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
370a30f8f8cSSatish Balay   MatScalar   *ba=b->a;
371a30f8f8cSSatish Balay 
372a30f8f8cSSatish Balay   int         *rp,ii,nrow,_i,rmax,N,brow,bcol;
373a30f8f8cSSatish Balay   int         low,high,t,ridx,cidx,bs2=a->bs2;
374a30f8f8cSSatish Balay   MatScalar   *ap,*bap;
375a30f8f8cSSatish Balay 
376a30f8f8cSSatish Balay   /* for stash */
377f65c83cfSHong Zhang   int         n_loc, *in_loc=0;
378f65c83cfSHong Zhang   MatScalar   *v_loc=0;
379a30f8f8cSSatish Balay 
380a30f8f8cSSatish Balay   PetscFunctionBegin;
381a30f8f8cSSatish Balay 
382a30f8f8cSSatish Balay   if(!baij->donotstash){
383a30f8f8cSSatish Balay     in_loc = (int*)PetscMalloc(n*sizeof(int)); CHKPTRQ(in_loc);
384a30f8f8cSSatish Balay     v_loc  = (MatScalar*)PetscMalloc(n*sizeof(MatScalar)); CHKPTRQ(v_loc);
385a30f8f8cSSatish Balay   }
386a30f8f8cSSatish Balay 
387a30f8f8cSSatish Balay   for (i=0; i<m; i++) {
388a30f8f8cSSatish Balay     if (im[i] < 0) continue;
389a30f8f8cSSatish Balay #if defined(PETSC_USE_BOPT_g)
390a30f8f8cSSatish Balay     if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
391a30f8f8cSSatish Balay #endif
392a30f8f8cSSatish Balay     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
393a30f8f8cSSatish Balay       row = im[i] - rstart_orig;              /* local row index */
394a30f8f8cSSatish Balay       for (j=0; j<n; j++) {
395f65c83cfSHong Zhang         if (im[i]/bs > in[j]/bs) continue;    /* ignore lower triangular blocks */
396a30f8f8cSSatish Balay         if (in[j] >= cstart_orig && in[j] < cend_orig){  /* diag entry (A) */
397a30f8f8cSSatish Balay           col = in[j] - cstart_orig;          /* local col index */
398a30f8f8cSSatish Balay           brow = row/bs; bcol = col/bs;
399a30f8f8cSSatish Balay           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
400a30f8f8cSSatish Balay           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
401a30f8f8cSSatish Balay           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
402a30f8f8cSSatish Balay           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
403a30f8f8cSSatish Balay         } else if (in[j] < 0) continue;
404a30f8f8cSSatish Balay #if defined(PETSC_USE_BOPT_g)
405a30f8f8cSSatish Balay         else if (in[j] >= baij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Col too large");}
406a30f8f8cSSatish Balay #endif
407a30f8f8cSSatish Balay         else {  /* off-diag entry (B) */
408a30f8f8cSSatish Balay           if (mat->was_assembled) {
409a30f8f8cSSatish Balay             if (!baij->colmap) {
410a30f8f8cSSatish Balay               ierr = CreateColmap_MPISBAIJ_Private(mat);CHKERRQ(ierr);
411a30f8f8cSSatish Balay             }
412a30f8f8cSSatish Balay #if defined (PETSC_USE_CTABLE)
413a30f8f8cSSatish Balay             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
414a30f8f8cSSatish Balay             col  = col - 1 + in[j]%bs;
415a30f8f8cSSatish Balay #else
416a30f8f8cSSatish Balay             col = baij->colmap[in[j]/bs] - 1 + in[j]%bs;
417a30f8f8cSSatish Balay #endif
418a30f8f8cSSatish Balay             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
419a30f8f8cSSatish Balay               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
420a30f8f8cSSatish Balay               col =  in[j];
421a30f8f8cSSatish Balay               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
422a30f8f8cSSatish Balay               B = baij->B;
423a30f8f8cSSatish Balay               b = (Mat_SeqBAIJ*)(B)->data;
424a30f8f8cSSatish Balay               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
425a30f8f8cSSatish Balay               ba=b->a;
426a30f8f8cSSatish Balay             }
427a30f8f8cSSatish Balay           } else col = in[j];
428a30f8f8cSSatish Balay           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
429a30f8f8cSSatish Balay           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
430a30f8f8cSSatish Balay           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
431a30f8f8cSSatish Balay         }
432a30f8f8cSSatish Balay       }
433a30f8f8cSSatish Balay     } else {  /* off processor entry */
434a30f8f8cSSatish Balay       if (!baij->donotstash) {
435a30f8f8cSSatish Balay         n_loc = 0;
436a30f8f8cSSatish Balay         for (j=0; j<n; j++){
437f65c83cfSHong Zhang           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
438a30f8f8cSSatish Balay           in_loc[n_loc] = in[j];
439a30f8f8cSSatish Balay           if (roworiented) {
440a30f8f8cSSatish Balay             v_loc[n_loc] = v[i*n+j];
441a30f8f8cSSatish Balay           } else {
442a30f8f8cSSatish Balay             v_loc[n_loc] = v[j*m+i];
443a30f8f8cSSatish Balay           }
444a30f8f8cSSatish Balay           n_loc++;
445a30f8f8cSSatish Balay         }
446a30f8f8cSSatish Balay         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr);
447a30f8f8cSSatish Balay       }
448a30f8f8cSSatish Balay     }
449a30f8f8cSSatish Balay   }
450a30f8f8cSSatish Balay 
451a30f8f8cSSatish Balay   if(!baij->donotstash){
452a30f8f8cSSatish Balay     ierr = PetscFree(in_loc);CHKERRQ(ierr);
453a30f8f8cSSatish Balay     ierr = PetscFree(v_loc);CHKERRQ(ierr);
454a30f8f8cSSatish Balay   }
455a30f8f8cSSatish Balay   PetscFunctionReturn(0);
456a30f8f8cSSatish Balay }
457a30f8f8cSSatish Balay 
458a30f8f8cSSatish Balay #undef __FUNC__
459a30f8f8cSSatish Balay #define __FUNC__ /*<a name="MatSetValuesBlocked_MPISBAIJ"></a>*/"MatSetValuesBlocked_MPISBAIJ"
460a30f8f8cSSatish Balay int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
461a30f8f8cSSatish Balay {
462a30f8f8cSSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
463a30f8f8cSSatish Balay   MatScalar   *value,*barray=baij->barray;
464a30f8f8cSSatish Balay   int         ierr,i,j,ii,jj,row,col;
465a30f8f8cSSatish Balay   int         roworiented = baij->roworiented,rstart=baij->rstart ;
466a30f8f8cSSatish Balay   int         rend=baij->rend,cstart=baij->cstart,stepval;
467a30f8f8cSSatish Balay   int         cend=baij->cend,bs=baij->bs,bs2=baij->bs2;
468a30f8f8cSSatish Balay 
469a30f8f8cSSatish Balay   PetscFunctionBegin;
470a30f8f8cSSatish Balay   if(!barray) {
471a30f8f8cSSatish Balay     baij->barray = barray = (MatScalar*)PetscMalloc(bs2*sizeof(MatScalar));CHKPTRQ(barray);
472a30f8f8cSSatish Balay   }
473a30f8f8cSSatish Balay 
474a30f8f8cSSatish Balay   if (roworiented) {
475a30f8f8cSSatish Balay     stepval = (n-1)*bs;
476a30f8f8cSSatish Balay   } else {
477a30f8f8cSSatish Balay     stepval = (m-1)*bs;
478a30f8f8cSSatish Balay   }
479a30f8f8cSSatish Balay   for (i=0; i<m; i++) {
480a30f8f8cSSatish Balay     if (im[i] < 0) continue;
481a30f8f8cSSatish Balay #if defined(PETSC_USE_BOPT_g)
482a30f8f8cSSatish Balay     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large, row %d max %d",im[i],baij->Mbs);
483a30f8f8cSSatish Balay #endif
484a30f8f8cSSatish Balay     if (im[i] >= rstart && im[i] < rend) {
485a30f8f8cSSatish Balay       row = im[i] - rstart;
486a30f8f8cSSatish Balay       for (j=0; j<n; j++) {
487a30f8f8cSSatish Balay         /* If NumCol = 1 then a copy is not required */
488a30f8f8cSSatish Balay         if ((roworiented) && (n == 1)) {
489a30f8f8cSSatish Balay           barray = v + i*bs2;
490a30f8f8cSSatish Balay         } else if((!roworiented) && (m == 1)) {
491a30f8f8cSSatish Balay           barray = v + j*bs2;
492a30f8f8cSSatish Balay         } else { /* Here a copy is required */
493a30f8f8cSSatish Balay           if (roworiented) {
494a30f8f8cSSatish Balay             value = v + i*(stepval+bs)*bs + j*bs;
495a30f8f8cSSatish Balay           } else {
496a30f8f8cSSatish Balay             value = v + j*(stepval+bs)*bs + i*bs;
497a30f8f8cSSatish Balay           }
498a30f8f8cSSatish Balay           for (ii=0; ii<bs; ii++,value+=stepval) {
499a30f8f8cSSatish Balay             for (jj=0; jj<bs; jj++) {
500a30f8f8cSSatish Balay               *barray++  = *value++;
501a30f8f8cSSatish Balay             }
502a30f8f8cSSatish Balay           }
503a30f8f8cSSatish Balay           barray -=bs2;
504a30f8f8cSSatish Balay         }
505a30f8f8cSSatish Balay 
506a30f8f8cSSatish Balay         if (in[j] >= cstart && in[j] < cend){
507a30f8f8cSSatish Balay           col  = in[j] - cstart;
508a30f8f8cSSatish Balay           ierr = MatSetValuesBlocked_SeqSBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
509a30f8f8cSSatish Balay         }
510a30f8f8cSSatish Balay         else if (in[j] < 0) continue;
511a30f8f8cSSatish Balay #if defined(PETSC_USE_BOPT_g)
512a30f8f8cSSatish Balay         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large, col %d max %d",in[j],baij->Nbs);}
513a30f8f8cSSatish Balay #endif
514a30f8f8cSSatish Balay         else {
515a30f8f8cSSatish Balay           if (mat->was_assembled) {
516a30f8f8cSSatish Balay             if (!baij->colmap) {
517a30f8f8cSSatish Balay               ierr = CreateColmap_MPISBAIJ_Private(mat);CHKERRQ(ierr);
518a30f8f8cSSatish Balay             }
519a30f8f8cSSatish Balay 
520a30f8f8cSSatish Balay #if defined(PETSC_USE_BOPT_g)
521a30f8f8cSSatish Balay #if defined (PETSC_USE_CTABLE)
522a30f8f8cSSatish Balay             { int data;
523a30f8f8cSSatish Balay               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
524a30f8f8cSSatish Balay               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");
525a30f8f8cSSatish Balay             }
526a30f8f8cSSatish Balay #else
527a30f8f8cSSatish Balay             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");
528a30f8f8cSSatish Balay #endif
529a30f8f8cSSatish Balay #endif
530a30f8f8cSSatish Balay #if defined (PETSC_USE_CTABLE)
531a30f8f8cSSatish Balay 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
532a30f8f8cSSatish Balay             col  = (col - 1)/bs;
533a30f8f8cSSatish Balay #else
534a30f8f8cSSatish Balay             col = (baij->colmap[in[j]] - 1)/bs;
535a30f8f8cSSatish Balay #endif
536a30f8f8cSSatish Balay             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
537c8407628SSatish Balay               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
538a30f8f8cSSatish Balay               col =  in[j];
539a30f8f8cSSatish Balay             }
540a30f8f8cSSatish Balay           }
541a30f8f8cSSatish Balay           else col = in[j];
542a30f8f8cSSatish Balay           ierr = MatSetValuesBlocked_SeqSBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
543a30f8f8cSSatish Balay         }
544a30f8f8cSSatish Balay       }
545a30f8f8cSSatish Balay     } else {
546a30f8f8cSSatish Balay       if (!baij->donotstash) {
547a30f8f8cSSatish Balay         if (roworiented) {
548a30f8f8cSSatish Balay           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
549a30f8f8cSSatish Balay         } else {
550a30f8f8cSSatish Balay           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
551a30f8f8cSSatish Balay         }
552a30f8f8cSSatish Balay       }
553a30f8f8cSSatish Balay     }
554a30f8f8cSSatish Balay   }
555a30f8f8cSSatish Balay   PetscFunctionReturn(0);
556a30f8f8cSSatish Balay }
557a30f8f8cSSatish Balay 
558a30f8f8cSSatish Balay #define HASH_KEY 0.6180339887
559a30f8f8cSSatish Balay #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
560a30f8f8cSSatish Balay /* #define HASH(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
561a30f8f8cSSatish Balay /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
562a30f8f8cSSatish Balay #undef __FUNC__
563a30f8f8cSSatish Balay #define __FUNC__ /*<a name="MatSetValues_MPISBAIJ_HT_MatScalar"></a>*/"MatSetValues_MPISBAIJ_HT_MatScalar"
564a30f8f8cSSatish Balay int MatSetValues_MPISBAIJ_HT_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
565a30f8f8cSSatish Balay {
566a30f8f8cSSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
567a30f8f8cSSatish Balay   int         ierr,i,j,row,col;
568a30f8f8cSSatish Balay   int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
569a30f8f8cSSatish Balay   int         rend_orig=baij->rend_bs,Nbs=baij->Nbs;
570a30f8f8cSSatish Balay   int         h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx;
571a30f8f8cSSatish Balay   PetscReal   tmp;
572a30f8f8cSSatish Balay   MatScalar   **HD = baij->hd,value;
573a30f8f8cSSatish Balay #if defined(PETSC_USE_BOPT_g)
574a30f8f8cSSatish Balay   int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
575a30f8f8cSSatish Balay #endif
576a30f8f8cSSatish Balay 
577a30f8f8cSSatish Balay   PetscFunctionBegin;
578a30f8f8cSSatish Balay 
579a30f8f8cSSatish Balay   for (i=0; i<m; i++) {
580a30f8f8cSSatish Balay #if defined(PETSC_USE_BOPT_g)
581a30f8f8cSSatish Balay     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
582a30f8f8cSSatish Balay     if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
583a30f8f8cSSatish Balay #endif
584a30f8f8cSSatish Balay       row = im[i];
585a30f8f8cSSatish Balay     if (row >= rstart_orig && row < rend_orig) {
586a30f8f8cSSatish Balay       for (j=0; j<n; j++) {
587a30f8f8cSSatish Balay         col = in[j];
588a30f8f8cSSatish Balay         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
589a30f8f8cSSatish Balay         /* Look up into the Hash Table */
590a30f8f8cSSatish Balay         key = (row/bs)*Nbs+(col/bs)+1;
591a30f8f8cSSatish Balay         h1  = HASH(size,key,tmp);
592a30f8f8cSSatish Balay 
593a30f8f8cSSatish Balay 
594a30f8f8cSSatish Balay         idx = h1;
595a30f8f8cSSatish Balay #if defined(PETSC_USE_BOPT_g)
596a30f8f8cSSatish Balay         insert_ct++;
597a30f8f8cSSatish Balay         total_ct++;
598a30f8f8cSSatish Balay         if (HT[idx] != key) {
599a30f8f8cSSatish Balay           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
600a30f8f8cSSatish Balay           if (idx == size) {
601a30f8f8cSSatish Balay             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
602a30f8f8cSSatish Balay             if (idx == h1) {
603a30f8f8cSSatish Balay               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
604a30f8f8cSSatish Balay             }
605a30f8f8cSSatish Balay           }
606a30f8f8cSSatish Balay         }
607a30f8f8cSSatish Balay #else
608a30f8f8cSSatish Balay         if (HT[idx] != key) {
609a30f8f8cSSatish Balay           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
610a30f8f8cSSatish Balay           if (idx == size) {
611a30f8f8cSSatish Balay             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
612a30f8f8cSSatish Balay             if (idx == h1) {
613a30f8f8cSSatish Balay               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
614a30f8f8cSSatish Balay             }
615a30f8f8cSSatish Balay           }
616a30f8f8cSSatish Balay         }
617a30f8f8cSSatish Balay #endif
618a30f8f8cSSatish Balay         /* A HASH table entry is found, so insert the values at the correct address */
619a30f8f8cSSatish Balay         if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
620a30f8f8cSSatish Balay         else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
621a30f8f8cSSatish Balay       }
622a30f8f8cSSatish Balay     } else {
623a30f8f8cSSatish Balay       if (!baij->donotstash) {
624a30f8f8cSSatish Balay         if (roworiented) {
625a30f8f8cSSatish Balay           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
626a30f8f8cSSatish Balay         } else {
627a30f8f8cSSatish Balay           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
628a30f8f8cSSatish Balay         }
629a30f8f8cSSatish Balay       }
630a30f8f8cSSatish Balay     }
631a30f8f8cSSatish Balay   }
632a30f8f8cSSatish Balay #if defined(PETSC_USE_BOPT_g)
633a30f8f8cSSatish Balay   baij->ht_total_ct = total_ct;
634a30f8f8cSSatish Balay   baij->ht_insert_ct = insert_ct;
635a30f8f8cSSatish Balay #endif
636a30f8f8cSSatish Balay   PetscFunctionReturn(0);
637a30f8f8cSSatish Balay }
638a30f8f8cSSatish Balay 
639a30f8f8cSSatish Balay #undef __FUNC__
640a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatSetValuesBlocked_MPISBAIJ_HT_MatScalar"
641a30f8f8cSSatish Balay int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
642a30f8f8cSSatish Balay {
643a30f8f8cSSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
644a30f8f8cSSatish Balay   int         ierr,i,j,ii,jj,row,col;
645a30f8f8cSSatish Balay   int         roworiented = baij->roworiented,rstart=baij->rstart ;
646a30f8f8cSSatish Balay   int         rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2;
647a30f8f8cSSatish Balay   int         h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
648a30f8f8cSSatish Balay   PetscReal   tmp;
649a30f8f8cSSatish Balay   MatScalar   **HD = baij->hd,*baij_a;
650a30f8f8cSSatish Balay   MatScalar   *v_t,*value;
651a30f8f8cSSatish Balay #if defined(PETSC_USE_BOPT_g)
652a30f8f8cSSatish Balay   int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
653a30f8f8cSSatish Balay #endif
654a30f8f8cSSatish Balay 
655a30f8f8cSSatish Balay   PetscFunctionBegin;
656a30f8f8cSSatish Balay 
657a30f8f8cSSatish Balay   if (roworiented) {
658a30f8f8cSSatish Balay     stepval = (n-1)*bs;
659a30f8f8cSSatish Balay   } else {
660a30f8f8cSSatish Balay     stepval = (m-1)*bs;
661a30f8f8cSSatish Balay   }
662a30f8f8cSSatish Balay   for (i=0; i<m; i++) {
663a30f8f8cSSatish Balay #if defined(PETSC_USE_BOPT_g)
664a30f8f8cSSatish Balay     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
665a30f8f8cSSatish Balay     if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
666a30f8f8cSSatish Balay #endif
667a30f8f8cSSatish Balay     row   = im[i];
668a30f8f8cSSatish Balay     v_t   = v + i*bs2;
669a30f8f8cSSatish Balay     if (row >= rstart && row < rend) {
670a30f8f8cSSatish Balay       for (j=0; j<n; j++) {
671a30f8f8cSSatish Balay         col = in[j];
672a30f8f8cSSatish Balay 
673a30f8f8cSSatish Balay         /* Look up into the Hash Table */
674a30f8f8cSSatish Balay         key = row*Nbs+col+1;
675a30f8f8cSSatish Balay         h1  = HASH(size,key,tmp);
676a30f8f8cSSatish Balay 
677a30f8f8cSSatish Balay         idx = h1;
678a30f8f8cSSatish Balay #if defined(PETSC_USE_BOPT_g)
679a30f8f8cSSatish Balay         total_ct++;
680a30f8f8cSSatish Balay         insert_ct++;
681a30f8f8cSSatish Balay        if (HT[idx] != key) {
682a30f8f8cSSatish Balay           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
683a30f8f8cSSatish Balay           if (idx == size) {
684a30f8f8cSSatish Balay             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
685a30f8f8cSSatish Balay             if (idx == h1) {
686a30f8f8cSSatish Balay               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
687a30f8f8cSSatish Balay             }
688a30f8f8cSSatish Balay           }
689a30f8f8cSSatish Balay         }
690a30f8f8cSSatish Balay #else
691a30f8f8cSSatish Balay         if (HT[idx] != key) {
692a30f8f8cSSatish Balay           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
693a30f8f8cSSatish Balay           if (idx == size) {
694a30f8f8cSSatish Balay             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
695a30f8f8cSSatish Balay             if (idx == h1) {
696a30f8f8cSSatish Balay               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
697a30f8f8cSSatish Balay             }
698a30f8f8cSSatish Balay           }
699a30f8f8cSSatish Balay         }
700a30f8f8cSSatish Balay #endif
701a30f8f8cSSatish Balay         baij_a = HD[idx];
702a30f8f8cSSatish Balay         if (roworiented) {
703a30f8f8cSSatish Balay           /*value = v + i*(stepval+bs)*bs + j*bs;*/
704a30f8f8cSSatish Balay           /* value = v + (i*(stepval+bs)+j)*bs; */
705a30f8f8cSSatish Balay           value = v_t;
706a30f8f8cSSatish Balay           v_t  += bs;
707a30f8f8cSSatish Balay           if (addv == ADD_VALUES) {
708a30f8f8cSSatish Balay             for (ii=0; ii<bs; ii++,value+=stepval) {
709a30f8f8cSSatish Balay               for (jj=ii; jj<bs2; jj+=bs) {
710a30f8f8cSSatish Balay                 baij_a[jj]  += *value++;
711a30f8f8cSSatish Balay               }
712a30f8f8cSSatish Balay             }
713a30f8f8cSSatish Balay           } else {
714a30f8f8cSSatish Balay             for (ii=0; ii<bs; ii++,value+=stepval) {
715a30f8f8cSSatish Balay               for (jj=ii; jj<bs2; jj+=bs) {
716a30f8f8cSSatish Balay                 baij_a[jj]  = *value++;
717a30f8f8cSSatish Balay               }
718a30f8f8cSSatish Balay             }
719a30f8f8cSSatish Balay           }
720a30f8f8cSSatish Balay         } else {
721a30f8f8cSSatish Balay           value = v + j*(stepval+bs)*bs + i*bs;
722a30f8f8cSSatish Balay           if (addv == ADD_VALUES) {
723a30f8f8cSSatish Balay             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
724a30f8f8cSSatish Balay               for (jj=0; jj<bs; jj++) {
725a30f8f8cSSatish Balay                 baij_a[jj]  += *value++;
726a30f8f8cSSatish Balay               }
727a30f8f8cSSatish Balay             }
728a30f8f8cSSatish Balay           } else {
729a30f8f8cSSatish Balay             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
730a30f8f8cSSatish Balay               for (jj=0; jj<bs; jj++) {
731a30f8f8cSSatish Balay                 baij_a[jj]  = *value++;
732a30f8f8cSSatish Balay               }
733a30f8f8cSSatish Balay             }
734a30f8f8cSSatish Balay           }
735a30f8f8cSSatish Balay         }
736a30f8f8cSSatish Balay       }
737a30f8f8cSSatish Balay     } else {
738a30f8f8cSSatish Balay       if (!baij->donotstash) {
739a30f8f8cSSatish Balay         if (roworiented) {
740a30f8f8cSSatish Balay           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
741a30f8f8cSSatish Balay         } else {
742a30f8f8cSSatish Balay           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
743a30f8f8cSSatish Balay         }
744a30f8f8cSSatish Balay       }
745a30f8f8cSSatish Balay     }
746a30f8f8cSSatish Balay   }
747a30f8f8cSSatish Balay #if defined(PETSC_USE_BOPT_g)
748a30f8f8cSSatish Balay   baij->ht_total_ct = total_ct;
749a30f8f8cSSatish Balay   baij->ht_insert_ct = insert_ct;
750a30f8f8cSSatish Balay #endif
751a30f8f8cSSatish Balay   PetscFunctionReturn(0);
752a30f8f8cSSatish Balay }
753a30f8f8cSSatish Balay 
754a30f8f8cSSatish Balay #undef __FUNC__
755a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatGetValues_MPISBAIJ"
756a30f8f8cSSatish Balay int MatGetValues_MPISBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
757a30f8f8cSSatish Balay {
758a30f8f8cSSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
759a30f8f8cSSatish Balay   int        bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
760a30f8f8cSSatish Balay   int        bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;
761a30f8f8cSSatish Balay 
762a30f8f8cSSatish Balay   PetscFunctionBegin;
763a30f8f8cSSatish Balay   for (i=0; i<m; i++) {
764a30f8f8cSSatish Balay     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
765a30f8f8cSSatish Balay     if (idxm[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
766a30f8f8cSSatish Balay     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
767a30f8f8cSSatish Balay       row = idxm[i] - bsrstart;
768a30f8f8cSSatish Balay       for (j=0; j<n; j++) {
769a30f8f8cSSatish Balay         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
770a30f8f8cSSatish Balay         if (idxn[j] >= baij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
771a30f8f8cSSatish Balay         if (idxn[j] >= bscstart && idxn[j] < bscend){
772a30f8f8cSSatish Balay           col = idxn[j] - bscstart;
773c8407628SSatish Balay           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
774a30f8f8cSSatish Balay         } else {
775a30f8f8cSSatish Balay           if (!baij->colmap) {
776a30f8f8cSSatish Balay             ierr = CreateColmap_MPISBAIJ_Private(mat);CHKERRQ(ierr);
777a30f8f8cSSatish Balay           }
778a30f8f8cSSatish Balay #if defined (PETSC_USE_CTABLE)
779a30f8f8cSSatish Balay           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
780a30f8f8cSSatish Balay           data --;
781a30f8f8cSSatish Balay #else
782a30f8f8cSSatish Balay           data = baij->colmap[idxn[j]/bs]-1;
783a30f8f8cSSatish Balay #endif
784a30f8f8cSSatish Balay           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
785a30f8f8cSSatish Balay           else {
786a30f8f8cSSatish Balay             col  = data + idxn[j]%bs;
787c8407628SSatish Balay             ierr = MatGetValues_SeqSBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
788a30f8f8cSSatish Balay           }
789a30f8f8cSSatish Balay         }
790a30f8f8cSSatish Balay       }
791a30f8f8cSSatish Balay     } else {
792a30f8f8cSSatish Balay       SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
793a30f8f8cSSatish Balay     }
794a30f8f8cSSatish Balay   }
795a30f8f8cSSatish Balay  PetscFunctionReturn(0);
796a30f8f8cSSatish Balay }
797a30f8f8cSSatish Balay 
798a30f8f8cSSatish Balay #undef __FUNC__
799a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatNorm_MPISBAIJ"
800a30f8f8cSSatish Balay int MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
801a30f8f8cSSatish Balay {
802a30f8f8cSSatish Balay   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
803bc0ba53dSHong Zhang   /* Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ*)baij->A->data; */
804bc0ba53dSHong Zhang   /* Mat_SeqBAIJ  *bmat = (Mat_SeqBAIJ*)baij->B->data; */
805a30f8f8cSSatish Balay   int        ierr;
806a30f8f8cSSatish Balay   PetscReal  sum[2],*lnorm2;
807a30f8f8cSSatish Balay 
808a30f8f8cSSatish Balay   PetscFunctionBegin;
809a30f8f8cSSatish Balay   if (baij->size == 1) {
810a30f8f8cSSatish Balay     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
811a30f8f8cSSatish Balay   } else {
812a30f8f8cSSatish Balay     if (type == NORM_FROBENIUS) {
813ee237329SHong Zhang       lnorm2 = (double*)PetscMalloc(2*sizeof(double));CHKPTRQ(lnorm2);
814a30f8f8cSSatish Balay       ierr =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
815a30f8f8cSSatish Balay       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* squar power of norm(A) */
816a30f8f8cSSatish Balay       ierr =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
817a30f8f8cSSatish Balay       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* squar power of norm(B) */
818a30f8f8cSSatish Balay       /*
819a30f8f8cSSatish Balay       ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
820a30f8f8cSSatish Balay       PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d], lnorm2=%g, %g\n",rank,lnorm2[0],lnorm2[1]);
821a30f8f8cSSatish Balay       */
822a30f8f8cSSatish Balay       ierr = MPI_Allreduce(lnorm2,&sum,2,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
823a30f8f8cSSatish Balay       /*
824a30f8f8cSSatish Balay       PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d], sum=%g, %g\n",rank,sum[0],sum[1]);
825a30f8f8cSSatish Balay       PetscSynchronizedFlush(PETSC_COMM_WORLD); */
826a30f8f8cSSatish Balay 
827a30f8f8cSSatish Balay       *norm = sqrt(sum[0] + 2*sum[1]);
828a30f8f8cSSatish Balay       ierr = PetscFree(lnorm2);CHKERRQ(ierr);
829a30f8f8cSSatish Balay     } else {
830a30f8f8cSSatish Balay       SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet");
831a30f8f8cSSatish Balay     }
832a30f8f8cSSatish Balay   }
833a30f8f8cSSatish Balay   PetscFunctionReturn(0);
834a30f8f8cSSatish Balay }
835a30f8f8cSSatish Balay 
836a30f8f8cSSatish Balay /*
837a30f8f8cSSatish Balay   Creates the hash table, and sets the table
838a30f8f8cSSatish Balay   This table is created only once.
839a30f8f8cSSatish Balay   If new entried need to be added to the matrix
840a30f8f8cSSatish Balay   then the hash table has to be destroyed and
841a30f8f8cSSatish Balay   recreated.
842a30f8f8cSSatish Balay */
843a30f8f8cSSatish Balay #undef __FUNC__
844a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatCreateHashTable_MPISBAIJ_Private"
845a30f8f8cSSatish Balay int MatCreateHashTable_MPISBAIJ_Private(Mat mat,PetscReal factor)
846a30f8f8cSSatish Balay {
847a30f8f8cSSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
848a30f8f8cSSatish Balay   Mat         A = baij->A,B=baij->B;
849a30f8f8cSSatish Balay   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
850a30f8f8cSSatish Balay   int         i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
851a30f8f8cSSatish Balay   int         size,bs2=baij->bs2,rstart=baij->rstart,ierr;
852a30f8f8cSSatish Balay   int         cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs;
853a30f8f8cSSatish Balay   int         *HT,key;
854a30f8f8cSSatish Balay   MatScalar   **HD;
855a30f8f8cSSatish Balay   PetscReal   tmp;
856a30f8f8cSSatish Balay #if defined(PETSC_USE_BOPT_g)
857a30f8f8cSSatish Balay   int         ct=0,max=0;
858a30f8f8cSSatish Balay #endif
859a30f8f8cSSatish Balay 
860a30f8f8cSSatish Balay   PetscFunctionBegin;
861a30f8f8cSSatish Balay   baij->ht_size=(int)(factor*nz);
862a30f8f8cSSatish Balay   size = baij->ht_size;
863a30f8f8cSSatish Balay 
864a30f8f8cSSatish Balay   if (baij->ht) {
865a30f8f8cSSatish Balay     PetscFunctionReturn(0);
866a30f8f8cSSatish Balay   }
867a30f8f8cSSatish Balay 
868a30f8f8cSSatish Balay   /* Allocate Memory for Hash Table */
869a30f8f8cSSatish Balay   baij->hd = (MatScalar**)PetscMalloc((size)*(sizeof(int)+sizeof(MatScalar*))+1);CHKPTRQ(baij->hd);
870a30f8f8cSSatish Balay   baij->ht = (int*)(baij->hd + size);
871a30f8f8cSSatish Balay   HD = baij->hd;
872a30f8f8cSSatish Balay   HT = baij->ht;
873a30f8f8cSSatish Balay 
874a30f8f8cSSatish Balay 
875a30f8f8cSSatish Balay   ierr = PetscMemzero(HD,size*(sizeof(int)+sizeof(Scalar*)));CHKERRQ(ierr);
876a30f8f8cSSatish Balay 
877a30f8f8cSSatish Balay 
878a30f8f8cSSatish Balay   /* Loop Over A */
879a30f8f8cSSatish Balay   for (i=0; i<a->mbs; i++) {
880a30f8f8cSSatish Balay     for (j=ai[i]; j<ai[i+1]; j++) {
881a30f8f8cSSatish Balay       row = i+rstart;
882a30f8f8cSSatish Balay       col = aj[j]+cstart;
883a30f8f8cSSatish Balay 
884a30f8f8cSSatish Balay       key = row*Nbs + col + 1;
885a30f8f8cSSatish Balay       h1  = HASH(size,key,tmp);
886a30f8f8cSSatish Balay       for (k=0; k<size; k++){
887a30f8f8cSSatish Balay         if (HT[(h1+k)%size] == 0.0) {
888a30f8f8cSSatish Balay           HT[(h1+k)%size] = key;
889a30f8f8cSSatish Balay           HD[(h1+k)%size] = a->a + j*bs2;
890a30f8f8cSSatish Balay           break;
891a30f8f8cSSatish Balay #if defined(PETSC_USE_BOPT_g)
892a30f8f8cSSatish Balay         } else {
893a30f8f8cSSatish Balay           ct++;
894a30f8f8cSSatish Balay #endif
895a30f8f8cSSatish Balay         }
896a30f8f8cSSatish Balay       }
897a30f8f8cSSatish Balay #if defined(PETSC_USE_BOPT_g)
898a30f8f8cSSatish Balay       if (k> max) max = k;
899a30f8f8cSSatish Balay #endif
900a30f8f8cSSatish Balay     }
901a30f8f8cSSatish Balay   }
902a30f8f8cSSatish Balay   /* Loop Over B */
903a30f8f8cSSatish Balay   for (i=0; i<b->mbs; i++) {
904a30f8f8cSSatish Balay     for (j=bi[i]; j<bi[i+1]; j++) {
905a30f8f8cSSatish Balay       row = i+rstart;
906a30f8f8cSSatish Balay       col = garray[bj[j]];
907a30f8f8cSSatish Balay       key = row*Nbs + col + 1;
908a30f8f8cSSatish Balay       h1  = HASH(size,key,tmp);
909a30f8f8cSSatish Balay       for (k=0; k<size; k++){
910a30f8f8cSSatish Balay         if (HT[(h1+k)%size] == 0.0) {
911a30f8f8cSSatish Balay           HT[(h1+k)%size] = key;
912a30f8f8cSSatish Balay           HD[(h1+k)%size] = b->a + j*bs2;
913a30f8f8cSSatish Balay           break;
914a30f8f8cSSatish Balay #if defined(PETSC_USE_BOPT_g)
915a30f8f8cSSatish Balay         } else {
916a30f8f8cSSatish Balay           ct++;
917a30f8f8cSSatish Balay #endif
918a30f8f8cSSatish Balay         }
919a30f8f8cSSatish Balay       }
920a30f8f8cSSatish Balay #if defined(PETSC_USE_BOPT_g)
921a30f8f8cSSatish Balay       if (k> max) max = k;
922a30f8f8cSSatish Balay #endif
923a30f8f8cSSatish Balay     }
924a30f8f8cSSatish Balay   }
925a30f8f8cSSatish Balay 
926a30f8f8cSSatish Balay   /* Print Summary */
927a30f8f8cSSatish Balay #if defined(PETSC_USE_BOPT_g)
928a30f8f8cSSatish Balay   for (i=0,j=0; i<size; i++) {
929a30f8f8cSSatish Balay     if (HT[i]) {j++;}
930a30f8f8cSSatish Balay   }
931a30f8f8cSSatish Balay   PLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n",(j== 0)? 0.0:((PetscReal)(ct+j))/j,max);
932a30f8f8cSSatish Balay #endif
933a30f8f8cSSatish Balay   PetscFunctionReturn(0);
934a30f8f8cSSatish Balay }
935a30f8f8cSSatish Balay 
936a30f8f8cSSatish Balay #undef __FUNC__
937a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatAssemblyBegin_MPISBAIJ"
938a30f8f8cSSatish Balay int MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
939a30f8f8cSSatish Balay {
940a30f8f8cSSatish Balay   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
941a30f8f8cSSatish Balay   int         ierr,nstash,reallocs;
942a30f8f8cSSatish Balay   InsertMode  addv;
943a30f8f8cSSatish Balay 
944a30f8f8cSSatish Balay   PetscFunctionBegin;
945a30f8f8cSSatish Balay   if (baij->donotstash) {
946a30f8f8cSSatish Balay     PetscFunctionReturn(0);
947a30f8f8cSSatish Balay   }
948a30f8f8cSSatish Balay 
949a30f8f8cSSatish Balay   /* make sure all processors are either in INSERTMODE or ADDMODE */
950a30f8f8cSSatish Balay   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
951a30f8f8cSSatish Balay   if (addv == (ADD_VALUES|INSERT_VALUES)) {
952a30f8f8cSSatish Balay     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added");
953a30f8f8cSSatish Balay   }
954a30f8f8cSSatish Balay   mat->insertmode = addv; /* in case this processor had no cache */
955a30f8f8cSSatish Balay 
956a30f8f8cSSatish Balay   ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
957a30f8f8cSSatish Balay   ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
958a30f8f8cSSatish Balay   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
959a30f8f8cSSatish Balay   PLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Stash has %d entries,uses %d mallocs.\n",nstash,reallocs);
960a30f8f8cSSatish Balay   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
961a30f8f8cSSatish Balay   PLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
962a30f8f8cSSatish Balay   PetscFunctionReturn(0);
963a30f8f8cSSatish Balay }
964a30f8f8cSSatish Balay 
965a30f8f8cSSatish Balay #undef __FUNC__
966a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatAssemblyEnd_MPISBAIJ"
967a30f8f8cSSatish Balay int MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
968a30f8f8cSSatish Balay {
969a30f8f8cSSatish Balay   Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data;
970a30f8f8cSSatish Balay   Mat_SeqSBAIJ  *a=(Mat_SeqSBAIJ*)baij->A->data;
971a30f8f8cSSatish Balay   Mat_SeqBAIJ  *b=(Mat_SeqBAIJ*)baij->B->data;
972a30f8f8cSSatish Balay   int         i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2;
973a30f8f8cSSatish Balay   int         *row,*col,other_disassembled;
974a30f8f8cSSatish Balay   PetscTruth  r1,r2,r3;
975a30f8f8cSSatish Balay   MatScalar   *val;
976a30f8f8cSSatish Balay   InsertMode  addv = mat->insertmode;
977a30f8f8cSSatish Balay   int         rank;
978a30f8f8cSSatish Balay 
979a30f8f8cSSatish Balay   PetscFunctionBegin;
980a30f8f8cSSatish Balay   /* remove 2 line below later */
981a30f8f8cSSatish Balay   ierr = MPI_Comm_rank(PETSC_COMM_WORLD, &rank);CHKERRQ(ierr);
982a30f8f8cSSatish Balay 
983a30f8f8cSSatish Balay   if (!baij->donotstash) {
984a30f8f8cSSatish Balay     while (1) {
985a30f8f8cSSatish Balay       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
986a30f8f8cSSatish Balay       /*
987a30f8f8cSSatish Balay       PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d]: in AssemblyEnd, stash, flg=%d\n",rank,flg);
988a30f8f8cSSatish Balay       PetscSynchronizedFlush(PETSC_COMM_WORLD);
989a30f8f8cSSatish Balay       */
990a30f8f8cSSatish Balay       if (!flg) break;
991a30f8f8cSSatish Balay 
992a30f8f8cSSatish Balay       for (i=0; i<n;) {
993a30f8f8cSSatish Balay         /* Now identify the consecutive vals belonging to the same row */
994a30f8f8cSSatish Balay         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
995a30f8f8cSSatish Balay         if (j < n) ncols = j-i;
996a30f8f8cSSatish Balay         else       ncols = n-i;
997a30f8f8cSSatish Balay         /* Now assemble all these values with a single function call */
998a30f8f8cSSatish Balay         ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
999a30f8f8cSSatish Balay         i = j;
1000a30f8f8cSSatish Balay       }
1001a30f8f8cSSatish Balay     }
1002a30f8f8cSSatish Balay     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
1003a30f8f8cSSatish Balay     /* Now process the block-stash. Since the values are stashed column-oriented,
1004a30f8f8cSSatish Balay        set the roworiented flag to column oriented, and after MatSetValues()
1005a30f8f8cSSatish Balay        restore the original flags */
1006a30f8f8cSSatish Balay     r1 = baij->roworiented;
1007a30f8f8cSSatish Balay     r2 = a->roworiented;
1008a30f8f8cSSatish Balay     r3 = b->roworiented;
1009a30f8f8cSSatish Balay     baij->roworiented = PETSC_FALSE;
1010a30f8f8cSSatish Balay     a->roworiented    = PETSC_FALSE;
1011a30f8f8cSSatish Balay     b->roworiented    = PETSC_FALSE;
1012a30f8f8cSSatish Balay     while (1) {
1013a30f8f8cSSatish Balay       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
1014a30f8f8cSSatish Balay       if (!flg) break;
1015a30f8f8cSSatish Balay 
1016a30f8f8cSSatish Balay       for (i=0; i<n;) {
1017a30f8f8cSSatish Balay         /* Now identify the consecutive vals belonging to the same row */
1018a30f8f8cSSatish Balay         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
1019a30f8f8cSSatish Balay         if (j < n) ncols = j-i;
1020a30f8f8cSSatish Balay         else       ncols = n-i;
1021a30f8f8cSSatish Balay         ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
1022a30f8f8cSSatish Balay         i = j;
1023a30f8f8cSSatish Balay       }
1024a30f8f8cSSatish Balay     }
1025a30f8f8cSSatish Balay     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
1026a30f8f8cSSatish Balay     baij->roworiented = r1;
1027a30f8f8cSSatish Balay     a->roworiented    = r2;
1028a30f8f8cSSatish Balay     b->roworiented    = r3;
1029a30f8f8cSSatish Balay   }
1030a30f8f8cSSatish Balay 
1031a30f8f8cSSatish Balay   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
1032a30f8f8cSSatish Balay   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
1033a30f8f8cSSatish Balay 
1034a30f8f8cSSatish Balay   /* determine if any processor has disassembled, if so we must
1035a30f8f8cSSatish Balay      also disassemble ourselfs, in order that we may reassemble. */
1036a30f8f8cSSatish Balay   /*
1037a30f8f8cSSatish Balay      if nonzero structure of submatrix B cannot change then we know that
1038a30f8f8cSSatish Balay      no processor disassembled thus we can skip this stuff
1039a30f8f8cSSatish Balay   */
1040a30f8f8cSSatish Balay   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
1041a30f8f8cSSatish Balay     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
1042a30f8f8cSSatish Balay     if (mat->was_assembled && !other_disassembled) {
1043c8407628SSatish Balay       ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
1044a30f8f8cSSatish Balay     }
1045a30f8f8cSSatish Balay   }
1046a30f8f8cSSatish Balay 
1047a30f8f8cSSatish Balay   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
1048a30f8f8cSSatish Balay     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr);
1049a30f8f8cSSatish Balay   }
1050a30f8f8cSSatish Balay   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
1051a30f8f8cSSatish Balay   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
1052a30f8f8cSSatish Balay 
1053a30f8f8cSSatish Balay #if defined(PETSC_USE_BOPT_g)
1054a30f8f8cSSatish Balay   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
1055a30f8f8cSSatish Balay     PLogInfo(0,"MatAssemblyEnd_MPISBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((double)baij->ht_total_ct)/baij->ht_insert_ct);
1056a30f8f8cSSatish Balay     baij->ht_total_ct  = 0;
1057a30f8f8cSSatish Balay     baij->ht_insert_ct = 0;
1058a30f8f8cSSatish Balay   }
1059a30f8f8cSSatish Balay #endif
1060a30f8f8cSSatish Balay   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
1061c8407628SSatish Balay     ierr = MatCreateHashTable_MPISBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
1062a30f8f8cSSatish Balay     mat->ops->setvalues        = MatSetValues_MPISBAIJ_HT;
1063a30f8f8cSSatish Balay     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPISBAIJ_HT;
1064a30f8f8cSSatish Balay   }
1065a30f8f8cSSatish Balay 
1066a30f8f8cSSatish Balay   if (baij->rowvalues) {
1067a30f8f8cSSatish Balay     ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
1068a30f8f8cSSatish Balay     baij->rowvalues = 0;
1069a30f8f8cSSatish Balay   }
1070a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1071a30f8f8cSSatish Balay }
1072a30f8f8cSSatish Balay 
1073a30f8f8cSSatish Balay #undef __FUNC__
1074a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatView_MPISBAIJ_ASCIIorDraworSocket"
1075a30f8f8cSSatish Balay static int MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer)
1076a30f8f8cSSatish Balay {
1077a30f8f8cSSatish Balay   Mat_MPISBAIJ  *baij = (Mat_MPISBAIJ*)mat->data;
1078a30f8f8cSSatish Balay   int          ierr,format,bs = baij->bs,size = baij->size,rank = baij->rank;
1079a30f8f8cSSatish Balay   PetscTruth   isascii,isdraw;
1080*65d70643SHong Zhang   Viewer       sviewer;
1081a30f8f8cSSatish Balay 
1082a30f8f8cSSatish Balay   PetscFunctionBegin;
1083a30f8f8cSSatish Balay   ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
1084a30f8f8cSSatish Balay   ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
1085a30f8f8cSSatish Balay   if (isascii) {
1086a30f8f8cSSatish Balay     ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1087a30f8f8cSSatish Balay     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
1088a30f8f8cSSatish Balay       MatInfo info;
1089a30f8f8cSSatish Balay       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
1090a30f8f8cSSatish Balay       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1091a30f8f8cSSatish Balay       ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
1092a30f8f8cSSatish Balay               rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
1093a30f8f8cSSatish Balay               baij->bs,(int)info.memory);CHKERRQ(ierr);
1094a30f8f8cSSatish Balay       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1095a30f8f8cSSatish Balay       ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
1096a30f8f8cSSatish Balay       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1097a30f8f8cSSatish Balay       ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
1098a30f8f8cSSatish Balay       ierr = ViewerFlush(viewer);CHKERRQ(ierr);
1099a30f8f8cSSatish Balay       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
1100a30f8f8cSSatish Balay       PetscFunctionReturn(0);
1101a30f8f8cSSatish Balay     } else if (format == VIEWER_FORMAT_ASCII_INFO) {
1102a30f8f8cSSatish Balay       ierr = ViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
1103a30f8f8cSSatish Balay       PetscFunctionReturn(0);
1104a30f8f8cSSatish Balay     }
1105a30f8f8cSSatish Balay   }
1106a30f8f8cSSatish Balay 
1107a30f8f8cSSatish Balay   if (isdraw) {
1108a30f8f8cSSatish Balay     Draw       draw;
1109a30f8f8cSSatish Balay     PetscTruth isnull;
1110a30f8f8cSSatish Balay     ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1111a30f8f8cSSatish Balay     ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1112a30f8f8cSSatish Balay   }
1113a30f8f8cSSatish Balay 
1114a30f8f8cSSatish Balay   if (size == 1) {
1115a30f8f8cSSatish Balay     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
1116a30f8f8cSSatish Balay   } else {
1117a30f8f8cSSatish Balay     /* assemble the entire matrix onto first processor. */
1118a30f8f8cSSatish Balay     Mat         A;
1119*65d70643SHong Zhang     Mat_SeqSBAIJ *Aloc;
1120*65d70643SHong Zhang     Mat_SeqBAIJ *Bloc;
1121a30f8f8cSSatish Balay     int         M = baij->M,N = baij->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
1122a30f8f8cSSatish Balay     MatScalar   *a;
1123a30f8f8cSSatish Balay 
1124a30f8f8cSSatish Balay     if (!rank) {
1125bc0ba53dSHong Zhang       ierr = MatCreateMPISBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
1126a30f8f8cSSatish Balay     } else {
1127f65c83cfSHong Zhang       ierr = MatCreateMPISBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
1128a30f8f8cSSatish Balay     }
1129a30f8f8cSSatish Balay     PLogObjectParent(mat,A);
1130a30f8f8cSSatish Balay 
1131a30f8f8cSSatish Balay     /* copy over the A part */
1132*65d70643SHong Zhang     Aloc  = (Mat_SeqSBAIJ*)baij->A->data;
1133a30f8f8cSSatish Balay     ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
1134a30f8f8cSSatish Balay     rvals = (int*)PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals);
1135a30f8f8cSSatish Balay 
1136a30f8f8cSSatish Balay     for (i=0; i<mbs; i++) {
1137a30f8f8cSSatish Balay       rvals[0] = bs*(baij->rstart + i);
1138a30f8f8cSSatish Balay       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1139a30f8f8cSSatish Balay       for (j=ai[i]; j<ai[i+1]; j++) {
1140a30f8f8cSSatish Balay         col = (baij->cstart+aj[j])*bs;
1141a30f8f8cSSatish Balay         for (k=0; k<bs; k++) {
1142a30f8f8cSSatish Balay           ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1143a30f8f8cSSatish Balay           col++; a += bs;
1144a30f8f8cSSatish Balay         }
1145a30f8f8cSSatish Balay       }
1146a30f8f8cSSatish Balay     }
1147a30f8f8cSSatish Balay     /* copy over the B part */
1148*65d70643SHong Zhang     Bloc = (Mat_SeqBAIJ*)baij->B->data;
1149*65d70643SHong Zhang     ai = Bloc->i; aj = Bloc->j; a = Bloc->a;
1150a30f8f8cSSatish Balay     for (i=0; i<mbs; i++) {
1151a30f8f8cSSatish Balay       rvals[0] = bs*(baij->rstart + i);
1152a30f8f8cSSatish Balay       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1153a30f8f8cSSatish Balay       for (j=ai[i]; j<ai[i+1]; j++) {
1154a30f8f8cSSatish Balay         col = baij->garray[aj[j]]*bs;
1155a30f8f8cSSatish Balay         for (k=0; k<bs; k++) {
1156a30f8f8cSSatish Balay           ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1157a30f8f8cSSatish Balay           col++; a += bs;
1158a30f8f8cSSatish Balay         }
1159a30f8f8cSSatish Balay       }
1160a30f8f8cSSatish Balay     }
1161a30f8f8cSSatish Balay     ierr = PetscFree(rvals);CHKERRQ(ierr);
1162a30f8f8cSSatish Balay     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1163a30f8f8cSSatish Balay     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1164a30f8f8cSSatish Balay     /*
1165a30f8f8cSSatish Balay        Everyone has to call to draw the matrix since the graphics waits are
1166a30f8f8cSSatish Balay        synchronized across all processors that share the Draw object
1167a30f8f8cSSatish Balay     */
1168a30f8f8cSSatish Balay     ierr = ViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1169*65d70643SHong Zhang     if (!rank) {
1170a30f8f8cSSatish Balay       ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1171a30f8f8cSSatish Balay     }
1172*65d70643SHong Zhang     ierr = ViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1173a30f8f8cSSatish Balay     ierr = MatDestroy(A);CHKERRQ(ierr);
1174a30f8f8cSSatish Balay   }
1175a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1176a30f8f8cSSatish Balay }
1177a30f8f8cSSatish Balay 
1178a30f8f8cSSatish Balay #undef __FUNC__
1179a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatView_MPISBAIJ"
1180a30f8f8cSSatish Balay int MatView_MPISBAIJ(Mat mat,Viewer viewer)
1181a30f8f8cSSatish Balay {
1182a30f8f8cSSatish Balay   int        ierr;
1183a30f8f8cSSatish Balay   PetscTruth isascii,isdraw,issocket,isbinary;
1184a30f8f8cSSatish Balay 
1185a30f8f8cSSatish Balay   PetscFunctionBegin;
1186a30f8f8cSSatish Balay   ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
1187a30f8f8cSSatish Balay   ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
1188a30f8f8cSSatish Balay   ierr = PetscTypeCompare((PetscObject)viewer,SOCKET_VIEWER,&issocket);CHKERRQ(ierr);
1189a30f8f8cSSatish Balay   ierr = PetscTypeCompare((PetscObject)viewer,BINARY_VIEWER,&isbinary);CHKERRQ(ierr);
1190a30f8f8cSSatish Balay   if (isascii || isdraw || issocket || isbinary) {
1191a30f8f8cSSatish Balay     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1192a30f8f8cSSatish Balay   } else {
1193a30f8f8cSSatish Balay     SETERRQ1(1,1,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name);
1194a30f8f8cSSatish Balay   }
1195a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1196a30f8f8cSSatish Balay }
1197a30f8f8cSSatish Balay 
1198a30f8f8cSSatish Balay #undef __FUNC__
1199a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatDestroy_MPISBAIJ"
1200a30f8f8cSSatish Balay int MatDestroy_MPISBAIJ(Mat mat)
1201a30f8f8cSSatish Balay {
1202a30f8f8cSSatish Balay   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1203a30f8f8cSSatish Balay   int         ierr;
1204a30f8f8cSSatish Balay 
1205a30f8f8cSSatish Balay   PetscFunctionBegin;
1206a30f8f8cSSatish Balay 
1207a30f8f8cSSatish Balay   if (mat->mapping) {
1208a30f8f8cSSatish Balay     ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr);
1209a30f8f8cSSatish Balay   }
1210a30f8f8cSSatish Balay   if (mat->bmapping) {
1211a30f8f8cSSatish Balay     ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr);
1212a30f8f8cSSatish Balay   }
1213a30f8f8cSSatish Balay   if (mat->rmap) {
1214a30f8f8cSSatish Balay     ierr = MapDestroy(mat->rmap);CHKERRQ(ierr);
1215a30f8f8cSSatish Balay   }
1216a30f8f8cSSatish Balay   if (mat->cmap) {
1217a30f8f8cSSatish Balay     ierr = MapDestroy(mat->cmap);CHKERRQ(ierr);
1218a30f8f8cSSatish Balay   }
1219a30f8f8cSSatish Balay #if defined(PETSC_USE_LOG)
1220a30f8f8cSSatish Balay   PLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",baij->M,baij->N);
1221a30f8f8cSSatish Balay #endif
1222a30f8f8cSSatish Balay 
1223a30f8f8cSSatish Balay   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1224a30f8f8cSSatish Balay   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
1225a30f8f8cSSatish Balay 
1226a30f8f8cSSatish Balay   ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
1227a30f8f8cSSatish Balay   ierr = MatDestroy(baij->A);CHKERRQ(ierr);
1228a30f8f8cSSatish Balay   ierr = MatDestroy(baij->B);CHKERRQ(ierr);
1229a30f8f8cSSatish Balay #if defined (PETSC_USE_CTABLE)
1230a30f8f8cSSatish Balay   if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
1231a30f8f8cSSatish Balay #else
1232a30f8f8cSSatish Balay   if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
1233a30f8f8cSSatish Balay #endif
1234a30f8f8cSSatish Balay   if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
1235a30f8f8cSSatish Balay   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
1236a30f8f8cSSatish Balay   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
1237a30f8f8cSSatish Balay   if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
1238a30f8f8cSSatish Balay   if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
1239a30f8f8cSSatish Balay   if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
1240a30f8f8cSSatish Balay #if defined(PETSC_USE_MAT_SINGLE)
1241a30f8f8cSSatish Balay   if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);}
1242a30f8f8cSSatish Balay #endif
1243a30f8f8cSSatish Balay   ierr = PetscFree(baij);CHKERRQ(ierr);
1244a30f8f8cSSatish Balay   PLogObjectDestroy(mat);
1245a30f8f8cSSatish Balay   PetscHeaderDestroy(mat);
1246a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1247a30f8f8cSSatish Balay }
1248a30f8f8cSSatish Balay 
1249a30f8f8cSSatish Balay #undef __FUNC__
1250a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatMult_MPISBAIJ"
1251a30f8f8cSSatish Balay int MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
1252a30f8f8cSSatish Balay {
1253a30f8f8cSSatish Balay   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1254a30f8f8cSSatish Balay   int         ierr,nt;
1255a30f8f8cSSatish Balay 
1256a30f8f8cSSatish Balay   PetscFunctionBegin;
1257a30f8f8cSSatish Balay   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1258a30f8f8cSSatish Balay   if (nt != a->n) {
1259a30f8f8cSSatish Balay     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx");
1260a30f8f8cSSatish Balay   }
1261a30f8f8cSSatish Balay   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1262a30f8f8cSSatish Balay   if (nt != a->m) {
1263a30f8f8cSSatish Balay     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible parition of A and yy");
1264a30f8f8cSSatish Balay   }
1265*65d70643SHong Zhang 
1266*65d70643SHong Zhang   /* do diagonal part */
1267*65d70643SHong Zhang   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1268*65d70643SHong Zhang 
1269*65d70643SHong Zhang   /* do supperdiagonal part */
1270*65d70643SHong Zhang   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1271*65d70643SHong Zhang   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1272*65d70643SHong Zhang   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1273*65d70643SHong Zhang 
1274*65d70643SHong Zhang   /* do subdiagonal part */
1275*65d70643SHong Zhang   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1276*65d70643SHong Zhang   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1277*65d70643SHong Zhang   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1278*65d70643SHong Zhang #ifdef old
1279a30f8f8cSSatish Balay   /* do subdiagonal part */
1280a30f8f8cSSatish Balay   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1281a30f8f8cSSatish Balay   /* send it on its way */
1282a30f8f8cSSatish Balay   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1283a30f8f8cSSatish Balay   /* do local part */
1284a30f8f8cSSatish Balay   ierr = (*a->B->ops->mult)(a->B,xx,yy);CHKERRQ(ierr);
1285a30f8f8cSSatish Balay   ierr = (*a->A->ops->multadd)(a->A,xx,yy,yy);CHKERRQ(ierr);
1286a30f8f8cSSatish Balay   /* receive remote parts */
1287a30f8f8cSSatish Balay   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1288*65d70643SHong Zhang #endif
1289a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1290a30f8f8cSSatish Balay }
1291a30f8f8cSSatish Balay 
1292a30f8f8cSSatish Balay #undef __FUNC__
1293a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatMultAdd_MPISBAIJ"
1294a30f8f8cSSatish Balay int MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1295a30f8f8cSSatish Balay {
1296a30f8f8cSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1297a30f8f8cSSatish Balay   int        ierr;
1298a30f8f8cSSatish Balay 
1299a30f8f8cSSatish Balay   PetscFunctionBegin;
1300a30f8f8cSSatish Balay   /* do subdiagonal part */
1301a30f8f8cSSatish Balay   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1302a30f8f8cSSatish Balay   /* send it on its way */
1303a30f8f8cSSatish Balay   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1304a30f8f8cSSatish Balay   /* do local part */
1305a30f8f8cSSatish Balay   ierr = (*a->B->ops->multadd)(a->B,xx,yy,zz);CHKERRQ(ierr);
1306a30f8f8cSSatish Balay   ierr = (*a->A->ops->multadd)(a->A,xx,zz,zz);CHKERRQ(ierr);
1307a30f8f8cSSatish Balay   /* receive remote parts */
1308a30f8f8cSSatish Balay   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1309a30f8f8cSSatish Balay 
1310a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1311a30f8f8cSSatish Balay }
1312a30f8f8cSSatish Balay 
1313a30f8f8cSSatish Balay #undef __FUNC__
1314a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatMultTranspose_MPISBAIJ"
1315a30f8f8cSSatish Balay int MatMultTranspose_MPISBAIJ(Mat A,Vec xx,Vec yy)
1316a30f8f8cSSatish Balay {
1317a30f8f8cSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1318a30f8f8cSSatish Balay   int         ierr;
1319a30f8f8cSSatish Balay 
1320a30f8f8cSSatish Balay   PetscFunctionBegin;
1321a30f8f8cSSatish Balay   /* do nondiagonal part */
1322a30f8f8cSSatish Balay   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1323a30f8f8cSSatish Balay   /* send it on its way */
1324a30f8f8cSSatish Balay   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1325a30f8f8cSSatish Balay   /* do local part */
1326a30f8f8cSSatish Balay   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1327a30f8f8cSSatish Balay   /* receive remote parts: note this assumes the values are not actually */
1328a30f8f8cSSatish Balay   /* inserted in yy until the next line, which is true for my implementation*/
1329a30f8f8cSSatish Balay   /* but is not perhaps always true. */
1330a30f8f8cSSatish Balay   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1331a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1332a30f8f8cSSatish Balay }
1333a30f8f8cSSatish Balay 
1334a30f8f8cSSatish Balay #undef __FUNC__
1335a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatMultTransposeAdd_MPISBAIJ"
1336a30f8f8cSSatish Balay int MatMultTransposeAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1337a30f8f8cSSatish Balay {
1338a30f8f8cSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1339a30f8f8cSSatish Balay   int         ierr;
1340a30f8f8cSSatish Balay 
1341a30f8f8cSSatish Balay   PetscFunctionBegin;
1342a30f8f8cSSatish Balay   /* do nondiagonal part */
1343a30f8f8cSSatish Balay   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1344a30f8f8cSSatish Balay   /* send it on its way */
1345a30f8f8cSSatish Balay   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1346a30f8f8cSSatish Balay   /* do local part */
1347a30f8f8cSSatish Balay   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1348a30f8f8cSSatish Balay   /* receive remote parts: note this assumes the values are not actually */
1349a30f8f8cSSatish Balay   /* inserted in yy until the next line, which is true for my implementation*/
1350a30f8f8cSSatish Balay   /* but is not perhaps always true. */
1351a30f8f8cSSatish Balay   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1352a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1353a30f8f8cSSatish Balay }
1354a30f8f8cSSatish Balay 
1355a30f8f8cSSatish Balay /*
1356a30f8f8cSSatish Balay   This only works correctly for square matrices where the subblock A->A is the
1357a30f8f8cSSatish Balay    diagonal block
1358a30f8f8cSSatish Balay */
1359a30f8f8cSSatish Balay #undef __FUNC__
1360a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatGetDiagonal_MPISBAIJ"
1361a30f8f8cSSatish Balay int MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
1362a30f8f8cSSatish Balay {
1363a30f8f8cSSatish Balay   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1364a30f8f8cSSatish Balay   int         ierr;
1365a30f8f8cSSatish Balay 
1366a30f8f8cSSatish Balay   PetscFunctionBegin;
1367a30f8f8cSSatish Balay   /* if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block"); */
1368a30f8f8cSSatish Balay   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1369a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1370a30f8f8cSSatish Balay }
1371a30f8f8cSSatish Balay 
1372a30f8f8cSSatish Balay #undef __FUNC__
1373a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatScale_MPISBAIJ"
1374a30f8f8cSSatish Balay int MatScale_MPISBAIJ(Scalar *aa,Mat A)
1375a30f8f8cSSatish Balay {
1376a30f8f8cSSatish Balay   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1377a30f8f8cSSatish Balay   int         ierr;
1378a30f8f8cSSatish Balay 
1379a30f8f8cSSatish Balay   PetscFunctionBegin;
1380a30f8f8cSSatish Balay   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
1381a30f8f8cSSatish Balay   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
1382a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1383a30f8f8cSSatish Balay }
1384a30f8f8cSSatish Balay 
1385a30f8f8cSSatish Balay #undef __FUNC__
1386a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatGetSize_MPISBAIJ"
1387a30f8f8cSSatish Balay int MatGetSize_MPISBAIJ(Mat matin,int *m,int *n)
1388a30f8f8cSSatish Balay {
1389a30f8f8cSSatish Balay   Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data;
1390a30f8f8cSSatish Balay 
1391a30f8f8cSSatish Balay   PetscFunctionBegin;
1392a30f8f8cSSatish Balay   if (m) *m = mat->M;
1393a30f8f8cSSatish Balay   if (n) *n = mat->N;
1394a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1395a30f8f8cSSatish Balay }
1396a30f8f8cSSatish Balay 
1397a30f8f8cSSatish Balay #undef __FUNC__
1398a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatGetLocalSize_MPISBAIJ"
1399a30f8f8cSSatish Balay int MatGetLocalSize_MPISBAIJ(Mat matin,int *m,int *n)
1400a30f8f8cSSatish Balay {
1401a30f8f8cSSatish Balay   Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data;
1402a30f8f8cSSatish Balay 
1403a30f8f8cSSatish Balay   PetscFunctionBegin;
1404a30f8f8cSSatish Balay   *m = mat->m; *n = mat->n;
1405a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1406a30f8f8cSSatish Balay }
1407a30f8f8cSSatish Balay 
1408a30f8f8cSSatish Balay #undef __FUNC__
1409a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatGetOwnershipRange_MPISBAIJ"
1410a30f8f8cSSatish Balay int MatGetOwnershipRange_MPISBAIJ(Mat matin,int *m,int *n)
1411a30f8f8cSSatish Balay {
1412a30f8f8cSSatish Balay   Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data;
1413a30f8f8cSSatish Balay 
1414a30f8f8cSSatish Balay   PetscFunctionBegin;
1415a30f8f8cSSatish Balay   if (m) *m = mat->rstart*mat->bs;
1416a30f8f8cSSatish Balay   if (n) *n = mat->rend*mat->bs;
1417a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1418a30f8f8cSSatish Balay }
1419a30f8f8cSSatish Balay 
1420a30f8f8cSSatish Balay #undef __FUNC__
1421a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatGetRow_MPISBAIJ"
1422a30f8f8cSSatish Balay int MatGetRow_MPISBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
1423a30f8f8cSSatish Balay {
1424a30f8f8cSSatish Balay   Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data;
1425a30f8f8cSSatish Balay   Scalar     *vworkA,*vworkB,**pvA,**pvB,*v_p;
1426a30f8f8cSSatish Balay   int        bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB;
1427a30f8f8cSSatish Balay   int        nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs;
1428a30f8f8cSSatish Balay   int        *cmap,*idx_p,cstart = mat->cstart;
1429a30f8f8cSSatish Balay 
1430a30f8f8cSSatish Balay   PetscFunctionBegin;
1431a30f8f8cSSatish Balay   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active");
1432a30f8f8cSSatish Balay   mat->getrowactive = PETSC_TRUE;
1433a30f8f8cSSatish Balay 
1434a30f8f8cSSatish Balay   if (!mat->rowvalues && (idx || v)) {
1435a30f8f8cSSatish Balay     /*
1436a30f8f8cSSatish Balay         allocate enough space to hold information from the longest row.
1437a30f8f8cSSatish Balay     */
1438a30f8f8cSSatish Balay     Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
1439a30f8f8cSSatish Balay     Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
1440a30f8f8cSSatish Balay     int     max = 1,mbs = mat->mbs,tmp;
1441a30f8f8cSSatish Balay     for (i=0; i<mbs; i++) {
1442a30f8f8cSSatish Balay       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
1443a30f8f8cSSatish Balay       if (max < tmp) { max = tmp; }
1444a30f8f8cSSatish Balay     }
1445a30f8f8cSSatish Balay     mat->rowvalues = (Scalar*)PetscMalloc(max*bs2*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues);
1446a30f8f8cSSatish Balay     mat->rowindices = (int*)(mat->rowvalues + max*bs2);
1447a30f8f8cSSatish Balay   }
1448a30f8f8cSSatish Balay 
1449a30f8f8cSSatish Balay   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,0,"Only local rows")
1450a30f8f8cSSatish Balay   lrow = row - brstart;  /* local row index */
1451a30f8f8cSSatish Balay 
1452a30f8f8cSSatish Balay   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1453a30f8f8cSSatish Balay   if (!v)   {pvA = 0; pvB = 0;}
1454a30f8f8cSSatish Balay   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1455a30f8f8cSSatish Balay   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1456a30f8f8cSSatish Balay   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1457a30f8f8cSSatish Balay   nztot = nzA + nzB;
1458a30f8f8cSSatish Balay 
1459a30f8f8cSSatish Balay   cmap  = mat->garray;
1460a30f8f8cSSatish Balay   if (v  || idx) {
1461a30f8f8cSSatish Balay     if (nztot) {
1462a30f8f8cSSatish Balay       /* Sort by increasing column numbers, assuming A and B already sorted */
1463a30f8f8cSSatish Balay       int imark = -1;
1464a30f8f8cSSatish Balay       if (v) {
1465a30f8f8cSSatish Balay         *v = v_p = mat->rowvalues;
1466a30f8f8cSSatish Balay         for (i=0; i<nzB; i++) {
1467a30f8f8cSSatish Balay           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1468a30f8f8cSSatish Balay           else break;
1469a30f8f8cSSatish Balay         }
1470a30f8f8cSSatish Balay         imark = i;
1471a30f8f8cSSatish Balay         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1472a30f8f8cSSatish Balay         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1473a30f8f8cSSatish Balay       }
1474a30f8f8cSSatish Balay       if (idx) {
1475a30f8f8cSSatish Balay         *idx = idx_p = mat->rowindices;
1476a30f8f8cSSatish Balay         if (imark > -1) {
1477a30f8f8cSSatish Balay           for (i=0; i<imark; i++) {
1478a30f8f8cSSatish Balay             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1479a30f8f8cSSatish Balay           }
1480a30f8f8cSSatish Balay         } else {
1481a30f8f8cSSatish Balay           for (i=0; i<nzB; i++) {
1482a30f8f8cSSatish Balay             if (cmap[cworkB[i]/bs] < cstart)
1483a30f8f8cSSatish Balay               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1484a30f8f8cSSatish Balay             else break;
1485a30f8f8cSSatish Balay           }
1486a30f8f8cSSatish Balay           imark = i;
1487a30f8f8cSSatish Balay         }
1488a30f8f8cSSatish Balay         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1489a30f8f8cSSatish Balay         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1490a30f8f8cSSatish Balay       }
1491a30f8f8cSSatish Balay     } else {
1492a30f8f8cSSatish Balay       if (idx) *idx = 0;
1493a30f8f8cSSatish Balay       if (v)   *v   = 0;
1494a30f8f8cSSatish Balay     }
1495a30f8f8cSSatish Balay   }
1496a30f8f8cSSatish Balay   *nz = nztot;
1497a30f8f8cSSatish Balay   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1498a30f8f8cSSatish Balay   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1499a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1500a30f8f8cSSatish Balay }
1501a30f8f8cSSatish Balay 
1502a30f8f8cSSatish Balay #undef __FUNC__
1503a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatRestoreRow_MPISBAIJ"
1504a30f8f8cSSatish Balay int MatRestoreRow_MPISBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1505a30f8f8cSSatish Balay {
1506a30f8f8cSSatish Balay   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1507a30f8f8cSSatish Balay 
1508a30f8f8cSSatish Balay   PetscFunctionBegin;
1509a30f8f8cSSatish Balay   if (baij->getrowactive == PETSC_FALSE) {
1510a30f8f8cSSatish Balay     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called");
1511a30f8f8cSSatish Balay   }
1512a30f8f8cSSatish Balay   baij->getrowactive = PETSC_FALSE;
1513a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1514a30f8f8cSSatish Balay }
1515a30f8f8cSSatish Balay 
1516a30f8f8cSSatish Balay #undef __FUNC__
1517a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatGetBlockSize_MPISBAIJ"
1518a30f8f8cSSatish Balay int MatGetBlockSize_MPISBAIJ(Mat mat,int *bs)
1519a30f8f8cSSatish Balay {
1520a30f8f8cSSatish Balay   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1521a30f8f8cSSatish Balay 
1522a30f8f8cSSatish Balay   PetscFunctionBegin;
1523a30f8f8cSSatish Balay   *bs = baij->bs;
1524a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1525a30f8f8cSSatish Balay }
1526a30f8f8cSSatish Balay 
1527a30f8f8cSSatish Balay #undef __FUNC__
1528a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatZeroEntries_MPISBAIJ"
1529a30f8f8cSSatish Balay int MatZeroEntries_MPISBAIJ(Mat A)
1530a30f8f8cSSatish Balay {
1531a30f8f8cSSatish Balay   Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data;
1532a30f8f8cSSatish Balay   int         ierr;
1533a30f8f8cSSatish Balay 
1534a30f8f8cSSatish Balay   PetscFunctionBegin;
1535a30f8f8cSSatish Balay   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1536a30f8f8cSSatish Balay   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1537a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1538a30f8f8cSSatish Balay }
1539a30f8f8cSSatish Balay 
1540a30f8f8cSSatish Balay #undef __FUNC__
1541a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatGetInfo_MPISBAIJ"
1542a30f8f8cSSatish Balay int MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1543a30f8f8cSSatish Balay {
1544a30f8f8cSSatish Balay   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data;
1545a30f8f8cSSatish Balay   Mat         A = a->A,B = a->B;
1546a30f8f8cSSatish Balay   int         ierr;
1547a30f8f8cSSatish Balay   PetscReal   isend[5],irecv[5];
1548a30f8f8cSSatish Balay 
1549a30f8f8cSSatish Balay   PetscFunctionBegin;
1550a30f8f8cSSatish Balay   info->block_size     = (double)a->bs;
1551a30f8f8cSSatish Balay   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1552a30f8f8cSSatish Balay   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1553a30f8f8cSSatish Balay   isend[3] = info->memory;  isend[4] = info->mallocs;
1554a30f8f8cSSatish Balay   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1555a30f8f8cSSatish Balay   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1556a30f8f8cSSatish Balay   isend[3] += info->memory;  isend[4] += info->mallocs;
1557a30f8f8cSSatish Balay   if (flag == MAT_LOCAL) {
1558a30f8f8cSSatish Balay     info->nz_used      = isend[0];
1559a30f8f8cSSatish Balay     info->nz_allocated = isend[1];
1560a30f8f8cSSatish Balay     info->nz_unneeded  = isend[2];
1561a30f8f8cSSatish Balay     info->memory       = isend[3];
1562a30f8f8cSSatish Balay     info->mallocs      = isend[4];
1563a30f8f8cSSatish Balay   } else if (flag == MAT_GLOBAL_MAX) {
1564a30f8f8cSSatish Balay     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr);
1565a30f8f8cSSatish Balay     info->nz_used      = irecv[0];
1566a30f8f8cSSatish Balay     info->nz_allocated = irecv[1];
1567a30f8f8cSSatish Balay     info->nz_unneeded  = irecv[2];
1568a30f8f8cSSatish Balay     info->memory       = irecv[3];
1569a30f8f8cSSatish Balay     info->mallocs      = irecv[4];
1570a30f8f8cSSatish Balay   } else if (flag == MAT_GLOBAL_SUM) {
1571a30f8f8cSSatish Balay     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr);
1572a30f8f8cSSatish Balay     info->nz_used      = irecv[0];
1573a30f8f8cSSatish Balay     info->nz_allocated = irecv[1];
1574a30f8f8cSSatish Balay     info->nz_unneeded  = irecv[2];
1575a30f8f8cSSatish Balay     info->memory       = irecv[3];
1576a30f8f8cSSatish Balay     info->mallocs      = irecv[4];
1577a30f8f8cSSatish Balay   } else {
1578a30f8f8cSSatish Balay     SETERRQ1(1,1,"Unknown MatInfoType argument %d",flag);
1579a30f8f8cSSatish Balay   }
1580a30f8f8cSSatish Balay   info->rows_global       = (double)a->M;
1581a30f8f8cSSatish Balay   info->columns_global    = (double)a->N;
1582a30f8f8cSSatish Balay   info->rows_local        = (double)a->m;
1583a30f8f8cSSatish Balay   info->columns_local     = (double)a->N;
1584a30f8f8cSSatish Balay   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1585a30f8f8cSSatish Balay   info->fill_ratio_needed = 0;
1586a30f8f8cSSatish Balay   info->factor_mallocs    = 0;
1587a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1588a30f8f8cSSatish Balay }
1589a30f8f8cSSatish Balay 
1590a30f8f8cSSatish Balay #undef __FUNC__
1591a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatSetOption_MPISBAIJ"
1592a30f8f8cSSatish Balay int MatSetOption_MPISBAIJ(Mat A,MatOption op)
1593a30f8f8cSSatish Balay {
1594a30f8f8cSSatish Balay   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1595a30f8f8cSSatish Balay   int         ierr;
1596a30f8f8cSSatish Balay 
1597a30f8f8cSSatish Balay   PetscFunctionBegin;
1598a30f8f8cSSatish Balay   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
1599a30f8f8cSSatish Balay       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
1600a30f8f8cSSatish Balay       op == MAT_COLUMNS_UNSORTED ||
1601a30f8f8cSSatish Balay       op == MAT_COLUMNS_SORTED ||
1602a30f8f8cSSatish Balay       op == MAT_NEW_NONZERO_ALLOCATION_ERR ||
1603a30f8f8cSSatish Balay       op == MAT_KEEP_ZEROED_ROWS ||
1604a30f8f8cSSatish Balay       op == MAT_NEW_NONZERO_LOCATION_ERR) {
1605a30f8f8cSSatish Balay         ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1606a30f8f8cSSatish Balay         ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1607a30f8f8cSSatish Balay   } else if (op == MAT_ROW_ORIENTED) {
1608a30f8f8cSSatish Balay         a->roworiented = PETSC_TRUE;
1609a30f8f8cSSatish Balay         ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1610a30f8f8cSSatish Balay         ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1611a30f8f8cSSatish Balay   } else if (op == MAT_ROWS_SORTED ||
1612a30f8f8cSSatish Balay              op == MAT_ROWS_UNSORTED ||
1613a30f8f8cSSatish Balay              op == MAT_SYMMETRIC ||
1614a30f8f8cSSatish Balay              op == MAT_STRUCTURALLY_SYMMETRIC ||
1615a30f8f8cSSatish Balay              op == MAT_YES_NEW_DIAGONALS ||
1616a30f8f8cSSatish Balay              op == MAT_USE_HASH_TABLE) {
1617a30f8f8cSSatish Balay     PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
1618a30f8f8cSSatish Balay   } else if (op == MAT_COLUMN_ORIENTED) {
1619a30f8f8cSSatish Balay     a->roworiented = PETSC_FALSE;
1620a30f8f8cSSatish Balay     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1621a30f8f8cSSatish Balay     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1622a30f8f8cSSatish Balay   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
1623a30f8f8cSSatish Balay     a->donotstash = PETSC_TRUE;
1624a30f8f8cSSatish Balay   } else if (op == MAT_NO_NEW_DIAGONALS) {
1625a30f8f8cSSatish Balay     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
1626a30f8f8cSSatish Balay   } else if (op == MAT_USE_HASH_TABLE) {
1627a30f8f8cSSatish Balay     a->ht_flag = PETSC_TRUE;
1628a30f8f8cSSatish Balay   } else {
1629a30f8f8cSSatish Balay     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
1630a30f8f8cSSatish Balay   }
1631a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1632a30f8f8cSSatish Balay }
1633a30f8f8cSSatish Balay 
1634a30f8f8cSSatish Balay #undef __FUNC__
1635a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatTranspose_MPISBAIJ("
1636a30f8f8cSSatish Balay int MatTranspose_MPISBAIJ(Mat A,Mat *matout)
1637a30f8f8cSSatish Balay {
1638a30f8f8cSSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data;
1639a30f8f8cSSatish Balay   Mat_SeqBAIJ *Aloc;
1640a30f8f8cSSatish Balay   Mat         B;
1641a30f8f8cSSatish Balay   int         ierr,M=baij->M,N=baij->N,*ai,*aj,i,*rvals,j,k,col;
1642a30f8f8cSSatish Balay   int         bs=baij->bs,mbs=baij->mbs;
1643a30f8f8cSSatish Balay   MatScalar   *a;
1644a30f8f8cSSatish Balay 
1645a30f8f8cSSatish Balay   PetscFunctionBegin;
1646a30f8f8cSSatish Balay   if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place");
1647a30f8f8cSSatish Balay   ierr = MatCreateMPIBAIJ(A->comm,baij->bs,baij->n,baij->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);
1648a30f8f8cSSatish Balay 
1649a30f8f8cSSatish Balay   /* copy over the A part */
1650a30f8f8cSSatish Balay   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1651a30f8f8cSSatish Balay   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1652a30f8f8cSSatish Balay   rvals = (int*)PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals);
1653a30f8f8cSSatish Balay 
1654a30f8f8cSSatish Balay   for (i=0; i<mbs; i++) {
1655a30f8f8cSSatish Balay     rvals[0] = bs*(baij->rstart + i);
1656a30f8f8cSSatish Balay     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1657a30f8f8cSSatish Balay     for (j=ai[i]; j<ai[i+1]; j++) {
1658a30f8f8cSSatish Balay       col = (baij->cstart+aj[j])*bs;
1659a30f8f8cSSatish Balay       for (k=0; k<bs; k++) {
1660a30f8f8cSSatish Balay         ierr = MatSetValues_MPISBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1661a30f8f8cSSatish Balay         col++; a += bs;
1662a30f8f8cSSatish Balay       }
1663a30f8f8cSSatish Balay     }
1664a30f8f8cSSatish Balay   }
1665a30f8f8cSSatish Balay   /* copy over the B part */
1666a30f8f8cSSatish Balay   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1667a30f8f8cSSatish Balay   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1668a30f8f8cSSatish Balay   for (i=0; i<mbs; i++) {
1669a30f8f8cSSatish Balay     rvals[0] = bs*(baij->rstart + i);
1670a30f8f8cSSatish Balay     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1671a30f8f8cSSatish Balay     for (j=ai[i]; j<ai[i+1]; j++) {
1672a30f8f8cSSatish Balay       col = baij->garray[aj[j]]*bs;
1673a30f8f8cSSatish Balay       for (k=0; k<bs; k++) {
1674a30f8f8cSSatish Balay         ierr = MatSetValues_MPISBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1675a30f8f8cSSatish Balay         col++; a += bs;
1676a30f8f8cSSatish Balay       }
1677a30f8f8cSSatish Balay     }
1678a30f8f8cSSatish Balay   }
1679a30f8f8cSSatish Balay   ierr = PetscFree(rvals);CHKERRQ(ierr);
1680a30f8f8cSSatish Balay   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1681a30f8f8cSSatish Balay   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1682a30f8f8cSSatish Balay 
1683a30f8f8cSSatish Balay   if (matout) {
1684a30f8f8cSSatish Balay     *matout = B;
1685a30f8f8cSSatish Balay   } else {
1686a30f8f8cSSatish Balay     PetscOps *Abops;
1687a30f8f8cSSatish Balay     MatOps   Aops;
1688a30f8f8cSSatish Balay 
1689a30f8f8cSSatish Balay     /* This isn't really an in-place transpose .... but free data structures from baij */
1690a30f8f8cSSatish Balay     ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
1691a30f8f8cSSatish Balay     ierr = MatDestroy(baij->A);CHKERRQ(ierr);
1692a30f8f8cSSatish Balay     ierr = MatDestroy(baij->B);CHKERRQ(ierr);
1693a30f8f8cSSatish Balay #if defined (PETSC_USE_CTABLE)
1694a30f8f8cSSatish Balay     if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
1695a30f8f8cSSatish Balay #else
1696a30f8f8cSSatish Balay     if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
1697a30f8f8cSSatish Balay #endif
1698a30f8f8cSSatish Balay     if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
1699a30f8f8cSSatish Balay     if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
1700a30f8f8cSSatish Balay     if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
1701a30f8f8cSSatish Balay     ierr = PetscFree(baij);CHKERRQ(ierr);
1702a30f8f8cSSatish Balay 
1703a30f8f8cSSatish Balay     /*
1704a30f8f8cSSatish Balay        This is horrible, horrible code. We need to keep the
1705a30f8f8cSSatish Balay       A pointers for the bops and ops but copy everything
1706a30f8f8cSSatish Balay       else from C.
1707a30f8f8cSSatish Balay     */
1708a30f8f8cSSatish Balay     Abops   = A->bops;
1709a30f8f8cSSatish Balay     Aops    = A->ops;
1710a30f8f8cSSatish Balay     ierr    = PetscMemcpy(A,B,sizeof(struct _p_Mat));CHKERRQ(ierr);
1711a30f8f8cSSatish Balay     A->bops = Abops;
1712a30f8f8cSSatish Balay     A->ops  = Aops;
1713a30f8f8cSSatish Balay 
1714a30f8f8cSSatish Balay     PetscHeaderDestroy(B);
1715a30f8f8cSSatish Balay   }
1716a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1717a30f8f8cSSatish Balay }
1718a30f8f8cSSatish Balay 
1719a30f8f8cSSatish Balay #undef __FUNC__
1720a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatDiagonalScale_MPISBAIJ"
1721a30f8f8cSSatish Balay int MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1722a30f8f8cSSatish Balay {
1723a30f8f8cSSatish Balay   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1724a30f8f8cSSatish Balay   Mat         a = baij->A,b = baij->B;
1725a30f8f8cSSatish Balay   int         ierr,s1,s2,s3;
1726a30f8f8cSSatish Balay 
1727a30f8f8cSSatish Balay   PetscFunctionBegin;
1728a30f8f8cSSatish Balay   if (ll != rr) {
1729a30f8f8cSSatish Balay     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"For symmetric format, left and right scaling vectors must be same\n");
1730a30f8f8cSSatish Balay   }
1731a30f8f8cSSatish Balay   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1732a30f8f8cSSatish Balay   if (rr) {
1733a30f8f8cSSatish Balay     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1734a30f8f8cSSatish Balay     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"right vector non-conforming local size");
1735a30f8f8cSSatish Balay     /* Overlap communication with computation. */
1736a30f8f8cSSatish Balay     ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1737a30f8f8cSSatish Balay     /*} if (ll) { */
1738a30f8f8cSSatish Balay     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1739a30f8f8cSSatish Balay     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"left vector non-conforming local size");
1740a30f8f8cSSatish Balay     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1741a30f8f8cSSatish Balay     /* } */
1742a30f8f8cSSatish Balay   /* scale  the diagonal block */
1743a30f8f8cSSatish Balay   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1744a30f8f8cSSatish Balay 
1745a30f8f8cSSatish Balay   /* if (rr) { */
1746a30f8f8cSSatish Balay     /* Do a scatter end and then right scale the off-diagonal block */
1747a30f8f8cSSatish Balay     ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1748a30f8f8cSSatish Balay     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1749a30f8f8cSSatish Balay   }
1750a30f8f8cSSatish Balay 
1751a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1752a30f8f8cSSatish Balay }
1753a30f8f8cSSatish Balay 
1754a30f8f8cSSatish Balay #undef __FUNC__
1755a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatZeroRows_MPISBAIJ"
1756a30f8f8cSSatish Balay int MatZeroRows_MPISBAIJ(Mat A,IS is,Scalar *diag)
1757a30f8f8cSSatish Balay {
1758a30f8f8cSSatish Balay   Mat_MPISBAIJ    *l = (Mat_MPISBAIJ*)A->data;
1759a30f8f8cSSatish Balay   int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
1760a30f8f8cSSatish Balay   int            *procs,*nprocs,j,found,idx,nsends,*work,row;
1761a30f8f8cSSatish Balay   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
1762a30f8f8cSSatish Balay   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
1763a30f8f8cSSatish Balay   int            *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs;
1764a30f8f8cSSatish Balay   MPI_Comm       comm = A->comm;
1765a30f8f8cSSatish Balay   MPI_Request    *send_waits,*recv_waits;
1766a30f8f8cSSatish Balay   MPI_Status     recv_status,*send_status;
1767a30f8f8cSSatish Balay   IS             istmp;
1768a30f8f8cSSatish Balay 
1769a30f8f8cSSatish Balay   PetscFunctionBegin;
1770a30f8f8cSSatish Balay   ierr = ISGetSize(is,&N);CHKERRQ(ierr);
1771a30f8f8cSSatish Balay   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
1772a30f8f8cSSatish Balay 
1773a30f8f8cSSatish Balay   /*  first count number of contributors to each processor */
1774a30f8f8cSSatish Balay   nprocs = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(nprocs);
1775a30f8f8cSSatish Balay   ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
1776a30f8f8cSSatish Balay   procs  = nprocs + size;
1777a30f8f8cSSatish Balay   owner  = (int*)PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note*/
1778a30f8f8cSSatish Balay   for (i=0; i<N; i++) {
1779a30f8f8cSSatish Balay     idx   = rows[i];
1780a30f8f8cSSatish Balay     found = 0;
1781a30f8f8cSSatish Balay     for (j=0; j<size; j++) {
1782a30f8f8cSSatish Balay       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
1783a30f8f8cSSatish Balay         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
1784a30f8f8cSSatish Balay       }
1785a30f8f8cSSatish Balay     }
1786a30f8f8cSSatish Balay     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
1787a30f8f8cSSatish Balay   }
1788a30f8f8cSSatish Balay   nsends = 0;  for (i=0; i<size; i++) { nsends += procs[i];}
1789a30f8f8cSSatish Balay 
1790a30f8f8cSSatish Balay   /* inform other processors of number of messages and max length*/
1791a30f8f8cSSatish Balay   work   = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(work);
1792a30f8f8cSSatish Balay   ierr   = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr);
1793a30f8f8cSSatish Balay   nmax   = work[rank];
1794a30f8f8cSSatish Balay   nrecvs = work[size+rank];
1795a30f8f8cSSatish Balay   ierr = PetscFree(work);CHKERRQ(ierr);
1796a30f8f8cSSatish Balay 
1797a30f8f8cSSatish Balay   /* post receives:   */
1798a30f8f8cSSatish Balay   rvalues = (int*)PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
1799a30f8f8cSSatish Balay   recv_waits = (MPI_Request*)PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
1800a30f8f8cSSatish Balay   for (i=0; i<nrecvs; i++) {
1801a30f8f8cSSatish Balay     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
1802a30f8f8cSSatish Balay   }
1803a30f8f8cSSatish Balay 
1804a30f8f8cSSatish Balay   /* do sends:
1805a30f8f8cSSatish Balay      1) starts[i] gives the starting index in svalues for stuff going to
1806a30f8f8cSSatish Balay      the ith processor
1807a30f8f8cSSatish Balay   */
1808a30f8f8cSSatish Balay   svalues    = (int*)PetscMalloc((N+1)*sizeof(int));CHKPTRQ(svalues);
1809a30f8f8cSSatish Balay   send_waits = (MPI_Request*)PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
1810a30f8f8cSSatish Balay   starts     = (int*)PetscMalloc((size+1)*sizeof(int));CHKPTRQ(starts);
1811a30f8f8cSSatish Balay   starts[0]  = 0;
1812a30f8f8cSSatish Balay   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
1813a30f8f8cSSatish Balay   for (i=0; i<N; i++) {
1814a30f8f8cSSatish Balay     svalues[starts[owner[i]]++] = rows[i];
1815a30f8f8cSSatish Balay   }
1816a30f8f8cSSatish Balay   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
1817a30f8f8cSSatish Balay 
1818a30f8f8cSSatish Balay   starts[0] = 0;
1819a30f8f8cSSatish Balay   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
1820a30f8f8cSSatish Balay   count = 0;
1821a30f8f8cSSatish Balay   for (i=0; i<size; i++) {
1822a30f8f8cSSatish Balay     if (procs[i]) {
1823a30f8f8cSSatish Balay       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
1824a30f8f8cSSatish Balay     }
1825a30f8f8cSSatish Balay   }
1826a30f8f8cSSatish Balay   ierr = PetscFree(starts);CHKERRQ(ierr);
1827a30f8f8cSSatish Balay 
1828a30f8f8cSSatish Balay   base = owners[rank]*bs;
1829a30f8f8cSSatish Balay 
1830a30f8f8cSSatish Balay   /*  wait on receives */
1831a30f8f8cSSatish Balay   lens   = (int*)PetscMalloc(2*(nrecvs+1)*sizeof(int));CHKPTRQ(lens);
1832a30f8f8cSSatish Balay   source = lens + nrecvs;
1833a30f8f8cSSatish Balay   count  = nrecvs; slen = 0;
1834a30f8f8cSSatish Balay   while (count) {
1835a30f8f8cSSatish Balay     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
1836a30f8f8cSSatish Balay     /* unpack receives into our local space */
1837a30f8f8cSSatish Balay     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
1838a30f8f8cSSatish Balay     source[imdex]  = recv_status.MPI_SOURCE;
1839a30f8f8cSSatish Balay     lens[imdex]    = n;
1840a30f8f8cSSatish Balay     slen          += n;
1841a30f8f8cSSatish Balay     count--;
1842a30f8f8cSSatish Balay   }
1843a30f8f8cSSatish Balay   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1844a30f8f8cSSatish Balay 
1845a30f8f8cSSatish Balay   /* move the data into the send scatter */
1846a30f8f8cSSatish Balay   lrows = (int*)PetscMalloc((slen+1)*sizeof(int));CHKPTRQ(lrows);
1847a30f8f8cSSatish Balay   count = 0;
1848a30f8f8cSSatish Balay   for (i=0; i<nrecvs; i++) {
1849a30f8f8cSSatish Balay     values = rvalues + i*nmax;
1850a30f8f8cSSatish Balay     for (j=0; j<lens[i]; j++) {
1851a30f8f8cSSatish Balay       lrows[count++] = values[j] - base;
1852a30f8f8cSSatish Balay     }
1853a30f8f8cSSatish Balay   }
1854a30f8f8cSSatish Balay   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1855a30f8f8cSSatish Balay   ierr = PetscFree(lens);CHKERRQ(ierr);
1856a30f8f8cSSatish Balay   ierr = PetscFree(owner);CHKERRQ(ierr);
1857a30f8f8cSSatish Balay   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1858a30f8f8cSSatish Balay 
1859a30f8f8cSSatish Balay   /* actually zap the local rows */
1860a30f8f8cSSatish Balay   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
1861a30f8f8cSSatish Balay   PLogObjectParent(A,istmp);
1862a30f8f8cSSatish Balay 
1863a30f8f8cSSatish Balay   /*
1864a30f8f8cSSatish Balay         Zero the required rows. If the "diagonal block" of the matrix
1865a30f8f8cSSatish Balay      is square and the user wishes to set the diagonal we use seperate
1866a30f8f8cSSatish Balay      code so that MatSetValues() is not called for each diagonal allocating
1867a30f8f8cSSatish Balay      new memory, thus calling lots of mallocs and slowing things down.
1868a30f8f8cSSatish Balay 
1869a30f8f8cSSatish Balay        Contributed by: Mathew Knepley
1870a30f8f8cSSatish Balay   */
1871a30f8f8cSSatish Balay   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1872c8407628SSatish Balay   ierr = MatZeroRows_SeqSBAIJ(l->B,istmp,0);CHKERRQ(ierr);
1873a30f8f8cSSatish Balay   if (diag && (l->A->M == l->A->N)) {
1874a30f8f8cSSatish Balay     ierr = MatZeroRows_SeqSBAIJ(l->A,istmp,diag);CHKERRQ(ierr);
1875a30f8f8cSSatish Balay   } else if (diag) {
1876a30f8f8cSSatish Balay     ierr = MatZeroRows_SeqSBAIJ(l->A,istmp,0);CHKERRQ(ierr);
1877a30f8f8cSSatish Balay     if (((Mat_SeqSBAIJ*)l->A->data)->nonew) {
1878a30f8f8cSSatish Balay       SETERRQ(PETSC_ERR_SUP,0,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1879a30f8f8cSSatish Balay MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1880a30f8f8cSSatish Balay     }
1881a30f8f8cSSatish Balay     for (i=0; i<slen; i++) {
1882a30f8f8cSSatish Balay       row  = lrows[i] + rstart_bs;
1883a30f8f8cSSatish Balay       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
1884a30f8f8cSSatish Balay     }
1885a30f8f8cSSatish Balay     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1886a30f8f8cSSatish Balay     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1887a30f8f8cSSatish Balay   } else {
1888a30f8f8cSSatish Balay     ierr = MatZeroRows_SeqSBAIJ(l->A,istmp,0);CHKERRQ(ierr);
1889a30f8f8cSSatish Balay   }
1890a30f8f8cSSatish Balay 
1891a30f8f8cSSatish Balay   ierr = ISDestroy(istmp);CHKERRQ(ierr);
1892a30f8f8cSSatish Balay   ierr = PetscFree(lrows);CHKERRQ(ierr);
1893a30f8f8cSSatish Balay 
1894a30f8f8cSSatish Balay   /* wait on sends */
1895a30f8f8cSSatish Balay   if (nsends) {
1896a30f8f8cSSatish Balay     send_status = (MPI_Status*)PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
1897a30f8f8cSSatish Balay     ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1898a30f8f8cSSatish Balay     ierr        = PetscFree(send_status);CHKERRQ(ierr);
1899a30f8f8cSSatish Balay   }
1900a30f8f8cSSatish Balay   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1901a30f8f8cSSatish Balay   ierr = PetscFree(svalues);CHKERRQ(ierr);
1902a30f8f8cSSatish Balay 
1903a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1904a30f8f8cSSatish Balay }
1905a30f8f8cSSatish Balay 
1906a30f8f8cSSatish Balay #undef __FUNC__
1907a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatPrintHelp_MPISBAIJ"
1908a30f8f8cSSatish Balay int MatPrintHelp_MPISBAIJ(Mat A)
1909a30f8f8cSSatish Balay {
1910a30f8f8cSSatish Balay   Mat_MPISBAIJ *a   = (Mat_MPISBAIJ*)A->data;
1911a30f8f8cSSatish Balay   MPI_Comm    comm = A->comm;
1912a30f8f8cSSatish Balay   static int  called = 0;
1913a30f8f8cSSatish Balay   int         ierr;
1914a30f8f8cSSatish Balay 
1915a30f8f8cSSatish Balay   PetscFunctionBegin;
1916a30f8f8cSSatish Balay   if (!a->rank) {
1917a30f8f8cSSatish Balay     ierr = MatPrintHelp_SeqSBAIJ(a->A);CHKERRQ(ierr);
1918a30f8f8cSSatish Balay   }
1919a30f8f8cSSatish Balay   if (called) {PetscFunctionReturn(0);} else called = 1;
1920a30f8f8cSSatish Balay   ierr = (*PetscHelpPrintf)(comm," Options for MATMPISBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
1921a30f8f8cSSatish Balay   ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
1922a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1923a30f8f8cSSatish Balay }
1924a30f8f8cSSatish Balay 
1925a30f8f8cSSatish Balay #undef __FUNC__
1926a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatSetUnfactored_MPISBAIJ"
1927a30f8f8cSSatish Balay int MatSetUnfactored_MPISBAIJ(Mat A)
1928a30f8f8cSSatish Balay {
1929a30f8f8cSSatish Balay   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*)A->data;
1930a30f8f8cSSatish Balay   int         ierr;
1931a30f8f8cSSatish Balay 
1932a30f8f8cSSatish Balay   PetscFunctionBegin;
1933a30f8f8cSSatish Balay   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1934a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1935a30f8f8cSSatish Balay }
1936a30f8f8cSSatish Balay 
1937a30f8f8cSSatish Balay static int MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *);
1938a30f8f8cSSatish Balay 
1939a30f8f8cSSatish Balay #undef __FUNC__
1940a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatEqual_MPISBAIJ"
1941a30f8f8cSSatish Balay int MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag)
1942a30f8f8cSSatish Balay {
1943a30f8f8cSSatish Balay   Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1944a30f8f8cSSatish Balay   Mat         a,b,c,d;
1945a30f8f8cSSatish Balay   PetscTruth  flg;
1946a30f8f8cSSatish Balay   int         ierr;
1947a30f8f8cSSatish Balay 
1948a30f8f8cSSatish Balay   PetscFunctionBegin;
1949a30f8f8cSSatish Balay   if (B->type != MATMPISBAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type");
1950a30f8f8cSSatish Balay   a = matA->A; b = matA->B;
1951a30f8f8cSSatish Balay   c = matB->A; d = matB->B;
1952a30f8f8cSSatish Balay 
1953a30f8f8cSSatish Balay   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1954a30f8f8cSSatish Balay   if (flg == PETSC_TRUE) {
1955a30f8f8cSSatish Balay     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1956a30f8f8cSSatish Balay   }
1957a30f8f8cSSatish Balay   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1958a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1959a30f8f8cSSatish Balay }
1960a30f8f8cSSatish Balay 
1961a30f8f8cSSatish Balay /* -------------------------------------------------------------------*/
1962a30f8f8cSSatish Balay static struct _MatOps MatOps_Values = {
1963a30f8f8cSSatish Balay   MatSetValues_MPISBAIJ,
1964a30f8f8cSSatish Balay   MatGetRow_MPISBAIJ,
1965a30f8f8cSSatish Balay   MatRestoreRow_MPISBAIJ,
1966a30f8f8cSSatish Balay   MatMult_MPISBAIJ,
1967a30f8f8cSSatish Balay   MatMultAdd_MPISBAIJ,
1968a30f8f8cSSatish Balay   MatMultTranspose_MPISBAIJ,
1969a30f8f8cSSatish Balay   MatMultTransposeAdd_MPISBAIJ,
1970a30f8f8cSSatish Balay   0,
1971a30f8f8cSSatish Balay   0,
1972a30f8f8cSSatish Balay   0,
1973a30f8f8cSSatish Balay   0,
1974a30f8f8cSSatish Balay   0,
1975a30f8f8cSSatish Balay   0,
1976a30f8f8cSSatish Balay   0,
1977a30f8f8cSSatish Balay   MatTranspose_MPISBAIJ,
1978a30f8f8cSSatish Balay   MatGetInfo_MPISBAIJ,
1979a30f8f8cSSatish Balay   MatEqual_MPISBAIJ,
1980a30f8f8cSSatish Balay   MatGetDiagonal_MPISBAIJ,
1981a30f8f8cSSatish Balay   MatDiagonalScale_MPISBAIJ,
1982a30f8f8cSSatish Balay   MatNorm_MPISBAIJ,
1983a30f8f8cSSatish Balay   MatAssemblyBegin_MPISBAIJ,
1984a30f8f8cSSatish Balay   MatAssemblyEnd_MPISBAIJ,
1985a30f8f8cSSatish Balay   0,
1986a30f8f8cSSatish Balay   MatSetOption_MPISBAIJ,
1987a30f8f8cSSatish Balay   MatZeroEntries_MPISBAIJ,
1988a30f8f8cSSatish Balay   MatZeroRows_MPISBAIJ,
1989a30f8f8cSSatish Balay   0,
1990a30f8f8cSSatish Balay   0,
1991a30f8f8cSSatish Balay   0,
1992a30f8f8cSSatish Balay   0,
1993a30f8f8cSSatish Balay   MatGetSize_MPISBAIJ,
1994a30f8f8cSSatish Balay   MatGetLocalSize_MPISBAIJ,
1995a30f8f8cSSatish Balay   MatGetOwnershipRange_MPISBAIJ,
1996a30f8f8cSSatish Balay   0,
1997a30f8f8cSSatish Balay   0,
1998a30f8f8cSSatish Balay   0,
1999a30f8f8cSSatish Balay   0,
2000a30f8f8cSSatish Balay   MatDuplicate_MPISBAIJ,
2001a30f8f8cSSatish Balay   0,
2002a30f8f8cSSatish Balay   0,
2003a30f8f8cSSatish Balay   0,
2004a30f8f8cSSatish Balay   0,
2005a30f8f8cSSatish Balay   0,
2006a30f8f8cSSatish Balay   MatGetSubMatrices_MPISBAIJ,
2007a30f8f8cSSatish Balay   MatIncreaseOverlap_MPISBAIJ,
2008a30f8f8cSSatish Balay   MatGetValues_MPISBAIJ,
2009a30f8f8cSSatish Balay   0,
2010a30f8f8cSSatish Balay   MatPrintHelp_MPISBAIJ,
2011a30f8f8cSSatish Balay   MatScale_MPISBAIJ,
2012a30f8f8cSSatish Balay   0,
2013a30f8f8cSSatish Balay   0,
2014a30f8f8cSSatish Balay   0,
2015a30f8f8cSSatish Balay   MatGetBlockSize_MPISBAIJ,
2016a30f8f8cSSatish Balay   0,
2017a30f8f8cSSatish Balay   0,
2018a30f8f8cSSatish Balay   0,
2019a30f8f8cSSatish Balay   0,
2020a30f8f8cSSatish Balay   0,
2021a30f8f8cSSatish Balay   0,
2022a30f8f8cSSatish Balay   MatSetUnfactored_MPISBAIJ,
2023a30f8f8cSSatish Balay   0,
2024a30f8f8cSSatish Balay   MatSetValuesBlocked_MPISBAIJ,
2025a30f8f8cSSatish Balay   0,
2026a30f8f8cSSatish Balay   0,
2027a30f8f8cSSatish Balay   0,
2028a30f8f8cSSatish Balay   MatGetMaps_Petsc};
2029a30f8f8cSSatish Balay 
2030a30f8f8cSSatish Balay 
2031a30f8f8cSSatish Balay EXTERN_C_BEGIN
2032a30f8f8cSSatish Balay #undef __FUNC__
2033a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatGetDiagonalBlock_MPISBAIJ"
2034a30f8f8cSSatish Balay int MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
2035a30f8f8cSSatish Balay {
2036a30f8f8cSSatish Balay   PetscFunctionBegin;
2037a30f8f8cSSatish Balay   *a      = ((Mat_MPISBAIJ *)A->data)->A;
2038a30f8f8cSSatish Balay   *iscopy = PETSC_FALSE;
2039a30f8f8cSSatish Balay   PetscFunctionReturn(0);
2040a30f8f8cSSatish Balay }
2041a30f8f8cSSatish Balay EXTERN_C_END
2042a30f8f8cSSatish Balay 
2043a30f8f8cSSatish Balay #undef __FUNC__
2044a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatCreateMPISBAIJ"
2045a30f8f8cSSatish Balay /*@C
2046a30f8f8cSSatish Balay    MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
2047a30f8f8cSSatish Balay    (block compressed row).  For good matrix assembly performance
2048a30f8f8cSSatish Balay    the user should preallocate the matrix storage by setting the parameters
2049a30f8f8cSSatish Balay    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2050a30f8f8cSSatish Balay    performance can be increased by more than a factor of 50.
2051a30f8f8cSSatish Balay 
2052a30f8f8cSSatish Balay    Collective on MPI_Comm
2053a30f8f8cSSatish Balay 
2054a30f8f8cSSatish Balay    Input Parameters:
2055a30f8f8cSSatish Balay +  comm - MPI communicator
2056a30f8f8cSSatish Balay .  bs   - size of blockk
2057a30f8f8cSSatish Balay .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2058a30f8f8cSSatish Balay            This value should be the same as the local size used in creating the
2059a30f8f8cSSatish Balay            y vector for the matrix-vector product y = Ax.
2060a30f8f8cSSatish Balay .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2061a30f8f8cSSatish Balay            This value should be the same as the local size used in creating the
2062a30f8f8cSSatish Balay            x vector for the matrix-vector product y = Ax.
2063a30f8f8cSSatish Balay .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2064a30f8f8cSSatish Balay .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2065a30f8f8cSSatish Balay .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2066a30f8f8cSSatish Balay            submatrix  (same for all local rows)
2067a30f8f8cSSatish Balay .  d_nnz - array containing the number of block nonzeros in the various block rows
2068a30f8f8cSSatish Balay            of the in diagonal portion of the local (possibly different for each block
2069a30f8f8cSSatish Balay            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
2070a30f8f8cSSatish Balay .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2071a30f8f8cSSatish Balay            submatrix (same for all local rows).
2072a30f8f8cSSatish Balay -  o_nnz - array containing the number of nonzeros in the various block rows of the
2073a30f8f8cSSatish Balay            off-diagonal portion of the local submatrix (possibly different for
2074a30f8f8cSSatish Balay            each block row) or PETSC_NULL.
2075a30f8f8cSSatish Balay 
2076a30f8f8cSSatish Balay    Output Parameter:
2077a30f8f8cSSatish Balay .  A - the matrix
2078a30f8f8cSSatish Balay 
2079a30f8f8cSSatish Balay    Options Database Keys:
2080a30f8f8cSSatish Balay .   -mat_no_unroll - uses code that does not unroll the loops in the
2081a30f8f8cSSatish Balay                      block calculations (much slower)
2082a30f8f8cSSatish Balay .   -mat_block_size - size of the blocks to use
2083a30f8f8cSSatish Balay .   -mat_mpi - use the parallel matrix data structures even on one processor
2084a30f8f8cSSatish Balay                (defaults to using SeqBAIJ format on one processor)
2085a30f8f8cSSatish Balay 
2086a30f8f8cSSatish Balay    Notes:
2087a30f8f8cSSatish Balay    The user MUST specify either the local or global matrix dimensions
2088a30f8f8cSSatish Balay    (possibly both).
2089a30f8f8cSSatish Balay 
2090a30f8f8cSSatish Balay    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
2091a30f8f8cSSatish Balay    than it must be used on all processors that share the object for that argument.
2092a30f8f8cSSatish Balay 
2093a30f8f8cSSatish Balay    Storage Information:
2094a30f8f8cSSatish Balay    For a square global matrix we define each processor's diagonal portion
2095a30f8f8cSSatish Balay    to be its local rows and the corresponding columns (a square submatrix);
2096a30f8f8cSSatish Balay    each processor's off-diagonal portion encompasses the remainder of the
2097a30f8f8cSSatish Balay    local matrix (a rectangular submatrix).
2098a30f8f8cSSatish Balay 
2099a30f8f8cSSatish Balay    The user can specify preallocated storage for the diagonal part of
2100a30f8f8cSSatish Balay    the local submatrix with either d_nz or d_nnz (not both).  Set
2101a30f8f8cSSatish Balay    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2102a30f8f8cSSatish Balay    memory allocation.  Likewise, specify preallocated storage for the
2103a30f8f8cSSatish Balay    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2104a30f8f8cSSatish Balay 
2105a30f8f8cSSatish Balay    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2106a30f8f8cSSatish Balay    the figure below we depict these three local rows and all columns (0-11).
2107a30f8f8cSSatish Balay 
2108a30f8f8cSSatish Balay .vb
2109a30f8f8cSSatish Balay            0 1 2 3 4 5 6 7 8 9 10 11
2110a30f8f8cSSatish Balay           -------------------
2111a30f8f8cSSatish Balay    row 3  |  o o o d d d o o o o o o
2112a30f8f8cSSatish Balay    row 4  |  o o o d d d o o o o o o
2113a30f8f8cSSatish Balay    row 5  |  o o o d d d o o o o o o
2114a30f8f8cSSatish Balay           -------------------
2115a30f8f8cSSatish Balay .ve
2116a30f8f8cSSatish Balay 
2117a30f8f8cSSatish Balay    Thus, any entries in the d locations are stored in the d (diagonal)
2118a30f8f8cSSatish Balay    submatrix, and any entries in the o locations are stored in the
2119a30f8f8cSSatish Balay    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
2120a30f8f8cSSatish Balay    stored simply in the MATSEQBAIJ format for compressed row storage.
2121a30f8f8cSSatish Balay 
2122a30f8f8cSSatish Balay    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2123a30f8f8cSSatish Balay    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2124a30f8f8cSSatish Balay    In general, for PDE problems in which most nonzeros are near the diagonal,
2125a30f8f8cSSatish Balay    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2126a30f8f8cSSatish Balay    or you will get TERRIBLE performance; see the users' manual chapter on
2127a30f8f8cSSatish Balay    matrices.
2128a30f8f8cSSatish Balay 
2129a30f8f8cSSatish Balay    Level: intermediate
2130a30f8f8cSSatish Balay 
2131a30f8f8cSSatish Balay .keywords: matrix, block, aij, compressed row, sparse, parallel
2132a30f8f8cSSatish Balay 
2133a30f8f8cSSatish Balay .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPISBAIJ()
2134a30f8f8cSSatish Balay @*/
2135a30f8f8cSSatish Balay 
2136a30f8f8cSSatish Balay int MatCreateMPISBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
2137a30f8f8cSSatish Balay {
2138a30f8f8cSSatish Balay   Mat          B;
2139a30f8f8cSSatish Balay   Mat_MPISBAIJ  *b;
2140f65c83cfSHong Zhang   int          ierr,i,sum[1],work[1],mbs,Mbs=PETSC_DECIDE,size;
2141a30f8f8cSSatish Balay   PetscTruth   flag1,flag2,flg;
2142a30f8f8cSSatish Balay 
2143a30f8f8cSSatish Balay   PetscFunctionBegin;
2144a30f8f8cSSatish Balay   if (M != N || m != n){ /* N and n are not used after this */
2145a30f8f8cSSatish Balay     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"For symmetric format, set M=N and m=n");
2146a30f8f8cSSatish Balay   }
2147a30f8f8cSSatish Balay   ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2148a30f8f8cSSatish Balay 
2149a30f8f8cSSatish Balay   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Invalid block size specified, must be positive");
2150a30f8f8cSSatish Balay   if (d_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nz cannot be less than -2: value %d",d_nz);
2151a30f8f8cSSatish Balay   if (o_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nz cannot be less than -2: value %d",o_nz);
2152a30f8f8cSSatish Balay   if (d_nnz) {
2153a30f8f8cSSatish Balay     for (i=0; i<m/bs; i++) {
2154a30f8f8cSSatish Balay       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nnz cannot be less than -1: local row %d value %d",i,d_nnz[i]);
2155a30f8f8cSSatish Balay     }
2156a30f8f8cSSatish Balay   }
2157a30f8f8cSSatish Balay   if (o_nnz) {
2158a30f8f8cSSatish Balay     for (i=0; i<m/bs; i++) {
2159a30f8f8cSSatish Balay       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nnz cannot be less than -1: local row %d value %d",i,o_nnz[i]);
2160a30f8f8cSSatish Balay     }
2161a30f8f8cSSatish Balay   }
2162a30f8f8cSSatish Balay 
2163a30f8f8cSSatish Balay   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2164a30f8f8cSSatish Balay   ierr = OptionsHasName(PETSC_NULL,"-mat_mpisbaij",&flag1);CHKERRQ(ierr);
2165a30f8f8cSSatish Balay   ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2);CHKERRQ(ierr);
2166a30f8f8cSSatish Balay   if (!flag1 && !flag2 && size == 1) {
2167a30f8f8cSSatish Balay     if (M == PETSC_DECIDE) M = m;
2168a30f8f8cSSatish Balay     ierr = MatCreateSeqSBAIJ(comm,bs,M,M,d_nz,d_nnz,A);CHKERRQ(ierr);
2169a30f8f8cSSatish Balay     PetscFunctionReturn(0);
2170a30f8f8cSSatish Balay   }
2171a30f8f8cSSatish Balay 
2172a30f8f8cSSatish Balay   *A = 0;
2173a30f8f8cSSatish Balay   PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPISBAIJ,"Mat",comm,MatDestroy,MatView);
2174a30f8f8cSSatish Balay   PLogObjectCreate(B);
2175a30f8f8cSSatish Balay   B->data = (void*)(b = PetscNew(Mat_MPISBAIJ));CHKPTRQ(b);
2176a30f8f8cSSatish Balay   ierr    = PetscMemzero(b,sizeof(Mat_MPISBAIJ));CHKERRQ(ierr);
2177a30f8f8cSSatish Balay   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2178a30f8f8cSSatish Balay 
2179a30f8f8cSSatish Balay   B->ops->destroy    = MatDestroy_MPISBAIJ;
2180a30f8f8cSSatish Balay   B->ops->view       = MatView_MPISBAIJ;
2181a30f8f8cSSatish Balay   B->mapping    = 0;
2182a30f8f8cSSatish Balay   B->factor     = 0;
2183a30f8f8cSSatish Balay   B->assembled  = PETSC_FALSE;
2184a30f8f8cSSatish Balay 
2185a30f8f8cSSatish Balay   B->insertmode = NOT_SET_VALUES;
2186a30f8f8cSSatish Balay   ierr = MPI_Comm_rank(comm,&b->rank);CHKERRQ(ierr);
2187a30f8f8cSSatish Balay   ierr = MPI_Comm_size(comm,&b->size);CHKERRQ(ierr);
2188a30f8f8cSSatish Balay 
2189a30f8f8cSSatish Balay   if (m == PETSC_DECIDE && (d_nnz || o_nnz)) {
2190a30f8f8cSSatish Balay     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
2191a30f8f8cSSatish Balay   }
2192a30f8f8cSSatish Balay   if (M == PETSC_DECIDE && m == PETSC_DECIDE) {
2193a30f8f8cSSatish Balay     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either M or m should be specified");
2194a30f8f8cSSatish Balay   }
2195a30f8f8cSSatish Balay   if (M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE;
2196a30f8f8cSSatish Balay 
2197a30f8f8cSSatish Balay   if (M == PETSC_DECIDE) {
2198a30f8f8cSSatish Balay     work[0] = m; mbs = m/bs;
2199a30f8f8cSSatish Balay     ierr = MPI_Allreduce(work,sum,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
2200a30f8f8cSSatish Balay     M = sum[0]; Mbs = M/bs;
2201a30f8f8cSSatish Balay   } else { /* M is specified */
2202a30f8f8cSSatish Balay     Mbs = M/bs;
2203a30f8f8cSSatish Balay     if (Mbs*bs != M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global rows must be divisible by blocksize");
2204a30f8f8cSSatish Balay     mbs = Mbs/b->size + ((Mbs % b->size) > b->rank);
2205a30f8f8cSSatish Balay     m   = mbs*bs;
2206a30f8f8cSSatish Balay   }
2207a30f8f8cSSatish Balay 
2208a30f8f8cSSatish Balay   if (mbs*bs != m) {
2209a30f8f8cSSatish Balay     SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of local rows/cols must be divisible by blocksize");
2210a30f8f8cSSatish Balay   }
2211a30f8f8cSSatish Balay 
2212a30f8f8cSSatish Balay   b->m = m; B->m = m;
2213a30f8f8cSSatish Balay   b->n = m; B->n = m;
2214a30f8f8cSSatish Balay   b->N = M; B->N = M;
2215a30f8f8cSSatish Balay   b->M = M;
2216a30f8f8cSSatish Balay   B->M = M;
2217a30f8f8cSSatish Balay   b->bs  = bs;
2218a30f8f8cSSatish Balay   b->bs2 = bs*bs;
2219a30f8f8cSSatish Balay   b->mbs = mbs;
2220a30f8f8cSSatish Balay   b->nbs = mbs;
2221a30f8f8cSSatish Balay   b->Mbs = Mbs;
2222a30f8f8cSSatish Balay   b->Nbs = Mbs;
2223a30f8f8cSSatish Balay 
2224a30f8f8cSSatish Balay   /* the information in the maps duplicates the information computed below, eventually
2225a30f8f8cSSatish Balay      we should remove the duplicate information that is not contained in the maps */
2226a30f8f8cSSatish Balay   ierr = MapCreateMPI(B->comm,m,M,&B->rmap);CHKERRQ(ierr);
2227a30f8f8cSSatish Balay   ierr = MapCreateMPI(B->comm,m,M,&B->cmap);CHKERRQ(ierr);
2228a30f8f8cSSatish Balay 
2229a30f8f8cSSatish Balay   /* build local table of row and column ownerships */
2230a30f8f8cSSatish Balay   b->rowners = (int*)PetscMalloc(3*(b->size+2)*sizeof(int));CHKPTRQ(b->rowners);
2231a30f8f8cSSatish Balay   PLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ));
2232a30f8f8cSSatish Balay   b->cowners    = b->rowners + b->size + 2;
2233a30f8f8cSSatish Balay   b->rowners_bs = b->cowners + b->size + 2;
2234a30f8f8cSSatish Balay   ierr = MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2235a30f8f8cSSatish Balay   b->rowners[0]    = 0;
2236a30f8f8cSSatish Balay   for (i=2; i<=b->size; i++) {
2237a30f8f8cSSatish Balay     b->rowners[i] += b->rowners[i-1];
2238a30f8f8cSSatish Balay   }
2239a30f8f8cSSatish Balay   for (i=0; i<=b->size; i++) {
2240a30f8f8cSSatish Balay     b->rowners_bs[i] = b->rowners[i]*bs;
2241a30f8f8cSSatish Balay   }
2242a30f8f8cSSatish Balay   b->rstart    = b->rowners[b->rank];
2243a30f8f8cSSatish Balay   b->rend      = b->rowners[b->rank+1];
2244a30f8f8cSSatish Balay   b->rstart_bs = b->rstart * bs;
2245a30f8f8cSSatish Balay   b->rend_bs   = b->rend * bs;
2246a30f8f8cSSatish Balay 
2247a30f8f8cSSatish Balay   b->cstart    = b->rstart;
2248a30f8f8cSSatish Balay   b->cend      = b->rend;
2249a30f8f8cSSatish Balay   b->cstart_bs = b->cstart * bs;
2250a30f8f8cSSatish Balay   b->cend_bs   = b->cend * bs;
2251a30f8f8cSSatish Balay 
2252a30f8f8cSSatish Balay 
2253a30f8f8cSSatish Balay   if (d_nz == PETSC_DEFAULT) d_nz = 5;
2254a30f8f8cSSatish Balay   ierr = MatCreateSeqSBAIJ(PETSC_COMM_SELF,bs,m,m,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
2255a30f8f8cSSatish Balay   PLogObjectParent(B,b->A);
2256a30f8f8cSSatish Balay   if (o_nz == PETSC_DEFAULT) o_nz = 0;
2257a30f8f8cSSatish Balay   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,M,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
2258a30f8f8cSSatish Balay   PLogObjectParent(B,b->B);
2259a30f8f8cSSatish Balay 
2260a30f8f8cSSatish Balay   /* build cache for off array entries formed */
2261a30f8f8cSSatish Balay   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
2262a30f8f8cSSatish Balay   ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);
2263a30f8f8cSSatish Balay   b->donotstash  = PETSC_FALSE;
2264a30f8f8cSSatish Balay   b->colmap      = PETSC_NULL;
2265a30f8f8cSSatish Balay   b->garray      = PETSC_NULL;
2266a30f8f8cSSatish Balay   b->roworiented = PETSC_TRUE;
2267a30f8f8cSSatish Balay 
2268a30f8f8cSSatish Balay #if defined(PEYSC_USE_MAT_SINGLE)
2269a30f8f8cSSatish Balay   /* stuff for MatSetValues_XXX in single precision */
2270a30f8f8cSSatish Balay   b->lensetvalues     = 0;
2271a30f8f8cSSatish Balay   b->setvaluescopy    = PETSC_NULL;
2272a30f8f8cSSatish Balay #endif
2273a30f8f8cSSatish Balay 
2274a30f8f8cSSatish Balay   /* stuff used in block assembly */
2275a30f8f8cSSatish Balay   b->barray       = 0;
2276a30f8f8cSSatish Balay 
2277a30f8f8cSSatish Balay   /* stuff used for matrix vector multiply */
2278a30f8f8cSSatish Balay   b->lvec         = 0;
2279a30f8f8cSSatish Balay   b->Mvctx        = 0;
2280a30f8f8cSSatish Balay 
2281a30f8f8cSSatish Balay   /* stuff for MatGetRow() */
2282a30f8f8cSSatish Balay   b->rowindices   = 0;
2283a30f8f8cSSatish Balay   b->rowvalues    = 0;
2284a30f8f8cSSatish Balay   b->getrowactive = PETSC_FALSE;
2285a30f8f8cSSatish Balay 
2286a30f8f8cSSatish Balay   /* hash table stuff */
2287a30f8f8cSSatish Balay   b->ht           = 0;
2288a30f8f8cSSatish Balay   b->hd           = 0;
2289a30f8f8cSSatish Balay   b->ht_size      = 0;
2290a30f8f8cSSatish Balay   b->ht_flag      = PETSC_FALSE;
2291a30f8f8cSSatish Balay   b->ht_fact      = 0;
2292a30f8f8cSSatish Balay   b->ht_total_ct  = 0;
2293a30f8f8cSSatish Balay   b->ht_insert_ct = 0;
2294a30f8f8cSSatish Balay 
2295a30f8f8cSSatish Balay   *A = B;
2296a30f8f8cSSatish Balay   ierr = OptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
2297a30f8f8cSSatish Balay   if (flg) {
2298a30f8f8cSSatish Balay     double fact = 1.39;
2299a30f8f8cSSatish Balay     ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
2300a30f8f8cSSatish Balay     ierr = OptionsGetDouble(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
2301a30f8f8cSSatish Balay     if (fact <= 1.0) fact = 1.39;
2302a30f8f8cSSatish Balay     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
2303a30f8f8cSSatish Balay     PLogInfo(0,"MatCreateMPISBAIJ:Hash table Factor used %5.2f\n",fact);
2304a30f8f8cSSatish Balay   }
2305a30f8f8cSSatish Balay   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2306a30f8f8cSSatish Balay                                      "MatStoreValues_MPISBAIJ",
2307a30f8f8cSSatish Balay                                      MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
2308a30f8f8cSSatish Balay   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2309a30f8f8cSSatish Balay                                      "MatRetrieveValues_MPISBAIJ",
2310a30f8f8cSSatish Balay                                      MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
2311a30f8f8cSSatish Balay   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
2312a30f8f8cSSatish Balay                                      "MatGetDiagonalBlock_MPISBAIJ",
2313a30f8f8cSSatish Balay                                      MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
2314a30f8f8cSSatish Balay   PetscFunctionReturn(0);
2315a30f8f8cSSatish Balay }
2316a30f8f8cSSatish Balay 
2317a30f8f8cSSatish Balay 
2318a30f8f8cSSatish Balay #undef __FUNC__
2319a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatDuplicate_MPISBAIJ"
2320a30f8f8cSSatish Balay static int MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2321a30f8f8cSSatish Balay {
2322a30f8f8cSSatish Balay   Mat         mat;
2323a30f8f8cSSatish Balay   Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2324a30f8f8cSSatish Balay   int         ierr,len=0;
2325a30f8f8cSSatish Balay   PetscTruth  flg;
2326a30f8f8cSSatish Balay 
2327a30f8f8cSSatish Balay   PetscFunctionBegin;
2328a30f8f8cSSatish Balay   *newmat       = 0;
2329a30f8f8cSSatish Balay   PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPISBAIJ,"Mat",matin->comm,MatDestroy,MatView);
2330a30f8f8cSSatish Balay   PLogObjectCreate(mat);
2331a30f8f8cSSatish Balay   mat->data         = (void*)(a = PetscNew(Mat_MPISBAIJ));CHKPTRQ(a);
2332a30f8f8cSSatish Balay   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2333a30f8f8cSSatish Balay   mat->ops->destroy = MatDestroy_MPISBAIJ;
2334a30f8f8cSSatish Balay   mat->ops->view    = MatView_MPISBAIJ;
2335a30f8f8cSSatish Balay   mat->factor       = matin->factor;
2336a30f8f8cSSatish Balay   mat->assembled    = PETSC_TRUE;
2337a30f8f8cSSatish Balay   mat->insertmode   = NOT_SET_VALUES;
2338a30f8f8cSSatish Balay 
2339a30f8f8cSSatish Balay   a->m = mat->m   = oldmat->m;
2340a30f8f8cSSatish Balay   a->n = mat->n   = oldmat->n;
2341a30f8f8cSSatish Balay   a->M = mat->M   = oldmat->M;
2342a30f8f8cSSatish Balay   a->N = mat->N   = oldmat->N;
2343a30f8f8cSSatish Balay 
2344a30f8f8cSSatish Balay   a->bs  = oldmat->bs;
2345a30f8f8cSSatish Balay   a->bs2 = oldmat->bs2;
2346a30f8f8cSSatish Balay   a->mbs = oldmat->mbs;
2347a30f8f8cSSatish Balay   a->nbs = oldmat->nbs;
2348a30f8f8cSSatish Balay   a->Mbs = oldmat->Mbs;
2349a30f8f8cSSatish Balay   a->Nbs = oldmat->Nbs;
2350a30f8f8cSSatish Balay 
2351a30f8f8cSSatish Balay   a->rstart       = oldmat->rstart;
2352a30f8f8cSSatish Balay   a->rend         = oldmat->rend;
2353a30f8f8cSSatish Balay   a->cstart       = oldmat->cstart;
2354a30f8f8cSSatish Balay   a->cend         = oldmat->cend;
2355a30f8f8cSSatish Balay   a->size         = oldmat->size;
2356a30f8f8cSSatish Balay   a->rank         = oldmat->rank;
2357a30f8f8cSSatish Balay   a->donotstash   = oldmat->donotstash;
2358a30f8f8cSSatish Balay   a->roworiented  = oldmat->roworiented;
2359a30f8f8cSSatish Balay   a->rowindices   = 0;
2360a30f8f8cSSatish Balay   a->rowvalues    = 0;
2361a30f8f8cSSatish Balay   a->getrowactive = PETSC_FALSE;
2362a30f8f8cSSatish Balay   a->barray       = 0;
2363a30f8f8cSSatish Balay   a->rstart_bs    = oldmat->rstart_bs;
2364a30f8f8cSSatish Balay   a->rend_bs      = oldmat->rend_bs;
2365a30f8f8cSSatish Balay   a->cstart_bs    = oldmat->cstart_bs;
2366a30f8f8cSSatish Balay   a->cend_bs      = oldmat->cend_bs;
2367a30f8f8cSSatish Balay 
2368a30f8f8cSSatish Balay   /* hash table stuff */
2369a30f8f8cSSatish Balay   a->ht           = 0;
2370a30f8f8cSSatish Balay   a->hd           = 0;
2371a30f8f8cSSatish Balay   a->ht_size      = 0;
2372a30f8f8cSSatish Balay   a->ht_flag      = oldmat->ht_flag;
2373a30f8f8cSSatish Balay   a->ht_fact      = oldmat->ht_fact;
2374a30f8f8cSSatish Balay   a->ht_total_ct  = 0;
2375a30f8f8cSSatish Balay   a->ht_insert_ct = 0;
2376a30f8f8cSSatish Balay 
2377a30f8f8cSSatish Balay 
2378a30f8f8cSSatish Balay   a->rowners = (int*)PetscMalloc(3*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners);
2379a30f8f8cSSatish Balay   PLogObjectMemory(mat,3*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ));
2380a30f8f8cSSatish Balay   a->cowners    = a->rowners + a->size + 2;
2381a30f8f8cSSatish Balay   a->rowners_bs = a->cowners + a->size + 2;
2382a30f8f8cSSatish Balay   ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr);
2383a30f8f8cSSatish Balay   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
2384a30f8f8cSSatish Balay   ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr);
2385a30f8f8cSSatish Balay   if (oldmat->colmap) {
2386a30f8f8cSSatish Balay #if defined (PETSC_USE_CTABLE)
2387a30f8f8cSSatish Balay   ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2388a30f8f8cSSatish Balay #else
2389a30f8f8cSSatish Balay     a->colmap = (int*)PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap);
2390a30f8f8cSSatish Balay     PLogObjectMemory(mat,(a->Nbs)*sizeof(int));
2391a30f8f8cSSatish Balay     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr);
2392a30f8f8cSSatish Balay #endif
2393a30f8f8cSSatish Balay   } else a->colmap = 0;
2394a30f8f8cSSatish Balay   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2395a30f8f8cSSatish Balay     a->garray = (int*)PetscMalloc(len*sizeof(int));CHKPTRQ(a->garray);
2396a30f8f8cSSatish Balay     PLogObjectMemory(mat,len*sizeof(int));
2397a30f8f8cSSatish Balay     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr);
2398a30f8f8cSSatish Balay   } else a->garray = 0;
2399a30f8f8cSSatish Balay 
2400a30f8f8cSSatish Balay   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2401a30f8f8cSSatish Balay   PLogObjectParent(mat,a->lvec);
2402a30f8f8cSSatish Balay   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2403a30f8f8cSSatish Balay 
2404a30f8f8cSSatish Balay   PLogObjectParent(mat,a->Mvctx);
2405a30f8f8cSSatish Balay   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2406a30f8f8cSSatish Balay   PLogObjectParent(mat,a->A);
2407a30f8f8cSSatish Balay   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2408a30f8f8cSSatish Balay   PLogObjectParent(mat,a->B);
2409a30f8f8cSSatish Balay   ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr);
2410a30f8f8cSSatish Balay   ierr = FListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr);
2411a30f8f8cSSatish Balay   if (flg) {
2412a30f8f8cSSatish Balay     ierr = MatPrintHelp(mat);CHKERRQ(ierr);
2413a30f8f8cSSatish Balay   }
2414a30f8f8cSSatish Balay   *newmat = mat;
2415a30f8f8cSSatish Balay   PetscFunctionReturn(0);
2416a30f8f8cSSatish Balay }
2417a30f8f8cSSatish Balay 
2418a30f8f8cSSatish Balay #include "petscsys.h"
2419a30f8f8cSSatish Balay 
2420a30f8f8cSSatish Balay #undef __FUNC__
2421a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatLoad_MPISBAIJ"
2422a30f8f8cSSatish Balay int MatLoad_MPISBAIJ(Viewer viewer,MatType type,Mat *newmat)
2423a30f8f8cSSatish Balay {
2424a30f8f8cSSatish Balay   Mat          A;
2425a30f8f8cSSatish Balay   int          i,nz,ierr,j,rstart,rend,fd;
2426a30f8f8cSSatish Balay   Scalar       *vals,*buf;
2427a30f8f8cSSatish Balay   MPI_Comm     comm = ((PetscObject)viewer)->comm;
2428a30f8f8cSSatish Balay   MPI_Status   status;
2429a30f8f8cSSatish Balay   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
2430a30f8f8cSSatish Balay   int          *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf;
2431a30f8f8cSSatish Balay   int          tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows;
2432a30f8f8cSSatish Balay   int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2433a30f8f8cSSatish Balay   int          dcount,kmax,k,nzcount,tmp;
2434a30f8f8cSSatish Balay 
2435a30f8f8cSSatish Balay   PetscFunctionBegin;
2436a30f8f8cSSatish Balay   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2437a30f8f8cSSatish Balay 
2438a30f8f8cSSatish Balay   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2439a30f8f8cSSatish Balay   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2440a30f8f8cSSatish Balay   if (!rank) {
2441a30f8f8cSSatish Balay     ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2442a30f8f8cSSatish Balay     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2443a30f8f8cSSatish Balay     if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
2444a30f8f8cSSatish Balay     if (header[3] < 0) {
2445a30f8f8cSSatish Balay       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as MPISBAIJ");
2446a30f8f8cSSatish Balay     }
2447a30f8f8cSSatish Balay   }
2448a30f8f8cSSatish Balay 
2449a30f8f8cSSatish Balay   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
2450a30f8f8cSSatish Balay   M = header[1]; N = header[2];
2451a30f8f8cSSatish Balay 
2452a30f8f8cSSatish Balay   if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices");
2453a30f8f8cSSatish Balay 
2454a30f8f8cSSatish Balay   /*
2455a30f8f8cSSatish Balay      This code adds extra rows to make sure the number of rows is
2456a30f8f8cSSatish Balay      divisible by the blocksize
2457a30f8f8cSSatish Balay   */
2458a30f8f8cSSatish Balay   Mbs        = M/bs;
2459a30f8f8cSSatish Balay   extra_rows = bs - M + bs*(Mbs);
2460a30f8f8cSSatish Balay   if (extra_rows == bs) extra_rows = 0;
2461a30f8f8cSSatish Balay   else                  Mbs++;
2462a30f8f8cSSatish Balay   if (extra_rows &&!rank) {
2463a30f8f8cSSatish Balay     PLogInfo(0,"MatLoad_MPISBAIJ:Padding loaded matrix to match blocksize\n");
2464a30f8f8cSSatish Balay   }
2465a30f8f8cSSatish Balay 
2466a30f8f8cSSatish Balay   /* determine ownership of all rows */
2467a30f8f8cSSatish Balay   mbs = Mbs/size + ((Mbs % size) > rank);
2468a30f8f8cSSatish Balay   m   = mbs * bs;
2469a30f8f8cSSatish Balay   rowners = (int*)PetscMalloc(2*(size+2)*sizeof(int));CHKPTRQ(rowners);
2470a30f8f8cSSatish Balay   browners = rowners + size + 1;
2471a30f8f8cSSatish Balay   ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2472a30f8f8cSSatish Balay   rowners[0] = 0;
2473a30f8f8cSSatish Balay   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2474a30f8f8cSSatish Balay   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2475a30f8f8cSSatish Balay   rstart = rowners[rank];
2476a30f8f8cSSatish Balay   rend   = rowners[rank+1];
2477a30f8f8cSSatish Balay 
2478a30f8f8cSSatish Balay   /* distribute row lengths to all processors */
2479a30f8f8cSSatish Balay   locrowlens = (int*)PetscMalloc((rend-rstart)*bs*sizeof(int));CHKPTRQ(locrowlens);
2480a30f8f8cSSatish Balay   if (!rank) {
2481a30f8f8cSSatish Balay     rowlengths = (int*)PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths);
2482a30f8f8cSSatish Balay     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2483a30f8f8cSSatish Balay     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2484a30f8f8cSSatish Balay     sndcounts = (int*)PetscMalloc(size*sizeof(int));CHKPTRQ(sndcounts);
2485a30f8f8cSSatish Balay     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2486a30f8f8cSSatish Balay     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2487a30f8f8cSSatish Balay     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2488a30f8f8cSSatish Balay   } else {
2489a30f8f8cSSatish Balay     ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2490a30f8f8cSSatish Balay   }
2491a30f8f8cSSatish Balay 
2492a30f8f8cSSatish Balay   if (!rank) {
2493a30f8f8cSSatish Balay     /* calculate the number of nonzeros on each processor */
2494a30f8f8cSSatish Balay     procsnz = (int*)PetscMalloc(size*sizeof(int));CHKPTRQ(procsnz);
2495a30f8f8cSSatish Balay     ierr    = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
2496a30f8f8cSSatish Balay     for (i=0; i<size; i++) {
2497a30f8f8cSSatish Balay       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2498a30f8f8cSSatish Balay         procsnz[i] += rowlengths[j];
2499a30f8f8cSSatish Balay       }
2500a30f8f8cSSatish Balay     }
2501a30f8f8cSSatish Balay     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2502a30f8f8cSSatish Balay 
2503a30f8f8cSSatish Balay     /* determine max buffer needed and allocate it */
2504a30f8f8cSSatish Balay     maxnz = 0;
2505a30f8f8cSSatish Balay     for (i=0; i<size; i++) {
2506a30f8f8cSSatish Balay       maxnz = PetscMax(maxnz,procsnz[i]);
2507a30f8f8cSSatish Balay     }
2508a30f8f8cSSatish Balay     cols = (int*)PetscMalloc(maxnz*sizeof(int));CHKPTRQ(cols);
2509a30f8f8cSSatish Balay 
2510a30f8f8cSSatish Balay     /* read in my part of the matrix column indices  */
2511a30f8f8cSSatish Balay     nz = procsnz[0];
2512a30f8f8cSSatish Balay     ibuf = (int*)PetscMalloc(nz*sizeof(int));CHKPTRQ(ibuf);
2513a30f8f8cSSatish Balay     mycols = ibuf;
2514a30f8f8cSSatish Balay     if (size == 1)  nz -= extra_rows;
2515a30f8f8cSSatish Balay     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2516a30f8f8cSSatish Balay     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2517a30f8f8cSSatish Balay 
2518a30f8f8cSSatish Balay     /* read in every ones (except the last) and ship off */
2519a30f8f8cSSatish Balay     for (i=1; i<size-1; i++) {
2520a30f8f8cSSatish Balay       nz   = procsnz[i];
2521a30f8f8cSSatish Balay       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2522a30f8f8cSSatish Balay       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
2523a30f8f8cSSatish Balay     }
2524a30f8f8cSSatish Balay     /* read in the stuff for the last proc */
2525a30f8f8cSSatish Balay     if (size != 1) {
2526a30f8f8cSSatish Balay       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2527a30f8f8cSSatish Balay       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2528a30f8f8cSSatish Balay       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2529a30f8f8cSSatish Balay       ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
2530a30f8f8cSSatish Balay     }
2531a30f8f8cSSatish Balay     ierr = PetscFree(cols);CHKERRQ(ierr);
2532a30f8f8cSSatish Balay   } else {
2533a30f8f8cSSatish Balay     /* determine buffer space needed for message */
2534a30f8f8cSSatish Balay     nz = 0;
2535a30f8f8cSSatish Balay     for (i=0; i<m; i++) {
2536a30f8f8cSSatish Balay       nz += locrowlens[i];
2537a30f8f8cSSatish Balay     }
2538a30f8f8cSSatish Balay     ibuf   = (int*)PetscMalloc(nz*sizeof(int));CHKPTRQ(ibuf);
2539a30f8f8cSSatish Balay     mycols = ibuf;
2540a30f8f8cSSatish Balay     /* receive message of column indices*/
2541a30f8f8cSSatish Balay     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2542a30f8f8cSSatish Balay     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2543a30f8f8cSSatish Balay     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
2544a30f8f8cSSatish Balay   }
2545a30f8f8cSSatish Balay 
2546a30f8f8cSSatish Balay   /* loop over local rows, determining number of off diagonal entries */
2547a30f8f8cSSatish Balay   dlens  = (int*)PetscMalloc(2*(rend-rstart+1)*sizeof(int));CHKPTRQ(dlens);
2548a30f8f8cSSatish Balay   odlens = dlens + (rend-rstart);
2549a30f8f8cSSatish Balay   mask   = (int*)PetscMalloc(3*Mbs*sizeof(int));CHKPTRQ(mask);
2550a30f8f8cSSatish Balay   ierr   = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr);
2551a30f8f8cSSatish Balay   masked1 = mask    + Mbs;
2552a30f8f8cSSatish Balay   masked2 = masked1 + Mbs;
2553a30f8f8cSSatish Balay   rowcount = 0; nzcount = 0;
2554a30f8f8cSSatish Balay   for (i=0; i<mbs; i++) {
2555a30f8f8cSSatish Balay     dcount  = 0;
2556a30f8f8cSSatish Balay     odcount = 0;
2557a30f8f8cSSatish Balay     for (j=0; j<bs; j++) {
2558a30f8f8cSSatish Balay       kmax = locrowlens[rowcount];
2559a30f8f8cSSatish Balay       for (k=0; k<kmax; k++) {
2560a30f8f8cSSatish Balay         tmp = mycols[nzcount++]/bs;
2561a30f8f8cSSatish Balay         if (!mask[tmp]) {
2562a30f8f8cSSatish Balay           mask[tmp] = 1;
2563a30f8f8cSSatish Balay           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
2564a30f8f8cSSatish Balay           else masked1[dcount++] = tmp;
2565a30f8f8cSSatish Balay         }
2566a30f8f8cSSatish Balay       }
2567a30f8f8cSSatish Balay       rowcount++;
2568a30f8f8cSSatish Balay     }
2569a30f8f8cSSatish Balay 
2570a30f8f8cSSatish Balay     dlens[i]  = dcount;
2571a30f8f8cSSatish Balay     odlens[i] = odcount;
2572a30f8f8cSSatish Balay 
2573a30f8f8cSSatish Balay     /* zero out the mask elements we set */
2574a30f8f8cSSatish Balay     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2575a30f8f8cSSatish Balay     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2576a30f8f8cSSatish Balay   }
2577a30f8f8cSSatish Balay 
2578a30f8f8cSSatish Balay   /* create our matrix */
2579a30f8f8cSSatish Balay   ierr = MatCreateMPISBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);CHKERRQ(ierr);
2580a30f8f8cSSatish Balay   A = *newmat;
2581a30f8f8cSSatish Balay   MatSetOption(A,MAT_COLUMNS_SORTED);
2582a30f8f8cSSatish Balay 
2583a30f8f8cSSatish Balay   if (!rank) {
2584a30f8f8cSSatish Balay     buf = (Scalar*)PetscMalloc(maxnz*sizeof(Scalar));CHKPTRQ(buf);
2585a30f8f8cSSatish Balay     /* read in my part of the matrix numerical values  */
2586a30f8f8cSSatish Balay     nz = procsnz[0];
2587a30f8f8cSSatish Balay     vals = buf;
2588a30f8f8cSSatish Balay     mycols = ibuf;
2589a30f8f8cSSatish Balay     if (size == 1)  nz -= extra_rows;
2590a30f8f8cSSatish Balay     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2591a30f8f8cSSatish Balay     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2592a30f8f8cSSatish Balay 
2593a30f8f8cSSatish Balay     /* insert into matrix */
2594a30f8f8cSSatish Balay     jj      = rstart*bs;
2595a30f8f8cSSatish Balay     for (i=0; i<m; i++) {
2596a30f8f8cSSatish Balay       ierr = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2597a30f8f8cSSatish Balay       mycols += locrowlens[i];
2598a30f8f8cSSatish Balay       vals   += locrowlens[i];
2599a30f8f8cSSatish Balay       jj++;
2600a30f8f8cSSatish Balay     }
2601a30f8f8cSSatish Balay     /* read in other processors (except the last one) and ship out */
2602a30f8f8cSSatish Balay     for (i=1; i<size-1; i++) {
2603a30f8f8cSSatish Balay       nz   = procsnz[i];
2604a30f8f8cSSatish Balay       vals = buf;
2605a30f8f8cSSatish Balay       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2606a30f8f8cSSatish Balay       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2607a30f8f8cSSatish Balay     }
2608a30f8f8cSSatish Balay     /* the last proc */
2609a30f8f8cSSatish Balay     if (size != 1){
2610a30f8f8cSSatish Balay       nz   = procsnz[i] - extra_rows;
2611a30f8f8cSSatish Balay       vals = buf;
2612a30f8f8cSSatish Balay       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2613a30f8f8cSSatish Balay       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2614a30f8f8cSSatish Balay       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
2615a30f8f8cSSatish Balay     }
2616a30f8f8cSSatish Balay     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2617a30f8f8cSSatish Balay   } else {
2618a30f8f8cSSatish Balay     /* receive numeric values */
2619a30f8f8cSSatish Balay     buf = (Scalar*)PetscMalloc(nz*sizeof(Scalar));CHKPTRQ(buf);
2620a30f8f8cSSatish Balay 
2621a30f8f8cSSatish Balay     /* receive message of values*/
2622a30f8f8cSSatish Balay     vals   = buf;
2623a30f8f8cSSatish Balay     mycols = ibuf;
2624a30f8f8cSSatish Balay     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2625a30f8f8cSSatish Balay     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2626a30f8f8cSSatish Balay     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
2627a30f8f8cSSatish Balay 
2628a30f8f8cSSatish Balay     /* insert into matrix */
2629a30f8f8cSSatish Balay     jj      = rstart*bs;
2630a30f8f8cSSatish Balay     for (i=0; i<m; i++) {
2631a30f8f8cSSatish Balay       ierr    = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2632a30f8f8cSSatish Balay       mycols += locrowlens[i];
2633a30f8f8cSSatish Balay       vals   += locrowlens[i];
2634a30f8f8cSSatish Balay       jj++;
2635a30f8f8cSSatish Balay     }
2636a30f8f8cSSatish Balay   }
2637a30f8f8cSSatish Balay   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2638a30f8f8cSSatish Balay   ierr = PetscFree(buf);CHKERRQ(ierr);
2639a30f8f8cSSatish Balay   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2640a30f8f8cSSatish Balay   ierr = PetscFree(rowners);CHKERRQ(ierr);
2641a30f8f8cSSatish Balay   ierr = PetscFree(dlens);CHKERRQ(ierr);
2642a30f8f8cSSatish Balay   ierr = PetscFree(mask);CHKERRQ(ierr);
2643a30f8f8cSSatish Balay   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2644a30f8f8cSSatish Balay   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2645a30f8f8cSSatish Balay   PetscFunctionReturn(0);
2646a30f8f8cSSatish Balay }
2647a30f8f8cSSatish Balay 
2648a30f8f8cSSatish Balay #undef __FUNC__
2649a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatMPISBAIJSetHashTableFactor"
2650a30f8f8cSSatish Balay /*@
2651a30f8f8cSSatish Balay    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2652a30f8f8cSSatish Balay 
2653a30f8f8cSSatish Balay    Input Parameters:
2654a30f8f8cSSatish Balay .  mat  - the matrix
2655a30f8f8cSSatish Balay .  fact - factor
2656a30f8f8cSSatish Balay 
2657a30f8f8cSSatish Balay    Collective on Mat
2658a30f8f8cSSatish Balay 
2659a30f8f8cSSatish Balay    Level: advanced
2660a30f8f8cSSatish Balay 
2661a30f8f8cSSatish Balay   Notes:
2662a30f8f8cSSatish Balay    This can also be set by the command line option: -mat_use_hash_table fact
2663a30f8f8cSSatish Balay 
2664a30f8f8cSSatish Balay .keywords: matrix, hashtable, factor, HT
2665a30f8f8cSSatish Balay 
2666a30f8f8cSSatish Balay .seealso: MatSetOption()
2667a30f8f8cSSatish Balay @*/
2668a30f8f8cSSatish Balay int MatMPISBAIJSetHashTableFactor(Mat mat,PetscReal fact)
2669a30f8f8cSSatish Balay {
2670a30f8f8cSSatish Balay   Mat_MPIBAIJ *baij;
2671a30f8f8cSSatish Balay 
2672a30f8f8cSSatish Balay   PetscFunctionBegin;
2673a30f8f8cSSatish Balay   PetscValidHeaderSpecific(mat,MAT_COOKIE);
2674a30f8f8cSSatish Balay   if (mat->type != MATMPIBAIJ) {
2675a30f8f8cSSatish Balay     SETERRQ(PETSC_ERR_ARG_WRONG,1,"Incorrect matrix type. Use MPIBAIJ only.");
2676a30f8f8cSSatish Balay   }
2677a30f8f8cSSatish Balay   baij = (Mat_MPIBAIJ*)mat->data;
2678a30f8f8cSSatish Balay   baij->ht_fact = fact;
2679a30f8f8cSSatish Balay   PetscFunctionReturn(0);
2680a30f8f8cSSatish Balay }
2681