xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision bc0ba53db2568597366f67456dd8dbfa61d5d11b)
1*bc0ba53dSHong Zhang /*$Id: mpisbaij.c,v 1.3 2000/07/12 18:58:59 hzhang Exp hzhang $*/
2a30f8f8cSSatish Balay 
3a30f8f8cSSatish Balay #include "src/mat/impls/baij/mpi/mpibaij.h"
4a30f8f8cSSatish Balay #include "src/vec/vecimpl.h"
5a30f8f8cSSatish Balay #include "mpisbaij.h"
6a30f8f8cSSatish Balay #include "src/mat/impls/sbaij/seq/sbaij.h"
7a30f8f8cSSatish Balay 
8a30f8f8cSSatish Balay extern int MatSetUpMultiply_MPISBAIJ(Mat);
9a30f8f8cSSatish Balay extern int DisAssemble_MPISBAIJ(Mat);
10a30f8f8cSSatish Balay extern int MatIncreaseOverlap_MPISBAIJ(Mat,int,IS *,int);
11a30f8f8cSSatish Balay extern int MatGetSubMatrices_MPISBAIJ(Mat,int,IS *,IS *,MatReuse,Mat **);
12a30f8f8cSSatish Balay extern int MatGetValues_SeqSBAIJ(Mat,int,int *,int,int *,Scalar *);
13a30f8f8cSSatish Balay extern int MatSetValues_SeqSBAIJ(Mat,int,int *,int,int *,Scalar *,InsertMode);
14a30f8f8cSSatish Balay extern int MatSetValuesBlocked_SeqSBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
15a30f8f8cSSatish Balay extern int MatGetRow_SeqSBAIJ(Mat,int,int*,int**,Scalar**);
16a30f8f8cSSatish Balay extern int MatRestoreRow_SeqSBAIJ(Mat,int,int*,int**,Scalar**);
17a30f8f8cSSatish Balay extern int MatPrintHelp_SeqSBAIJ(Mat);
18a30f8f8cSSatish Balay extern int MatZeroRows_SeqSBAIJ(Mat,IS,Scalar*);
19a30f8f8cSSatish Balay 
20a30f8f8cSSatish Balay /*  UGLY, ugly, ugly
21a30f8f8cSSatish Balay    When MatScalar == Scalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does
22a30f8f8cSSatish Balay    not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and
23a30f8f8cSSatish Balay    inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ()
24a30f8f8cSSatish Balay    converts the entries into single precision and then calls ..._MatScalar() to put them
25a30f8f8cSSatish Balay    into the single precision data structures.
26a30f8f8cSSatish Balay */
27a30f8f8cSSatish Balay #if defined(PETSC_USE_MAT_SINGLE)
28a30f8f8cSSatish Balay extern int MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
29a30f8f8cSSatish Balay extern int MatSetValues_MPISBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
30a30f8f8cSSatish Balay extern int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
31a30f8f8cSSatish Balay extern int MatSetValues_MPISBAIJ_HT_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
32a30f8f8cSSatish Balay extern int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
33a30f8f8cSSatish Balay #else
34a30f8f8cSSatish Balay #define MatSetValuesBlocked_SeqSBAIJ_MatScalar      MatSetValuesBlocked_SeqSBAIJ
35a30f8f8cSSatish Balay #define MatSetValues_MPISBAIJ_MatScalar             MatSetValues_MPISBAIJ
36a30f8f8cSSatish Balay #define MatSetValuesBlocked_MPISBAIJ_MatScalar      MatSetValuesBlocked_MPISBAIJ
37a30f8f8cSSatish Balay #define MatSetValues_MPISBAIJ_HT_MatScalar          MatSetValues_MPISBAIJ_HT
38a30f8f8cSSatish Balay #define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar   MatSetValuesBlocked_MPISBAIJ_HT
39a30f8f8cSSatish Balay #endif
40a30f8f8cSSatish Balay 
41a30f8f8cSSatish Balay EXTERN_C_BEGIN
42a30f8f8cSSatish Balay #undef __FUNC__
43a30f8f8cSSatish Balay #define __FUNC__ /*<a name="MatStoreValues_MPIBAIJ"></a>*/"MatStoreValues_MPIBAIJ"
44a30f8f8cSSatish Balay int MatStoreValues_MPISBAIJ(Mat mat)
45a30f8f8cSSatish Balay {
46a30f8f8cSSatish Balay   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
47a30f8f8cSSatish Balay   int         ierr;
48a30f8f8cSSatish Balay 
49a30f8f8cSSatish Balay   PetscFunctionBegin;
50a30f8f8cSSatish Balay   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
51a30f8f8cSSatish Balay   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
52a30f8f8cSSatish Balay   PetscFunctionReturn(0);
53a30f8f8cSSatish Balay }
54a30f8f8cSSatish Balay EXTERN_C_END
55a30f8f8cSSatish Balay 
56a30f8f8cSSatish Balay EXTERN_C_BEGIN
57a30f8f8cSSatish Balay #undef __FUNC__
58a30f8f8cSSatish Balay #define __FUNC__ /*<a name="MatRetrieveValues_MPIBAIJ"></a>*/"MatRetrieveValues_MPIBAIJ"
59a30f8f8cSSatish Balay int MatRetrieveValues_MPISBAIJ(Mat mat)
60a30f8f8cSSatish Balay {
61a30f8f8cSSatish Balay   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
62a30f8f8cSSatish Balay   int         ierr;
63a30f8f8cSSatish Balay 
64a30f8f8cSSatish Balay   PetscFunctionBegin;
65a30f8f8cSSatish Balay   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
66a30f8f8cSSatish Balay   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
67a30f8f8cSSatish Balay   PetscFunctionReturn(0);
68a30f8f8cSSatish Balay }
69a30f8f8cSSatish Balay EXTERN_C_END
70a30f8f8cSSatish Balay 
71a30f8f8cSSatish Balay /*
72a30f8f8cSSatish Balay      Local utility routine that creates a mapping from the global column
73a30f8f8cSSatish Balay    number to the local number in the off-diagonal part of the local
74a30f8f8cSSatish Balay    storage of the matrix.  This is done in a non scable way since the
75a30f8f8cSSatish Balay    length of colmap equals the global matrix length.
76a30f8f8cSSatish Balay */
77a30f8f8cSSatish Balay #undef __FUNC__
78a30f8f8cSSatish Balay #define __FUNC__ /*<a name="CreateColmap_MPISBAIJ_Private"></a>*/"CreateColmap_MPISBAIJ_Private"
79a30f8f8cSSatish Balay static int CreateColmap_MPISBAIJ_Private(Mat mat)
80a30f8f8cSSatish Balay {
81a30f8f8cSSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
82a30f8f8cSSatish Balay   Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data;
83a30f8f8cSSatish Balay   int         nbs = B->nbs,i,bs=B->bs,ierr;
84a30f8f8cSSatish Balay 
85a30f8f8cSSatish Balay   PetscFunctionBegin;
86a30f8f8cSSatish Balay #if defined (PETSC_USE_CTABLE)
87a30f8f8cSSatish Balay   ierr = PetscTableCreate(baij->nbs/5,&baij->colmap);CHKERRQ(ierr);
88a30f8f8cSSatish Balay   for (i=0; i<nbs; i++){
89a30f8f8cSSatish Balay     ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
90a30f8f8cSSatish Balay   }
91a30f8f8cSSatish Balay #else
92a30f8f8cSSatish Balay   baij->colmap = (int*)PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap);
93a30f8f8cSSatish Balay   PLogObjectMemory(mat,baij->Nbs*sizeof(int));
94a30f8f8cSSatish Balay   ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));CHKERRQ(ierr);
95a30f8f8cSSatish Balay   for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
96a30f8f8cSSatish Balay #endif
97a30f8f8cSSatish Balay   PetscFunctionReturn(0);
98a30f8f8cSSatish Balay }
99a30f8f8cSSatish Balay 
100a30f8f8cSSatish Balay #define CHUNKSIZE  10
101a30f8f8cSSatish Balay 
102a30f8f8cSSatish Balay #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
103a30f8f8cSSatish Balay { \
104a30f8f8cSSatish Balay  \
105a30f8f8cSSatish Balay     brow = row/bs;  \
106a30f8f8cSSatish Balay     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
107a30f8f8cSSatish Balay     rmax = aimax[brow]; nrow = ailen[brow]; \
108a30f8f8cSSatish Balay       bcol = col/bs; \
109a30f8f8cSSatish Balay       ridx = row % bs; cidx = col % bs; \
110a30f8f8cSSatish Balay       low = 0; high = nrow; \
111a30f8f8cSSatish Balay       while (high-low > 3) { \
112a30f8f8cSSatish Balay         t = (low+high)/2; \
113a30f8f8cSSatish Balay         if (rp[t] > bcol) high = t; \
114a30f8f8cSSatish Balay         else              low  = t; \
115a30f8f8cSSatish Balay       } \
116a30f8f8cSSatish Balay       for (_i=low; _i<high; _i++) { \
117a30f8f8cSSatish Balay         if (rp[_i] > bcol) break; \
118a30f8f8cSSatish Balay         if (rp[_i] == bcol) { \
119a30f8f8cSSatish Balay           bap  = ap +  bs2*_i + bs*cidx + ridx; \
120a30f8f8cSSatish Balay           if (addv == ADD_VALUES) *bap += value;  \
121a30f8f8cSSatish Balay           else                    *bap  = value;  \
122a30f8f8cSSatish Balay           goto a_noinsert; \
123a30f8f8cSSatish Balay         } \
124a30f8f8cSSatish Balay       } \
125a30f8f8cSSatish Balay       if (a->nonew == 1) goto a_noinsert; \
126a30f8f8cSSatish Balay       else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
127a30f8f8cSSatish Balay       if (nrow >= rmax) { \
128a30f8f8cSSatish Balay         /* there is no extra room in row, therefore enlarge */ \
129a30f8f8cSSatish Balay         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
130a30f8f8cSSatish Balay         MatScalar *new_a; \
131a30f8f8cSSatish Balay  \
132a30f8f8cSSatish Balay         if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
133a30f8f8cSSatish Balay  \
134a30f8f8cSSatish Balay         /* malloc new storage space */ \
135a30f8f8cSSatish Balay         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \
136a30f8f8cSSatish Balay         new_a   = (MatScalar*)PetscMalloc(len);CHKPTRQ(new_a); \
137a30f8f8cSSatish Balay         new_j   = (int*)(new_a + bs2*new_nz); \
138a30f8f8cSSatish Balay         new_i   = new_j + new_nz; \
139a30f8f8cSSatish Balay  \
140a30f8f8cSSatish Balay         /* copy over old data into new slots */ \
141a30f8f8cSSatish Balay         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \
142a30f8f8cSSatish Balay         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
143a30f8f8cSSatish Balay         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
144a30f8f8cSSatish Balay         len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
145a30f8f8cSSatish Balay         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
146a30f8f8cSSatish Balay         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
147a30f8f8cSSatish Balay         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));CHKERRQ(ierr); \
148a30f8f8cSSatish Balay         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
149a30f8f8cSSatish Balay                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
150a30f8f8cSSatish Balay         /* free up old matrix storage */ \
151a30f8f8cSSatish Balay         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
152a30f8f8cSSatish Balay         if (!a->singlemalloc) { \
153a30f8f8cSSatish Balay           ierr = PetscFree(a->i);CHKERRQ(ierr); \
154a30f8f8cSSatish Balay           ierr = PetscFree(a->j);CHKERRQ(ierr);\
155a30f8f8cSSatish Balay         } \
156a30f8f8cSSatish Balay         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
157a30f8f8cSSatish Balay         a->singlemalloc = PETSC_TRUE; \
158a30f8f8cSSatish Balay  \
159a30f8f8cSSatish Balay         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
160a30f8f8cSSatish Balay         rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
161a30f8f8cSSatish Balay         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
162a30f8f8cSSatish Balay         a->s_maxnz += bs2*CHUNKSIZE; \
163a30f8f8cSSatish Balay         a->reallocs++; \
164a30f8f8cSSatish Balay         a->s_nz++; \
165a30f8f8cSSatish Balay       } \
166a30f8f8cSSatish Balay       N = nrow++ - 1;  \
167a30f8f8cSSatish Balay       /* shift up all the later entries in this row */ \
168a30f8f8cSSatish Balay       for (ii=N; ii>=_i; ii--) { \
169a30f8f8cSSatish Balay         rp[ii+1] = rp[ii]; \
170a30f8f8cSSatish Balay         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
171a30f8f8cSSatish Balay       } \
172a30f8f8cSSatish Balay       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
173a30f8f8cSSatish Balay       rp[_i]                      = bcol;  \
174a30f8f8cSSatish Balay       ap[bs2*_i + bs*cidx + ridx] = value;  \
175a30f8f8cSSatish Balay       a_noinsert:; \
176a30f8f8cSSatish Balay     ailen[brow] = nrow; \
177a30f8f8cSSatish Balay }
178a30f8f8cSSatish Balay #ifndef MatSetValues_SeqBAIJ_B_Private
179a30f8f8cSSatish Balay #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
180a30f8f8cSSatish Balay { \
181a30f8f8cSSatish Balay     brow = row/bs;  \
182a30f8f8cSSatish Balay     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
183a30f8f8cSSatish Balay     rmax = bimax[brow]; nrow = bilen[brow]; \
184a30f8f8cSSatish Balay       bcol = col/bs; \
185a30f8f8cSSatish Balay       ridx = row % bs; cidx = col % bs; \
186a30f8f8cSSatish Balay       low = 0; high = nrow; \
187a30f8f8cSSatish Balay       while (high-low > 3) { \
188a30f8f8cSSatish Balay         t = (low+high)/2; \
189a30f8f8cSSatish Balay         if (rp[t] > bcol) high = t; \
190a30f8f8cSSatish Balay         else              low  = t; \
191a30f8f8cSSatish Balay       } \
192a30f8f8cSSatish Balay       for (_i=low; _i<high; _i++) { \
193a30f8f8cSSatish Balay         if (rp[_i] > bcol) break; \
194a30f8f8cSSatish Balay         if (rp[_i] == bcol) { \
195a30f8f8cSSatish Balay           bap  = ap +  bs2*_i + bs*cidx + ridx; \
196a30f8f8cSSatish Balay           if (addv == ADD_VALUES) *bap += value;  \
197a30f8f8cSSatish Balay           else                    *bap  = value;  \
198a30f8f8cSSatish Balay           goto b_noinsert; \
199a30f8f8cSSatish Balay         } \
200a30f8f8cSSatish Balay       } \
201a30f8f8cSSatish Balay       if (b->nonew == 1) goto b_noinsert; \
202a30f8f8cSSatish Balay       else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
203a30f8f8cSSatish Balay       if (nrow >= rmax) { \
204a30f8f8cSSatish Balay         /* there is no extra room in row, therefore enlarge */ \
205a30f8f8cSSatish Balay         int       new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
206a30f8f8cSSatish Balay         MatScalar *new_a; \
207a30f8f8cSSatish Balay  \
208a30f8f8cSSatish Balay         if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
209a30f8f8cSSatish Balay  \
210a30f8f8cSSatish Balay         /* malloc new storage space */ \
211a30f8f8cSSatish Balay         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \
212a30f8f8cSSatish Balay         new_a   = (MatScalar*)PetscMalloc(len);CHKPTRQ(new_a); \
213a30f8f8cSSatish Balay         new_j   = (int*)(new_a + bs2*new_nz); \
214a30f8f8cSSatish Balay         new_i   = new_j + new_nz; \
215a30f8f8cSSatish Balay  \
216a30f8f8cSSatish Balay         /* copy over old data into new slots */ \
217a30f8f8cSSatish Balay         for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \
218a30f8f8cSSatish Balay         for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
219a30f8f8cSSatish Balay         ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
220a30f8f8cSSatish Balay         len  = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
221a30f8f8cSSatish Balay         ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
222a30f8f8cSSatish Balay         ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
223a30f8f8cSSatish Balay         ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
224a30f8f8cSSatish Balay         ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
225a30f8f8cSSatish Balay                     ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
226a30f8f8cSSatish Balay         /* free up old matrix storage */ \
227a30f8f8cSSatish Balay         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
228a30f8f8cSSatish Balay         if (!b->singlemalloc) { \
229a30f8f8cSSatish Balay           ierr = PetscFree(b->i);CHKERRQ(ierr); \
230a30f8f8cSSatish Balay           ierr = PetscFree(b->j);CHKERRQ(ierr); \
231a30f8f8cSSatish Balay         } \
232a30f8f8cSSatish Balay         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
233a30f8f8cSSatish Balay         b->singlemalloc = PETSC_TRUE; \
234a30f8f8cSSatish Balay  \
235a30f8f8cSSatish Balay         rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
236a30f8f8cSSatish Balay         rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
237a30f8f8cSSatish Balay         PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
238a30f8f8cSSatish Balay         b->maxnz += bs2*CHUNKSIZE; \
239a30f8f8cSSatish Balay         b->reallocs++; \
240a30f8f8cSSatish Balay         b->nz++; \
241a30f8f8cSSatish Balay       } \
242a30f8f8cSSatish Balay       N = nrow++ - 1;  \
243a30f8f8cSSatish Balay       /* shift up all the later entries in this row */ \
244a30f8f8cSSatish Balay       for (ii=N; ii>=_i; ii--) { \
245a30f8f8cSSatish Balay         rp[ii+1] = rp[ii]; \
246a30f8f8cSSatish Balay         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
247a30f8f8cSSatish Balay       } \
248a30f8f8cSSatish Balay       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
249a30f8f8cSSatish Balay       rp[_i]                      = bcol;  \
250a30f8f8cSSatish Balay       ap[bs2*_i + bs*cidx + ridx] = value;  \
251a30f8f8cSSatish Balay       b_noinsert:; \
252a30f8f8cSSatish Balay     bilen[brow] = nrow; \
253a30f8f8cSSatish Balay }
254a30f8f8cSSatish Balay #endif
255a30f8f8cSSatish Balay 
256a30f8f8cSSatish Balay #if defined(PETSC_USE_MAT_SINGLE)
257a30f8f8cSSatish Balay #undef __FUNC__
258a30f8f8cSSatish Balay #define __FUNC__ /*<a name="MatSetValues_MPISBAIJ"></a>*/"MatSetValues_MPISBAIJ"
259a30f8f8cSSatish Balay int MatSetValues_MPISBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
260a30f8f8cSSatish Balay {
261a30f8f8cSSatish Balay   Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)mat->data;
262a30f8f8cSSatish Balay   int         ierr,i,N = m*n;
263a30f8f8cSSatish Balay   MatScalar   *vsingle;
264a30f8f8cSSatish Balay 
265a30f8f8cSSatish Balay   PetscFunctionBegin;
266a30f8f8cSSatish Balay   if (N > b->setvalueslen) {
267a30f8f8cSSatish Balay     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
268a30f8f8cSSatish Balay     b->setvaluescopy = (MatScalar*)PetscMalloc(N*sizeof(MatScalar));CHKPTRQ(b->setvaluescopy);
269a30f8f8cSSatish Balay     b->setvalueslen  = N;
270a30f8f8cSSatish Balay   }
271a30f8f8cSSatish Balay   vsingle = b->setvaluescopy;
272a30f8f8cSSatish Balay 
273a30f8f8cSSatish Balay   for (i=0; i<N; i++) {
274a30f8f8cSSatish Balay     vsingle[i] = v[i];
275a30f8f8cSSatish Balay   }
276a30f8f8cSSatish Balay   ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
277a30f8f8cSSatish Balay   PetscFunctionReturn(0);
278a30f8f8cSSatish Balay }
279a30f8f8cSSatish Balay 
280a30f8f8cSSatish Balay #undef __FUNC__
281a30f8f8cSSatish Balay #define __FUNC__ /*<a name="MatSetValuesBlocked_MPISBAIJ"></a>*/"MatSetValuesBlocked_MPISBAIJ"
282a30f8f8cSSatish Balay int MatSetValuesBlocked_MPISBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
283a30f8f8cSSatish Balay {
284a30f8f8cSSatish Balay   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
285a30f8f8cSSatish Balay   int         ierr,i,N = m*n*b->bs2;
286a30f8f8cSSatish Balay   MatScalar   *vsingle;
287a30f8f8cSSatish Balay 
288a30f8f8cSSatish Balay   PetscFunctionBegin;
289a30f8f8cSSatish Balay   if (N > b->setvalueslen) {
290a30f8f8cSSatish Balay     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
291a30f8f8cSSatish Balay     b->setvaluescopy = (MatScalar*)PetscMalloc(N*sizeof(MatScalar));CHKPTRQ(b->setvaluescopy);
292a30f8f8cSSatish Balay     b->setvalueslen  = N;
293a30f8f8cSSatish Balay   }
294a30f8f8cSSatish Balay   vsingle = b->setvaluescopy;
295a30f8f8cSSatish Balay   for (i=0; i<N; i++) {
296a30f8f8cSSatish Balay     vsingle[i] = v[i];
297a30f8f8cSSatish Balay   }
298a30f8f8cSSatish Balay   ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
299a30f8f8cSSatish Balay   PetscFunctionReturn(0);
300a30f8f8cSSatish Balay }
301a30f8f8cSSatish Balay 
302a30f8f8cSSatish Balay #undef __FUNC__
303a30f8f8cSSatish Balay #define __FUNC__ /*<a name="MatSetValues_MPISBAIJ_HT"></a>*/"MatSetValues_MPISBAIJ_HT"
304a30f8f8cSSatish Balay int MatSetValues_MPISBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
305a30f8f8cSSatish Balay {
306a30f8f8cSSatish Balay   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
307a30f8f8cSSatish Balay   int         ierr,i,N = m*n;
308a30f8f8cSSatish Balay   MatScalar   *vsingle;
309a30f8f8cSSatish Balay 
310a30f8f8cSSatish Balay   PetscFunctionBegin;
311a30f8f8cSSatish Balay   if (N > b->setvalueslen) {
312a30f8f8cSSatish Balay     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
313a30f8f8cSSatish Balay     b->setvaluescopy = (MatScalar*)PetscMalloc(N*sizeof(MatScalar));CHKPTRQ(b->setvaluescopy);
314a30f8f8cSSatish Balay     b->setvalueslen  = N;
315a30f8f8cSSatish Balay   }
316a30f8f8cSSatish Balay   vsingle = b->setvaluescopy;
317a30f8f8cSSatish Balay   for (i=0; i<N; i++) {
318a30f8f8cSSatish Balay     vsingle[i] = v[i];
319a30f8f8cSSatish Balay   }
320a30f8f8cSSatish Balay   ierr = MatSetValues_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
321a30f8f8cSSatish Balay   PetscFunctionReturn(0);
322a30f8f8cSSatish Balay }
323a30f8f8cSSatish Balay 
324a30f8f8cSSatish Balay #undef __FUNC__
325a30f8f8cSSatish Balay #define __FUNC__ /*<a name="MatSetValuesBlocked_MPISBAIJ_HT"></a>*/"MatSetValuesBlocked_MPISBAIJ_HT"
326a30f8f8cSSatish Balay int MatSetValuesBlocked_MPISBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
327a30f8f8cSSatish Balay {
328a30f8f8cSSatish Balay   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
329a30f8f8cSSatish Balay   int         ierr,i,N = m*n*b->bs2;
330a30f8f8cSSatish Balay   MatScalar   *vsingle;
331a30f8f8cSSatish Balay 
332a30f8f8cSSatish Balay   PetscFunctionBegin;
333a30f8f8cSSatish Balay   if (N > b->setvalueslen) {
334a30f8f8cSSatish Balay     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
335a30f8f8cSSatish Balay     b->setvaluescopy = (MatScalar*)PetscMalloc(N*sizeof(MatScalar));CHKPTRQ(b->setvaluescopy);
336a30f8f8cSSatish Balay     b->setvalueslen  = N;
337a30f8f8cSSatish Balay   }
338a30f8f8cSSatish Balay   vsingle = b->setvaluescopy;
339a30f8f8cSSatish Balay   for (i=0; i<N; i++) {
340a30f8f8cSSatish Balay     vsingle[i] = v[i];
341a30f8f8cSSatish Balay   }
342a30f8f8cSSatish Balay   ierr = MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
343a30f8f8cSSatish Balay   PetscFunctionReturn(0);
344a30f8f8cSSatish Balay }
345a30f8f8cSSatish Balay #endif
346a30f8f8cSSatish Balay 
347a30f8f8cSSatish Balay /* Only add/insert a(i,j) with i<=j (blocks).
348a30f8f8cSSatish Balay    Any a(i,j) with i>j input by user is ingored.
349a30f8f8cSSatish Balay */
350a30f8f8cSSatish Balay #undef __FUNC__
351a30f8f8cSSatish Balay #define __FUNC__ /*<a name="MatSetValues_MPIBAIJ"></a>*/"MatSetValues_MPIBAIJ"
352a30f8f8cSSatish Balay int MatSetValues_MPISBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
353a30f8f8cSSatish Balay {
354a30f8f8cSSatish Balay   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
355a30f8f8cSSatish Balay   MatScalar   value;
356a30f8f8cSSatish Balay   int         ierr,i,j,row,col;
357a30f8f8cSSatish Balay   int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
358a30f8f8cSSatish Balay   int         rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
359a30f8f8cSSatish Balay   int         cend_orig=baij->cend_bs,bs=baij->bs;
360a30f8f8cSSatish Balay 
361a30f8f8cSSatish Balay   /* Some Variables required in the macro */
362a30f8f8cSSatish Balay   Mat         A = baij->A;
363a30f8f8cSSatish Balay   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data;
364a30f8f8cSSatish Balay   int         *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
365a30f8f8cSSatish Balay   MatScalar   *aa=a->a;
366a30f8f8cSSatish Balay 
367a30f8f8cSSatish Balay   Mat         B = baij->B;
368a30f8f8cSSatish Balay   Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data;
369a30f8f8cSSatish Balay   int         *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
370a30f8f8cSSatish Balay   MatScalar   *ba=b->a;
371a30f8f8cSSatish Balay 
372a30f8f8cSSatish Balay   int         *rp,ii,nrow,_i,rmax,N,brow,bcol;
373a30f8f8cSSatish Balay   int         low,high,t,ridx,cidx,bs2=a->bs2;
374a30f8f8cSSatish Balay   MatScalar   *ap,*bap;
375a30f8f8cSSatish Balay 
376a30f8f8cSSatish Balay   /* for stash */
377a30f8f8cSSatish Balay   int         n_loc, *in_loc;
378a30f8f8cSSatish Balay   MatScalar   *v_loc;
379a30f8f8cSSatish Balay 
380a30f8f8cSSatish Balay   PetscFunctionBegin;
381a30f8f8cSSatish Balay 
382a30f8f8cSSatish Balay   if(!baij->donotstash){
383a30f8f8cSSatish Balay     in_loc = (int*)PetscMalloc(n*sizeof(int)); CHKPTRQ(in_loc);
384a30f8f8cSSatish Balay     v_loc  = (MatScalar*)PetscMalloc(n*sizeof(MatScalar)); CHKPTRQ(v_loc);
385a30f8f8cSSatish Balay   }
386a30f8f8cSSatish Balay 
387a30f8f8cSSatish Balay   for (i=0; i<m; i++) {
388a30f8f8cSSatish Balay     if (im[i] < 0) continue;
389a30f8f8cSSatish Balay #if defined(PETSC_USE_BOPT_g)
390a30f8f8cSSatish Balay     if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
391a30f8f8cSSatish Balay #endif
392a30f8f8cSSatish Balay     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
393a30f8f8cSSatish Balay       row = im[i] - rstart_orig;              /* local row index */
394a30f8f8cSSatish Balay       for (j=0; j<n; j++) {
395a30f8f8cSSatish Balay         if (im[i]/bs > in[j]/bs) continue;    /* ignore low triangular blocks */
396a30f8f8cSSatish Balay         if (in[j] >= cstart_orig && in[j] < cend_orig){  /* diag entry (A) */
397a30f8f8cSSatish Balay           col = in[j] - cstart_orig;          /* local col index */
398a30f8f8cSSatish Balay           brow = row/bs; bcol = col/bs;
399a30f8f8cSSatish Balay           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
400a30f8f8cSSatish Balay           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
401a30f8f8cSSatish Balay           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
402a30f8f8cSSatish Balay           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
403a30f8f8cSSatish Balay         } else if (in[j] < 0) continue;
404a30f8f8cSSatish Balay #if defined(PETSC_USE_BOPT_g)
405a30f8f8cSSatish Balay         else if (in[j] >= baij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Col too large");}
406a30f8f8cSSatish Balay #endif
407a30f8f8cSSatish Balay         else {  /* off-diag entry (B) */
408a30f8f8cSSatish Balay           if (mat->was_assembled) {
409a30f8f8cSSatish Balay             if (!baij->colmap) {
410a30f8f8cSSatish Balay               ierr = CreateColmap_MPISBAIJ_Private(mat);CHKERRQ(ierr);
411a30f8f8cSSatish Balay             }
412a30f8f8cSSatish Balay #if defined (PETSC_USE_CTABLE)
413a30f8f8cSSatish Balay             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
414a30f8f8cSSatish Balay             col  = col - 1 + in[j]%bs;
415a30f8f8cSSatish Balay #else
416a30f8f8cSSatish Balay             col = baij->colmap[in[j]/bs] - 1 + in[j]%bs;
417a30f8f8cSSatish Balay #endif
418a30f8f8cSSatish Balay             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
419a30f8f8cSSatish Balay               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
420a30f8f8cSSatish Balay               col =  in[j];
421a30f8f8cSSatish Balay               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
422a30f8f8cSSatish Balay               B = baij->B;
423a30f8f8cSSatish Balay               b = (Mat_SeqBAIJ*)(B)->data;
424a30f8f8cSSatish Balay               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
425a30f8f8cSSatish Balay               ba=b->a;
426a30f8f8cSSatish Balay             }
427a30f8f8cSSatish Balay           } else col = in[j];
428a30f8f8cSSatish Balay           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
429a30f8f8cSSatish Balay           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
430a30f8f8cSSatish Balay           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
431a30f8f8cSSatish Balay         }
432a30f8f8cSSatish Balay       }
433a30f8f8cSSatish Balay     } else {  /* off processor entry */
434a30f8f8cSSatish Balay       if (!baij->donotstash) {
435a30f8f8cSSatish Balay         n_loc = 0;
436a30f8f8cSSatish Balay         for (j=0; j<n; j++){
437a30f8f8cSSatish Balay           if (im[i]/bs > in[j]/bs) continue; /* ignore low triangular blocks */
438a30f8f8cSSatish Balay           in_loc[n_loc] = in[j];
439a30f8f8cSSatish Balay           if (roworiented) {
440a30f8f8cSSatish Balay             v_loc[n_loc] = v[i*n+j];
441a30f8f8cSSatish Balay           } else {
442a30f8f8cSSatish Balay             v_loc[n_loc] = v[j*m+i];
443a30f8f8cSSatish Balay           }
444a30f8f8cSSatish Balay           n_loc++;
445a30f8f8cSSatish Balay         }
446a30f8f8cSSatish Balay         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr);
447a30f8f8cSSatish Balay       }
448a30f8f8cSSatish Balay     }
449a30f8f8cSSatish Balay   }
450a30f8f8cSSatish Balay 
451a30f8f8cSSatish Balay   if(!baij->donotstash){
452a30f8f8cSSatish Balay     ierr = PetscFree(in_loc);CHKERRQ(ierr);
453a30f8f8cSSatish Balay     ierr = PetscFree(v_loc);CHKERRQ(ierr);
454a30f8f8cSSatish Balay   }
455a30f8f8cSSatish Balay   PetscFunctionReturn(0);
456a30f8f8cSSatish Balay }
457a30f8f8cSSatish Balay 
458a30f8f8cSSatish Balay #undef __FUNC__
459a30f8f8cSSatish Balay #define __FUNC__ /*<a name="MatSetValuesBlocked_MPISBAIJ"></a>*/"MatSetValuesBlocked_MPISBAIJ"
460a30f8f8cSSatish Balay int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
461a30f8f8cSSatish Balay {
462a30f8f8cSSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
463a30f8f8cSSatish Balay   MatScalar   *value,*barray=baij->barray;
464a30f8f8cSSatish Balay   int         ierr,i,j,ii,jj,row,col;
465a30f8f8cSSatish Balay   int         roworiented = baij->roworiented,rstart=baij->rstart ;
466a30f8f8cSSatish Balay   int         rend=baij->rend,cstart=baij->cstart,stepval;
467a30f8f8cSSatish Balay   int         cend=baij->cend,bs=baij->bs,bs2=baij->bs2;
468a30f8f8cSSatish Balay 
469a30f8f8cSSatish Balay   PetscFunctionBegin;
470a30f8f8cSSatish Balay   if(!barray) {
471a30f8f8cSSatish Balay     baij->barray = barray = (MatScalar*)PetscMalloc(bs2*sizeof(MatScalar));CHKPTRQ(barray);
472a30f8f8cSSatish Balay   }
473a30f8f8cSSatish Balay 
474a30f8f8cSSatish Balay   if (roworiented) {
475a30f8f8cSSatish Balay     stepval = (n-1)*bs;
476a30f8f8cSSatish Balay   } else {
477a30f8f8cSSatish Balay     stepval = (m-1)*bs;
478a30f8f8cSSatish Balay   }
479a30f8f8cSSatish Balay   for (i=0; i<m; i++) {
480a30f8f8cSSatish Balay     if (im[i] < 0) continue;
481a30f8f8cSSatish Balay #if defined(PETSC_USE_BOPT_g)
482a30f8f8cSSatish Balay     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large, row %d max %d",im[i],baij->Mbs);
483a30f8f8cSSatish Balay #endif
484a30f8f8cSSatish Balay     if (im[i] >= rstart && im[i] < rend) {
485a30f8f8cSSatish Balay       row = im[i] - rstart;
486a30f8f8cSSatish Balay       for (j=0; j<n; j++) {
487a30f8f8cSSatish Balay         /* If NumCol = 1 then a copy is not required */
488a30f8f8cSSatish Balay         if ((roworiented) && (n == 1)) {
489a30f8f8cSSatish Balay           barray = v + i*bs2;
490a30f8f8cSSatish Balay         } else if((!roworiented) && (m == 1)) {
491a30f8f8cSSatish Balay           barray = v + j*bs2;
492a30f8f8cSSatish Balay         } else { /* Here a copy is required */
493a30f8f8cSSatish Balay           if (roworiented) {
494a30f8f8cSSatish Balay             value = v + i*(stepval+bs)*bs + j*bs;
495a30f8f8cSSatish Balay           } else {
496a30f8f8cSSatish Balay             value = v + j*(stepval+bs)*bs + i*bs;
497a30f8f8cSSatish Balay           }
498a30f8f8cSSatish Balay           for (ii=0; ii<bs; ii++,value+=stepval) {
499a30f8f8cSSatish Balay             for (jj=0; jj<bs; jj++) {
500a30f8f8cSSatish Balay               *barray++  = *value++;
501a30f8f8cSSatish Balay             }
502a30f8f8cSSatish Balay           }
503a30f8f8cSSatish Balay           barray -=bs2;
504a30f8f8cSSatish Balay         }
505a30f8f8cSSatish Balay 
506a30f8f8cSSatish Balay         if (in[j] >= cstart && in[j] < cend){
507a30f8f8cSSatish Balay           col  = in[j] - cstart;
508a30f8f8cSSatish Balay           ierr = MatSetValuesBlocked_SeqSBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
509a30f8f8cSSatish Balay         }
510a30f8f8cSSatish Balay         else if (in[j] < 0) continue;
511a30f8f8cSSatish Balay #if defined(PETSC_USE_BOPT_g)
512a30f8f8cSSatish Balay         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large, col %d max %d",in[j],baij->Nbs);}
513a30f8f8cSSatish Balay #endif
514a30f8f8cSSatish Balay         else {
515a30f8f8cSSatish Balay           if (mat->was_assembled) {
516a30f8f8cSSatish Balay             if (!baij->colmap) {
517a30f8f8cSSatish Balay               ierr = CreateColmap_MPISBAIJ_Private(mat);CHKERRQ(ierr);
518a30f8f8cSSatish Balay             }
519a30f8f8cSSatish Balay 
520a30f8f8cSSatish Balay #if defined(PETSC_USE_BOPT_g)
521a30f8f8cSSatish Balay #if defined (PETSC_USE_CTABLE)
522a30f8f8cSSatish Balay             { int data;
523a30f8f8cSSatish Balay               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
524a30f8f8cSSatish Balay               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");
525a30f8f8cSSatish Balay             }
526a30f8f8cSSatish Balay #else
527a30f8f8cSSatish Balay             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");
528a30f8f8cSSatish Balay #endif
529a30f8f8cSSatish Balay #endif
530a30f8f8cSSatish Balay #if defined (PETSC_USE_CTABLE)
531a30f8f8cSSatish Balay 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
532a30f8f8cSSatish Balay             col  = (col - 1)/bs;
533a30f8f8cSSatish Balay #else
534a30f8f8cSSatish Balay             col = (baij->colmap[in[j]] - 1)/bs;
535a30f8f8cSSatish Balay #endif
536a30f8f8cSSatish Balay             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
537c8407628SSatish Balay               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
538a30f8f8cSSatish Balay               col =  in[j];
539a30f8f8cSSatish Balay             }
540a30f8f8cSSatish Balay           }
541a30f8f8cSSatish Balay           else col = in[j];
542a30f8f8cSSatish Balay           ierr = MatSetValuesBlocked_SeqSBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
543a30f8f8cSSatish Balay         }
544a30f8f8cSSatish Balay       }
545a30f8f8cSSatish Balay     } else {
546a30f8f8cSSatish Balay       if (!baij->donotstash) {
547a30f8f8cSSatish Balay         if (roworiented) {
548a30f8f8cSSatish Balay           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
549a30f8f8cSSatish Balay         } else {
550a30f8f8cSSatish Balay           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
551a30f8f8cSSatish Balay         }
552a30f8f8cSSatish Balay       }
553a30f8f8cSSatish Balay     }
554a30f8f8cSSatish Balay   }
555a30f8f8cSSatish Balay   PetscFunctionReturn(0);
556a30f8f8cSSatish Balay }
557a30f8f8cSSatish Balay 
558a30f8f8cSSatish Balay #define HASH_KEY 0.6180339887
559a30f8f8cSSatish Balay #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
560a30f8f8cSSatish Balay /* #define HASH(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
561a30f8f8cSSatish Balay /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
562a30f8f8cSSatish Balay #undef __FUNC__
563a30f8f8cSSatish Balay #define __FUNC__ /*<a name="MatSetValues_MPISBAIJ_HT_MatScalar"></a>*/"MatSetValues_MPISBAIJ_HT_MatScalar"
564a30f8f8cSSatish Balay int MatSetValues_MPISBAIJ_HT_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
565a30f8f8cSSatish Balay {
566a30f8f8cSSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
567a30f8f8cSSatish Balay   int         ierr,i,j,row,col;
568a30f8f8cSSatish Balay   int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
569a30f8f8cSSatish Balay   int         rend_orig=baij->rend_bs,Nbs=baij->Nbs;
570a30f8f8cSSatish Balay   int         h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx;
571a30f8f8cSSatish Balay   PetscReal   tmp;
572a30f8f8cSSatish Balay   MatScalar   **HD = baij->hd,value;
573a30f8f8cSSatish Balay #if defined(PETSC_USE_BOPT_g)
574a30f8f8cSSatish Balay   int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
575a30f8f8cSSatish Balay #endif
576a30f8f8cSSatish Balay 
577a30f8f8cSSatish Balay   PetscFunctionBegin;
578a30f8f8cSSatish Balay 
579a30f8f8cSSatish Balay   for (i=0; i<m; i++) {
580a30f8f8cSSatish Balay #if defined(PETSC_USE_BOPT_g)
581a30f8f8cSSatish Balay     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
582a30f8f8cSSatish Balay     if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
583a30f8f8cSSatish Balay #endif
584a30f8f8cSSatish Balay       row = im[i];
585a30f8f8cSSatish Balay     if (row >= rstart_orig && row < rend_orig) {
586a30f8f8cSSatish Balay       for (j=0; j<n; j++) {
587a30f8f8cSSatish Balay         col = in[j];
588a30f8f8cSSatish Balay         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
589a30f8f8cSSatish Balay         /* Look up into the Hash Table */
590a30f8f8cSSatish Balay         key = (row/bs)*Nbs+(col/bs)+1;
591a30f8f8cSSatish Balay         h1  = HASH(size,key,tmp);
592a30f8f8cSSatish Balay 
593a30f8f8cSSatish Balay 
594a30f8f8cSSatish Balay         idx = h1;
595a30f8f8cSSatish Balay #if defined(PETSC_USE_BOPT_g)
596a30f8f8cSSatish Balay         insert_ct++;
597a30f8f8cSSatish Balay         total_ct++;
598a30f8f8cSSatish Balay         if (HT[idx] != key) {
599a30f8f8cSSatish Balay           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
600a30f8f8cSSatish Balay           if (idx == size) {
601a30f8f8cSSatish Balay             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
602a30f8f8cSSatish Balay             if (idx == h1) {
603a30f8f8cSSatish Balay               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
604a30f8f8cSSatish Balay             }
605a30f8f8cSSatish Balay           }
606a30f8f8cSSatish Balay         }
607a30f8f8cSSatish Balay #else
608a30f8f8cSSatish Balay         if (HT[idx] != key) {
609a30f8f8cSSatish Balay           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
610a30f8f8cSSatish Balay           if (idx == size) {
611a30f8f8cSSatish Balay             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
612a30f8f8cSSatish Balay             if (idx == h1) {
613a30f8f8cSSatish Balay               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
614a30f8f8cSSatish Balay             }
615a30f8f8cSSatish Balay           }
616a30f8f8cSSatish Balay         }
617a30f8f8cSSatish Balay #endif
618a30f8f8cSSatish Balay         /* A HASH table entry is found, so insert the values at the correct address */
619a30f8f8cSSatish Balay         if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
620a30f8f8cSSatish Balay         else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
621a30f8f8cSSatish Balay       }
622a30f8f8cSSatish Balay     } else {
623a30f8f8cSSatish Balay       if (!baij->donotstash) {
624a30f8f8cSSatish Balay         if (roworiented) {
625a30f8f8cSSatish Balay           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
626a30f8f8cSSatish Balay         } else {
627a30f8f8cSSatish Balay           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
628a30f8f8cSSatish Balay         }
629a30f8f8cSSatish Balay       }
630a30f8f8cSSatish Balay     }
631a30f8f8cSSatish Balay   }
632a30f8f8cSSatish Balay #if defined(PETSC_USE_BOPT_g)
633a30f8f8cSSatish Balay   baij->ht_total_ct = total_ct;
634a30f8f8cSSatish Balay   baij->ht_insert_ct = insert_ct;
635a30f8f8cSSatish Balay #endif
636a30f8f8cSSatish Balay   PetscFunctionReturn(0);
637a30f8f8cSSatish Balay }
638a30f8f8cSSatish Balay 
639a30f8f8cSSatish Balay #undef __FUNC__
640a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatSetValuesBlocked_MPISBAIJ_HT_MatScalar"
641a30f8f8cSSatish Balay int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
642a30f8f8cSSatish Balay {
643a30f8f8cSSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
644a30f8f8cSSatish Balay   int         ierr,i,j,ii,jj,row,col;
645a30f8f8cSSatish Balay   int         roworiented = baij->roworiented,rstart=baij->rstart ;
646a30f8f8cSSatish Balay   int         rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2;
647a30f8f8cSSatish Balay   int         h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
648a30f8f8cSSatish Balay   PetscReal   tmp;
649a30f8f8cSSatish Balay   MatScalar   **HD = baij->hd,*baij_a;
650a30f8f8cSSatish Balay   MatScalar   *v_t,*value;
651a30f8f8cSSatish Balay #if defined(PETSC_USE_BOPT_g)
652a30f8f8cSSatish Balay   int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
653a30f8f8cSSatish Balay #endif
654a30f8f8cSSatish Balay 
655a30f8f8cSSatish Balay   PetscFunctionBegin;
656a30f8f8cSSatish Balay 
657a30f8f8cSSatish Balay   if (roworiented) {
658a30f8f8cSSatish Balay     stepval = (n-1)*bs;
659a30f8f8cSSatish Balay   } else {
660a30f8f8cSSatish Balay     stepval = (m-1)*bs;
661a30f8f8cSSatish Balay   }
662a30f8f8cSSatish Balay   for (i=0; i<m; i++) {
663a30f8f8cSSatish Balay #if defined(PETSC_USE_BOPT_g)
664a30f8f8cSSatish Balay     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
665a30f8f8cSSatish Balay     if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
666a30f8f8cSSatish Balay #endif
667a30f8f8cSSatish Balay     row   = im[i];
668a30f8f8cSSatish Balay     v_t   = v + i*bs2;
669a30f8f8cSSatish Balay     if (row >= rstart && row < rend) {
670a30f8f8cSSatish Balay       for (j=0; j<n; j++) {
671a30f8f8cSSatish Balay         col = in[j];
672a30f8f8cSSatish Balay 
673a30f8f8cSSatish Balay         /* Look up into the Hash Table */
674a30f8f8cSSatish Balay         key = row*Nbs+col+1;
675a30f8f8cSSatish Balay         h1  = HASH(size,key,tmp);
676a30f8f8cSSatish Balay 
677a30f8f8cSSatish Balay         idx = h1;
678a30f8f8cSSatish Balay #if defined(PETSC_USE_BOPT_g)
679a30f8f8cSSatish Balay         total_ct++;
680a30f8f8cSSatish Balay         insert_ct++;
681a30f8f8cSSatish Balay        if (HT[idx] != key) {
682a30f8f8cSSatish Balay           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
683a30f8f8cSSatish Balay           if (idx == size) {
684a30f8f8cSSatish Balay             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
685a30f8f8cSSatish Balay             if (idx == h1) {
686a30f8f8cSSatish Balay               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
687a30f8f8cSSatish Balay             }
688a30f8f8cSSatish Balay           }
689a30f8f8cSSatish Balay         }
690a30f8f8cSSatish Balay #else
691a30f8f8cSSatish Balay         if (HT[idx] != key) {
692a30f8f8cSSatish Balay           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
693a30f8f8cSSatish Balay           if (idx == size) {
694a30f8f8cSSatish Balay             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
695a30f8f8cSSatish Balay             if (idx == h1) {
696a30f8f8cSSatish Balay               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
697a30f8f8cSSatish Balay             }
698a30f8f8cSSatish Balay           }
699a30f8f8cSSatish Balay         }
700a30f8f8cSSatish Balay #endif
701a30f8f8cSSatish Balay         baij_a = HD[idx];
702a30f8f8cSSatish Balay         if (roworiented) {
703a30f8f8cSSatish Balay           /*value = v + i*(stepval+bs)*bs + j*bs;*/
704a30f8f8cSSatish Balay           /* value = v + (i*(stepval+bs)+j)*bs; */
705a30f8f8cSSatish Balay           value = v_t;
706a30f8f8cSSatish Balay           v_t  += bs;
707a30f8f8cSSatish Balay           if (addv == ADD_VALUES) {
708a30f8f8cSSatish Balay             for (ii=0; ii<bs; ii++,value+=stepval) {
709a30f8f8cSSatish Balay               for (jj=ii; jj<bs2; jj+=bs) {
710a30f8f8cSSatish Balay                 baij_a[jj]  += *value++;
711a30f8f8cSSatish Balay               }
712a30f8f8cSSatish Balay             }
713a30f8f8cSSatish Balay           } else {
714a30f8f8cSSatish Balay             for (ii=0; ii<bs; ii++,value+=stepval) {
715a30f8f8cSSatish Balay               for (jj=ii; jj<bs2; jj+=bs) {
716a30f8f8cSSatish Balay                 baij_a[jj]  = *value++;
717a30f8f8cSSatish Balay               }
718a30f8f8cSSatish Balay             }
719a30f8f8cSSatish Balay           }
720a30f8f8cSSatish Balay         } else {
721a30f8f8cSSatish Balay           value = v + j*(stepval+bs)*bs + i*bs;
722a30f8f8cSSatish Balay           if (addv == ADD_VALUES) {
723a30f8f8cSSatish Balay             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
724a30f8f8cSSatish Balay               for (jj=0; jj<bs; jj++) {
725a30f8f8cSSatish Balay                 baij_a[jj]  += *value++;
726a30f8f8cSSatish Balay               }
727a30f8f8cSSatish Balay             }
728a30f8f8cSSatish Balay           } else {
729a30f8f8cSSatish Balay             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
730a30f8f8cSSatish Balay               for (jj=0; jj<bs; jj++) {
731a30f8f8cSSatish Balay                 baij_a[jj]  = *value++;
732a30f8f8cSSatish Balay               }
733a30f8f8cSSatish Balay             }
734a30f8f8cSSatish Balay           }
735a30f8f8cSSatish Balay         }
736a30f8f8cSSatish Balay       }
737a30f8f8cSSatish Balay     } else {
738a30f8f8cSSatish Balay       if (!baij->donotstash) {
739a30f8f8cSSatish Balay         if (roworiented) {
740a30f8f8cSSatish Balay           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
741a30f8f8cSSatish Balay         } else {
742a30f8f8cSSatish Balay           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
743a30f8f8cSSatish Balay         }
744a30f8f8cSSatish Balay       }
745a30f8f8cSSatish Balay     }
746a30f8f8cSSatish Balay   }
747a30f8f8cSSatish Balay #if defined(PETSC_USE_BOPT_g)
748a30f8f8cSSatish Balay   baij->ht_total_ct = total_ct;
749a30f8f8cSSatish Balay   baij->ht_insert_ct = insert_ct;
750a30f8f8cSSatish Balay #endif
751a30f8f8cSSatish Balay   PetscFunctionReturn(0);
752a30f8f8cSSatish Balay }
753a30f8f8cSSatish Balay 
754a30f8f8cSSatish Balay #undef __FUNC__
755a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatGetValues_MPISBAIJ"
756a30f8f8cSSatish Balay int MatGetValues_MPISBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
757a30f8f8cSSatish Balay {
758a30f8f8cSSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
759a30f8f8cSSatish Balay   int        bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
760a30f8f8cSSatish Balay   int        bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;
761a30f8f8cSSatish Balay 
762a30f8f8cSSatish Balay   PetscFunctionBegin;
763a30f8f8cSSatish Balay   for (i=0; i<m; i++) {
764a30f8f8cSSatish Balay     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
765a30f8f8cSSatish Balay     if (idxm[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
766a30f8f8cSSatish Balay     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
767a30f8f8cSSatish Balay       row = idxm[i] - bsrstart;
768a30f8f8cSSatish Balay       for (j=0; j<n; j++) {
769a30f8f8cSSatish Balay         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
770a30f8f8cSSatish Balay         if (idxn[j] >= baij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
771a30f8f8cSSatish Balay         if (idxn[j] >= bscstart && idxn[j] < bscend){
772a30f8f8cSSatish Balay           col = idxn[j] - bscstart;
773c8407628SSatish Balay           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
774a30f8f8cSSatish Balay         } else {
775a30f8f8cSSatish Balay           if (!baij->colmap) {
776a30f8f8cSSatish Balay             ierr = CreateColmap_MPISBAIJ_Private(mat);CHKERRQ(ierr);
777a30f8f8cSSatish Balay           }
778a30f8f8cSSatish Balay #if defined (PETSC_USE_CTABLE)
779a30f8f8cSSatish Balay           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
780a30f8f8cSSatish Balay           data --;
781a30f8f8cSSatish Balay #else
782a30f8f8cSSatish Balay           data = baij->colmap[idxn[j]/bs]-1;
783a30f8f8cSSatish Balay #endif
784a30f8f8cSSatish Balay           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
785a30f8f8cSSatish Balay           else {
786a30f8f8cSSatish Balay             col  = data + idxn[j]%bs;
787c8407628SSatish Balay             ierr = MatGetValues_SeqSBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
788a30f8f8cSSatish Balay           }
789a30f8f8cSSatish Balay         }
790a30f8f8cSSatish Balay       }
791a30f8f8cSSatish Balay     } else {
792a30f8f8cSSatish Balay       SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
793a30f8f8cSSatish Balay     }
794a30f8f8cSSatish Balay   }
795a30f8f8cSSatish Balay  PetscFunctionReturn(0);
796a30f8f8cSSatish Balay }
797a30f8f8cSSatish Balay 
798a30f8f8cSSatish Balay #undef __FUNC__
799a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatNorm_MPISBAIJ"
800a30f8f8cSSatish Balay int MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
801a30f8f8cSSatish Balay {
802a30f8f8cSSatish Balay   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
803*bc0ba53dSHong Zhang   /* Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ*)baij->A->data; */
804*bc0ba53dSHong Zhang   /* Mat_SeqBAIJ  *bmat = (Mat_SeqBAIJ*)baij->B->data; */
805a30f8f8cSSatish Balay   int        ierr;
806a30f8f8cSSatish Balay   PetscReal  sum[2],*lnorm2;
807a30f8f8cSSatish Balay 
808a30f8f8cSSatish Balay   PetscFunctionBegin;
809a30f8f8cSSatish Balay   if (baij->size == 1) {
810a30f8f8cSSatish Balay     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
811a30f8f8cSSatish Balay   } else {
812a30f8f8cSSatish Balay     if (type == NORM_FROBENIUS) {
813ee237329SHong Zhang       lnorm2 = (double*)PetscMalloc(2*sizeof(double));CHKPTRQ(lnorm2);
814a30f8f8cSSatish Balay       ierr =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
815a30f8f8cSSatish Balay       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* squar power of norm(A) */
816a30f8f8cSSatish Balay       ierr =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
817a30f8f8cSSatish Balay       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* squar power of norm(B) */
818a30f8f8cSSatish Balay       /*
819a30f8f8cSSatish Balay       ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
820a30f8f8cSSatish Balay       PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d], lnorm2=%g, %g\n",rank,lnorm2[0],lnorm2[1]);
821a30f8f8cSSatish Balay       */
822a30f8f8cSSatish Balay       ierr = MPI_Allreduce(lnorm2,&sum,2,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
823a30f8f8cSSatish Balay       /*
824a30f8f8cSSatish Balay       PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d], sum=%g, %g\n",rank,sum[0],sum[1]);
825a30f8f8cSSatish Balay       PetscSynchronizedFlush(PETSC_COMM_WORLD); */
826a30f8f8cSSatish Balay 
827a30f8f8cSSatish Balay       *norm = sqrt(sum[0] + 2*sum[1]);
828a30f8f8cSSatish Balay       ierr = PetscFree(lnorm2);CHKERRQ(ierr);
829a30f8f8cSSatish Balay     } else {
830a30f8f8cSSatish Balay       SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet");
831a30f8f8cSSatish Balay     }
832a30f8f8cSSatish Balay   }
833a30f8f8cSSatish Balay   PetscFunctionReturn(0);
834a30f8f8cSSatish Balay }
835a30f8f8cSSatish Balay 
836a30f8f8cSSatish Balay /*
837a30f8f8cSSatish Balay   Creates the hash table, and sets the table
838a30f8f8cSSatish Balay   This table is created only once.
839a30f8f8cSSatish Balay   If new entried need to be added to the matrix
840a30f8f8cSSatish Balay   then the hash table has to be destroyed and
841a30f8f8cSSatish Balay   recreated.
842a30f8f8cSSatish Balay */
843a30f8f8cSSatish Balay #undef __FUNC__
844a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatCreateHashTable_MPISBAIJ_Private"
845a30f8f8cSSatish Balay int MatCreateHashTable_MPISBAIJ_Private(Mat mat,PetscReal factor)
846a30f8f8cSSatish Balay {
847a30f8f8cSSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
848a30f8f8cSSatish Balay   Mat         A = baij->A,B=baij->B;
849a30f8f8cSSatish Balay   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
850a30f8f8cSSatish Balay   int         i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
851a30f8f8cSSatish Balay   int         size,bs2=baij->bs2,rstart=baij->rstart,ierr;
852a30f8f8cSSatish Balay   int         cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs;
853a30f8f8cSSatish Balay   int         *HT,key;
854a30f8f8cSSatish Balay   MatScalar   **HD;
855a30f8f8cSSatish Balay   PetscReal   tmp;
856a30f8f8cSSatish Balay #if defined(PETSC_USE_BOPT_g)
857a30f8f8cSSatish Balay   int         ct=0,max=0;
858a30f8f8cSSatish Balay #endif
859a30f8f8cSSatish Balay 
860a30f8f8cSSatish Balay   PetscFunctionBegin;
861a30f8f8cSSatish Balay   baij->ht_size=(int)(factor*nz);
862a30f8f8cSSatish Balay   size = baij->ht_size;
863a30f8f8cSSatish Balay 
864a30f8f8cSSatish Balay   if (baij->ht) {
865a30f8f8cSSatish Balay     PetscFunctionReturn(0);
866a30f8f8cSSatish Balay   }
867a30f8f8cSSatish Balay 
868a30f8f8cSSatish Balay   /* Allocate Memory for Hash Table */
869a30f8f8cSSatish Balay   baij->hd = (MatScalar**)PetscMalloc((size)*(sizeof(int)+sizeof(MatScalar*))+1);CHKPTRQ(baij->hd);
870a30f8f8cSSatish Balay   baij->ht = (int*)(baij->hd + size);
871a30f8f8cSSatish Balay   HD = baij->hd;
872a30f8f8cSSatish Balay   HT = baij->ht;
873a30f8f8cSSatish Balay 
874a30f8f8cSSatish Balay 
875a30f8f8cSSatish Balay   ierr = PetscMemzero(HD,size*(sizeof(int)+sizeof(Scalar*)));CHKERRQ(ierr);
876a30f8f8cSSatish Balay 
877a30f8f8cSSatish Balay 
878a30f8f8cSSatish Balay   /* Loop Over A */
879a30f8f8cSSatish Balay   for (i=0; i<a->mbs; i++) {
880a30f8f8cSSatish Balay     for (j=ai[i]; j<ai[i+1]; j++) {
881a30f8f8cSSatish Balay       row = i+rstart;
882a30f8f8cSSatish Balay       col = aj[j]+cstart;
883a30f8f8cSSatish Balay 
884a30f8f8cSSatish Balay       key = row*Nbs + col + 1;
885a30f8f8cSSatish Balay       h1  = HASH(size,key,tmp);
886a30f8f8cSSatish Balay       for (k=0; k<size; k++){
887a30f8f8cSSatish Balay         if (HT[(h1+k)%size] == 0.0) {
888a30f8f8cSSatish Balay           HT[(h1+k)%size] = key;
889a30f8f8cSSatish Balay           HD[(h1+k)%size] = a->a + j*bs2;
890a30f8f8cSSatish Balay           break;
891a30f8f8cSSatish Balay #if defined(PETSC_USE_BOPT_g)
892a30f8f8cSSatish Balay         } else {
893a30f8f8cSSatish Balay           ct++;
894a30f8f8cSSatish Balay #endif
895a30f8f8cSSatish Balay         }
896a30f8f8cSSatish Balay       }
897a30f8f8cSSatish Balay #if defined(PETSC_USE_BOPT_g)
898a30f8f8cSSatish Balay       if (k> max) max = k;
899a30f8f8cSSatish Balay #endif
900a30f8f8cSSatish Balay     }
901a30f8f8cSSatish Balay   }
902a30f8f8cSSatish Balay   /* Loop Over B */
903a30f8f8cSSatish Balay   for (i=0; i<b->mbs; i++) {
904a30f8f8cSSatish Balay     for (j=bi[i]; j<bi[i+1]; j++) {
905a30f8f8cSSatish Balay       row = i+rstart;
906a30f8f8cSSatish Balay       col = garray[bj[j]];
907a30f8f8cSSatish Balay       key = row*Nbs + col + 1;
908a30f8f8cSSatish Balay       h1  = HASH(size,key,tmp);
909a30f8f8cSSatish Balay       for (k=0; k<size; k++){
910a30f8f8cSSatish Balay         if (HT[(h1+k)%size] == 0.0) {
911a30f8f8cSSatish Balay           HT[(h1+k)%size] = key;
912a30f8f8cSSatish Balay           HD[(h1+k)%size] = b->a + j*bs2;
913a30f8f8cSSatish Balay           break;
914a30f8f8cSSatish Balay #if defined(PETSC_USE_BOPT_g)
915a30f8f8cSSatish Balay         } else {
916a30f8f8cSSatish Balay           ct++;
917a30f8f8cSSatish Balay #endif
918a30f8f8cSSatish Balay         }
919a30f8f8cSSatish Balay       }
920a30f8f8cSSatish Balay #if defined(PETSC_USE_BOPT_g)
921a30f8f8cSSatish Balay       if (k> max) max = k;
922a30f8f8cSSatish Balay #endif
923a30f8f8cSSatish Balay     }
924a30f8f8cSSatish Balay   }
925a30f8f8cSSatish Balay 
926a30f8f8cSSatish Balay   /* Print Summary */
927a30f8f8cSSatish Balay #if defined(PETSC_USE_BOPT_g)
928a30f8f8cSSatish Balay   for (i=0,j=0; i<size; i++) {
929a30f8f8cSSatish Balay     if (HT[i]) {j++;}
930a30f8f8cSSatish Balay   }
931a30f8f8cSSatish Balay   PLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n",(j== 0)? 0.0:((PetscReal)(ct+j))/j,max);
932a30f8f8cSSatish Balay #endif
933a30f8f8cSSatish Balay   PetscFunctionReturn(0);
934a30f8f8cSSatish Balay }
935a30f8f8cSSatish Balay 
936a30f8f8cSSatish Balay #undef __FUNC__
937a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatAssemblyBegin_MPISBAIJ"
938a30f8f8cSSatish Balay int MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
939a30f8f8cSSatish Balay {
940a30f8f8cSSatish Balay   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
941a30f8f8cSSatish Balay   int         ierr,nstash,reallocs;
942a30f8f8cSSatish Balay   InsertMode  addv;
943a30f8f8cSSatish Balay 
944a30f8f8cSSatish Balay   PetscFunctionBegin;
945a30f8f8cSSatish Balay   if (baij->donotstash) {
946a30f8f8cSSatish Balay     PetscFunctionReturn(0);
947a30f8f8cSSatish Balay   }
948a30f8f8cSSatish Balay 
949a30f8f8cSSatish Balay   /* make sure all processors are either in INSERTMODE or ADDMODE */
950a30f8f8cSSatish Balay   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
951a30f8f8cSSatish Balay   if (addv == (ADD_VALUES|INSERT_VALUES)) {
952a30f8f8cSSatish Balay     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added");
953a30f8f8cSSatish Balay   }
954a30f8f8cSSatish Balay   mat->insertmode = addv; /* in case this processor had no cache */
955a30f8f8cSSatish Balay 
956a30f8f8cSSatish Balay   ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
957a30f8f8cSSatish Balay   ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
958a30f8f8cSSatish Balay   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
959a30f8f8cSSatish Balay   PLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Stash has %d entries,uses %d mallocs.\n",nstash,reallocs);
960a30f8f8cSSatish Balay   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
961a30f8f8cSSatish Balay   PLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
962a30f8f8cSSatish Balay   PetscFunctionReturn(0);
963a30f8f8cSSatish Balay }
964a30f8f8cSSatish Balay 
965a30f8f8cSSatish Balay #undef __FUNC__
966a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatAssemblyEnd_MPISBAIJ"
967a30f8f8cSSatish Balay int MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
968a30f8f8cSSatish Balay {
969a30f8f8cSSatish Balay   Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data;
970a30f8f8cSSatish Balay   Mat_SeqSBAIJ  *a=(Mat_SeqSBAIJ*)baij->A->data;
971a30f8f8cSSatish Balay   Mat_SeqBAIJ  *b=(Mat_SeqBAIJ*)baij->B->data;
972a30f8f8cSSatish Balay   int         i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2;
973a30f8f8cSSatish Balay   int         *row,*col,other_disassembled;
974a30f8f8cSSatish Balay   PetscTruth  r1,r2,r3;
975a30f8f8cSSatish Balay   MatScalar   *val;
976a30f8f8cSSatish Balay   InsertMode  addv = mat->insertmode;
977a30f8f8cSSatish Balay   int         rank;
978a30f8f8cSSatish Balay 
979a30f8f8cSSatish Balay   PetscFunctionBegin;
980a30f8f8cSSatish Balay   /* remove 2 line below later */
981a30f8f8cSSatish Balay   ierr = MPI_Comm_rank(PETSC_COMM_WORLD, &rank);CHKERRQ(ierr);
982a30f8f8cSSatish Balay 
983a30f8f8cSSatish Balay   if (!baij->donotstash) {
984a30f8f8cSSatish Balay     while (1) {
985a30f8f8cSSatish Balay       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
986a30f8f8cSSatish Balay       /*
987a30f8f8cSSatish Balay       PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d]: in AssemblyEnd, stash, flg=%d\n",rank,flg);
988a30f8f8cSSatish Balay       PetscSynchronizedFlush(PETSC_COMM_WORLD);
989a30f8f8cSSatish Balay       */
990a30f8f8cSSatish Balay       if (!flg) break;
991a30f8f8cSSatish Balay 
992a30f8f8cSSatish Balay       for (i=0; i<n;) {
993a30f8f8cSSatish Balay         /* Now identify the consecutive vals belonging to the same row */
994a30f8f8cSSatish Balay         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
995a30f8f8cSSatish Balay         if (j < n) ncols = j-i;
996a30f8f8cSSatish Balay         else       ncols = n-i;
997a30f8f8cSSatish Balay         /* Now assemble all these values with a single function call */
998a30f8f8cSSatish Balay         ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
999a30f8f8cSSatish Balay         i = j;
1000a30f8f8cSSatish Balay       }
1001a30f8f8cSSatish Balay     }
1002a30f8f8cSSatish Balay     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
1003a30f8f8cSSatish Balay     /* Now process the block-stash. Since the values are stashed column-oriented,
1004a30f8f8cSSatish Balay        set the roworiented flag to column oriented, and after MatSetValues()
1005a30f8f8cSSatish Balay        restore the original flags */
1006a30f8f8cSSatish Balay     r1 = baij->roworiented;
1007a30f8f8cSSatish Balay     r2 = a->roworiented;
1008a30f8f8cSSatish Balay     r3 = b->roworiented;
1009a30f8f8cSSatish Balay     baij->roworiented = PETSC_FALSE;
1010a30f8f8cSSatish Balay     a->roworiented    = PETSC_FALSE;
1011a30f8f8cSSatish Balay     b->roworiented    = PETSC_FALSE;
1012a30f8f8cSSatish Balay     while (1) {
1013a30f8f8cSSatish Balay       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
1014a30f8f8cSSatish Balay       if (!flg) break;
1015a30f8f8cSSatish Balay 
1016a30f8f8cSSatish Balay       for (i=0; i<n;) {
1017a30f8f8cSSatish Balay         /* Now identify the consecutive vals belonging to the same row */
1018a30f8f8cSSatish Balay         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
1019a30f8f8cSSatish Balay         if (j < n) ncols = j-i;
1020a30f8f8cSSatish Balay         else       ncols = n-i;
1021a30f8f8cSSatish Balay         ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
1022a30f8f8cSSatish Balay         i = j;
1023a30f8f8cSSatish Balay       }
1024a30f8f8cSSatish Balay     }
1025a30f8f8cSSatish Balay     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
1026a30f8f8cSSatish Balay     baij->roworiented = r1;
1027a30f8f8cSSatish Balay     a->roworiented    = r2;
1028a30f8f8cSSatish Balay     b->roworiented    = r3;
1029a30f8f8cSSatish Balay   }
1030a30f8f8cSSatish Balay 
1031a30f8f8cSSatish Balay   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
1032a30f8f8cSSatish Balay   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
1033a30f8f8cSSatish Balay 
1034a30f8f8cSSatish Balay   /* determine if any processor has disassembled, if so we must
1035a30f8f8cSSatish Balay      also disassemble ourselfs, in order that we may reassemble. */
1036a30f8f8cSSatish Balay   /*
1037a30f8f8cSSatish Balay      if nonzero structure of submatrix B cannot change then we know that
1038a30f8f8cSSatish Balay      no processor disassembled thus we can skip this stuff
1039a30f8f8cSSatish Balay   */
1040a30f8f8cSSatish Balay   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
1041a30f8f8cSSatish Balay     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
1042a30f8f8cSSatish Balay     if (mat->was_assembled && !other_disassembled) {
1043c8407628SSatish Balay       ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
1044a30f8f8cSSatish Balay     }
1045a30f8f8cSSatish Balay   }
1046a30f8f8cSSatish Balay 
1047a30f8f8cSSatish Balay   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
1048a30f8f8cSSatish Balay     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr);
1049a30f8f8cSSatish Balay   }
1050a30f8f8cSSatish Balay   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
1051a30f8f8cSSatish Balay   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
1052a30f8f8cSSatish Balay 
1053a30f8f8cSSatish Balay #if defined(PETSC_USE_BOPT_g)
1054a30f8f8cSSatish Balay   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
1055a30f8f8cSSatish Balay     PLogInfo(0,"MatAssemblyEnd_MPISBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((double)baij->ht_total_ct)/baij->ht_insert_ct);
1056a30f8f8cSSatish Balay     baij->ht_total_ct  = 0;
1057a30f8f8cSSatish Balay     baij->ht_insert_ct = 0;
1058a30f8f8cSSatish Balay   }
1059a30f8f8cSSatish Balay #endif
1060a30f8f8cSSatish Balay   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
1061c8407628SSatish Balay     ierr = MatCreateHashTable_MPISBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
1062a30f8f8cSSatish Balay     mat->ops->setvalues        = MatSetValues_MPISBAIJ_HT;
1063a30f8f8cSSatish Balay     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPISBAIJ_HT;
1064a30f8f8cSSatish Balay   }
1065a30f8f8cSSatish Balay 
1066a30f8f8cSSatish Balay   if (baij->rowvalues) {
1067a30f8f8cSSatish Balay     ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
1068a30f8f8cSSatish Balay     baij->rowvalues = 0;
1069a30f8f8cSSatish Balay   }
1070a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1071a30f8f8cSSatish Balay }
1072a30f8f8cSSatish Balay 
1073a30f8f8cSSatish Balay #undef __FUNC__
1074a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatView_MPISBAIJ_ASCIIorDraworSocket"
1075a30f8f8cSSatish Balay static int MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer)
1076a30f8f8cSSatish Balay {
1077a30f8f8cSSatish Balay   Mat_MPISBAIJ  *baij = (Mat_MPISBAIJ*)mat->data;
1078a30f8f8cSSatish Balay   int          ierr,format,bs = baij->bs,size = baij->size,rank = baij->rank;
1079a30f8f8cSSatish Balay   PetscTruth   isascii,isdraw;
1080a30f8f8cSSatish Balay 
1081a30f8f8cSSatish Balay   PetscFunctionBegin;
1082a30f8f8cSSatish Balay   ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
1083a30f8f8cSSatish Balay   ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
1084a30f8f8cSSatish Balay   if (isascii) {
1085a30f8f8cSSatish Balay     ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1086a30f8f8cSSatish Balay     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
1087a30f8f8cSSatish Balay       MatInfo info;
1088a30f8f8cSSatish Balay       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
1089a30f8f8cSSatish Balay       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1090a30f8f8cSSatish Balay       ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
1091a30f8f8cSSatish Balay               rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
1092a30f8f8cSSatish Balay               baij->bs,(int)info.memory);CHKERRQ(ierr);
1093a30f8f8cSSatish Balay       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1094a30f8f8cSSatish Balay       ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
1095a30f8f8cSSatish Balay       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1096a30f8f8cSSatish Balay       ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
1097a30f8f8cSSatish Balay       ierr = ViewerFlush(viewer);CHKERRQ(ierr);
1098a30f8f8cSSatish Balay       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
1099a30f8f8cSSatish Balay       PetscFunctionReturn(0);
1100a30f8f8cSSatish Balay     } else if (format == VIEWER_FORMAT_ASCII_INFO) {
1101a30f8f8cSSatish Balay       ierr = ViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
1102a30f8f8cSSatish Balay       PetscFunctionReturn(0);
1103a30f8f8cSSatish Balay     }
1104a30f8f8cSSatish Balay   }
1105a30f8f8cSSatish Balay 
1106a30f8f8cSSatish Balay   if (isdraw) {
1107a30f8f8cSSatish Balay     Draw       draw;
1108a30f8f8cSSatish Balay     PetscTruth isnull;
1109a30f8f8cSSatish Balay     ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1110a30f8f8cSSatish Balay     ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1111a30f8f8cSSatish Balay   }
1112a30f8f8cSSatish Balay 
1113a30f8f8cSSatish Balay   if (size == 1) {
1114a30f8f8cSSatish Balay     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
1115a30f8f8cSSatish Balay   } else {
1116a30f8f8cSSatish Balay     /* assemble the entire matrix onto first processor. */
1117a30f8f8cSSatish Balay     Mat         A;
1118a30f8f8cSSatish Balay     Mat_SeqBAIJ *Aloc;
1119a30f8f8cSSatish Balay     int         M = baij->M,N = baij->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
1120a30f8f8cSSatish Balay     MatScalar   *a;
1121a30f8f8cSSatish Balay 
1122a30f8f8cSSatish Balay     if (!rank) {
1123*bc0ba53dSHong Zhang       ierr = MatCreateMPISBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
1124a30f8f8cSSatish Balay     } else {
1125a30f8f8cSSatish Balay       ierr = MatCreateMPISBAIJ(mat->comm,baij->bs,0,0,M,M,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
1126a30f8f8cSSatish Balay     }
1127a30f8f8cSSatish Balay     PLogObjectParent(mat,A);
1128a30f8f8cSSatish Balay 
1129a30f8f8cSSatish Balay     /* copy over the A part */
1130a30f8f8cSSatish Balay     Aloc  = (Mat_SeqBAIJ*)baij->A->data;
1131a30f8f8cSSatish Balay     ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
1132a30f8f8cSSatish Balay     rvals = (int*)PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals);
1133a30f8f8cSSatish Balay 
1134a30f8f8cSSatish Balay     for (i=0; i<mbs; i++) {
1135a30f8f8cSSatish Balay       rvals[0] = bs*(baij->rstart + i);
1136a30f8f8cSSatish Balay       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1137a30f8f8cSSatish Balay       for (j=ai[i]; j<ai[i+1]; j++) {
1138a30f8f8cSSatish Balay         col = (baij->cstart+aj[j])*bs;
1139a30f8f8cSSatish Balay         for (k=0; k<bs; k++) {
1140a30f8f8cSSatish Balay           ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1141a30f8f8cSSatish Balay           col++; a += bs;
1142a30f8f8cSSatish Balay         }
1143a30f8f8cSSatish Balay       }
1144a30f8f8cSSatish Balay     }
1145a30f8f8cSSatish Balay     /* copy over the B part */
1146a30f8f8cSSatish Balay     Aloc = (Mat_SeqBAIJ*)baij->B->data;
1147a30f8f8cSSatish Balay     ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1148a30f8f8cSSatish Balay     for (i=0; i<mbs; i++) {
1149a30f8f8cSSatish Balay       rvals[0] = bs*(baij->rstart + i);
1150a30f8f8cSSatish Balay       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1151a30f8f8cSSatish Balay       for (j=ai[i]; j<ai[i+1]; j++) {
1152a30f8f8cSSatish Balay         col = baij->garray[aj[j]]*bs;
1153a30f8f8cSSatish Balay         for (k=0; k<bs; k++) {
1154a30f8f8cSSatish Balay           ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1155a30f8f8cSSatish Balay           col++; a += bs;
1156a30f8f8cSSatish Balay         }
1157a30f8f8cSSatish Balay       }
1158a30f8f8cSSatish Balay     }
1159a30f8f8cSSatish Balay     ierr = PetscFree(rvals);CHKERRQ(ierr);
1160a30f8f8cSSatish Balay     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1161a30f8f8cSSatish Balay     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1162a30f8f8cSSatish Balay     /*
1163a30f8f8cSSatish Balay        Everyone has to call to draw the matrix since the graphics waits are
1164a30f8f8cSSatish Balay        synchronized across all processors that share the Draw object
1165a30f8f8cSSatish Balay     */
1166a30f8f8cSSatish Balay     if (!rank) {
1167a30f8f8cSSatish Balay       Viewer sviewer;
1168a30f8f8cSSatish Balay       ierr = ViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
1169a30f8f8cSSatish Balay       ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1170a30f8f8cSSatish Balay       ierr = ViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1171a30f8f8cSSatish Balay     }
1172a30f8f8cSSatish Balay     ierr = MatDestroy(A);CHKERRQ(ierr);
1173a30f8f8cSSatish Balay   }
1174a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1175a30f8f8cSSatish Balay }
1176a30f8f8cSSatish Balay 
1177a30f8f8cSSatish Balay #undef __FUNC__
1178a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatView_MPISBAIJ"
1179a30f8f8cSSatish Balay int MatView_MPISBAIJ(Mat mat,Viewer viewer)
1180a30f8f8cSSatish Balay {
1181a30f8f8cSSatish Balay   int        ierr;
1182a30f8f8cSSatish Balay   PetscTruth isascii,isdraw,issocket,isbinary;
1183a30f8f8cSSatish Balay 
1184a30f8f8cSSatish Balay   PetscFunctionBegin;
1185a30f8f8cSSatish Balay   ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
1186a30f8f8cSSatish Balay   ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
1187a30f8f8cSSatish Balay   ierr = PetscTypeCompare((PetscObject)viewer,SOCKET_VIEWER,&issocket);CHKERRQ(ierr);
1188a30f8f8cSSatish Balay   ierr = PetscTypeCompare((PetscObject)viewer,BINARY_VIEWER,&isbinary);CHKERRQ(ierr);
1189a30f8f8cSSatish Balay   if (isascii || isdraw || issocket || isbinary) {
1190a30f8f8cSSatish Balay     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1191a30f8f8cSSatish Balay   } else {
1192a30f8f8cSSatish Balay     SETERRQ1(1,1,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name);
1193a30f8f8cSSatish Balay   }
1194a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1195a30f8f8cSSatish Balay }
1196a30f8f8cSSatish Balay 
1197a30f8f8cSSatish Balay #undef __FUNC__
1198a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatDestroy_MPISBAIJ"
1199a30f8f8cSSatish Balay int MatDestroy_MPISBAIJ(Mat mat)
1200a30f8f8cSSatish Balay {
1201a30f8f8cSSatish Balay   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1202a30f8f8cSSatish Balay   int         ierr;
1203a30f8f8cSSatish Balay 
1204a30f8f8cSSatish Balay   PetscFunctionBegin;
1205a30f8f8cSSatish Balay 
1206a30f8f8cSSatish Balay   if (mat->mapping) {
1207a30f8f8cSSatish Balay     ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr);
1208a30f8f8cSSatish Balay   }
1209a30f8f8cSSatish Balay   if (mat->bmapping) {
1210a30f8f8cSSatish Balay     ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr);
1211a30f8f8cSSatish Balay   }
1212a30f8f8cSSatish Balay   if (mat->rmap) {
1213a30f8f8cSSatish Balay     ierr = MapDestroy(mat->rmap);CHKERRQ(ierr);
1214a30f8f8cSSatish Balay   }
1215a30f8f8cSSatish Balay   if (mat->cmap) {
1216a30f8f8cSSatish Balay     ierr = MapDestroy(mat->cmap);CHKERRQ(ierr);
1217a30f8f8cSSatish Balay   }
1218a30f8f8cSSatish Balay #if defined(PETSC_USE_LOG)
1219a30f8f8cSSatish Balay   PLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",baij->M,baij->N);
1220a30f8f8cSSatish Balay #endif
1221a30f8f8cSSatish Balay 
1222a30f8f8cSSatish Balay   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1223a30f8f8cSSatish Balay   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
1224a30f8f8cSSatish Balay 
1225a30f8f8cSSatish Balay   ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
1226a30f8f8cSSatish Balay   ierr = MatDestroy(baij->A);CHKERRQ(ierr);
1227a30f8f8cSSatish Balay   ierr = MatDestroy(baij->B);CHKERRQ(ierr);
1228a30f8f8cSSatish Balay #if defined (PETSC_USE_CTABLE)
1229a30f8f8cSSatish Balay   if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
1230a30f8f8cSSatish Balay #else
1231a30f8f8cSSatish Balay   if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
1232a30f8f8cSSatish Balay #endif
1233a30f8f8cSSatish Balay   if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
1234a30f8f8cSSatish Balay   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
1235a30f8f8cSSatish Balay   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
1236a30f8f8cSSatish Balay   if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
1237a30f8f8cSSatish Balay   if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
1238a30f8f8cSSatish Balay   if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
1239a30f8f8cSSatish Balay #if defined(PETSC_USE_MAT_SINGLE)
1240a30f8f8cSSatish Balay   if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);}
1241a30f8f8cSSatish Balay #endif
1242a30f8f8cSSatish Balay   ierr = PetscFree(baij);CHKERRQ(ierr);
1243a30f8f8cSSatish Balay   PLogObjectDestroy(mat);
1244a30f8f8cSSatish Balay   PetscHeaderDestroy(mat);
1245a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1246a30f8f8cSSatish Balay }
1247a30f8f8cSSatish Balay 
1248a30f8f8cSSatish Balay #undef __FUNC__
1249a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatMult_MPISBAIJ"
1250a30f8f8cSSatish Balay int MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
1251a30f8f8cSSatish Balay {
1252a30f8f8cSSatish Balay   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1253a30f8f8cSSatish Balay   int         ierr,nt;
1254a30f8f8cSSatish Balay 
1255a30f8f8cSSatish Balay   PetscFunctionBegin;
1256a30f8f8cSSatish Balay   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1257a30f8f8cSSatish Balay   if (nt != a->n) {
1258a30f8f8cSSatish Balay     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx");
1259a30f8f8cSSatish Balay   }
1260a30f8f8cSSatish Balay   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1261a30f8f8cSSatish Balay   if (nt != a->m) {
1262a30f8f8cSSatish Balay     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible parition of A and yy");
1263a30f8f8cSSatish Balay   }
1264a30f8f8cSSatish Balay   /* do subdiagonal part */
1265a30f8f8cSSatish Balay   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1266a30f8f8cSSatish Balay   /* send it on its way */
1267a30f8f8cSSatish Balay   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1268a30f8f8cSSatish Balay   /* do local part */
1269a30f8f8cSSatish Balay   ierr = (*a->B->ops->mult)(a->B,xx,yy);CHKERRQ(ierr);
1270a30f8f8cSSatish Balay   ierr = (*a->A->ops->multadd)(a->A,xx,yy,yy);CHKERRQ(ierr);
1271a30f8f8cSSatish Balay   /* receive remote parts */
1272a30f8f8cSSatish Balay   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1273a30f8f8cSSatish Balay 
1274a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1275a30f8f8cSSatish Balay }
1276a30f8f8cSSatish Balay 
1277a30f8f8cSSatish Balay #undef __FUNC__
1278a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatMultAdd_MPISBAIJ"
1279a30f8f8cSSatish Balay int MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1280a30f8f8cSSatish Balay {
1281a30f8f8cSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1282a30f8f8cSSatish Balay   int        ierr;
1283a30f8f8cSSatish Balay 
1284a30f8f8cSSatish Balay   PetscFunctionBegin;
1285a30f8f8cSSatish Balay   /* do subdiagonal part */
1286a30f8f8cSSatish Balay   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1287a30f8f8cSSatish Balay   /* send it on its way */
1288a30f8f8cSSatish Balay   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1289a30f8f8cSSatish Balay   /* do local part */
1290a30f8f8cSSatish Balay   ierr = (*a->B->ops->multadd)(a->B,xx,yy,zz);CHKERRQ(ierr);
1291a30f8f8cSSatish Balay   ierr = (*a->A->ops->multadd)(a->A,xx,zz,zz);CHKERRQ(ierr);
1292a30f8f8cSSatish Balay   /* receive remote parts */
1293a30f8f8cSSatish Balay   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1294a30f8f8cSSatish Balay 
1295a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1296a30f8f8cSSatish Balay }
1297a30f8f8cSSatish Balay 
1298a30f8f8cSSatish Balay #undef __FUNC__
1299a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatMultTranspose_MPISBAIJ"
1300a30f8f8cSSatish Balay int MatMultTranspose_MPISBAIJ(Mat A,Vec xx,Vec yy)
1301a30f8f8cSSatish Balay {
1302a30f8f8cSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1303a30f8f8cSSatish Balay   int         ierr;
1304a30f8f8cSSatish Balay 
1305a30f8f8cSSatish Balay   PetscFunctionBegin;
1306a30f8f8cSSatish Balay   /* do nondiagonal part */
1307a30f8f8cSSatish Balay   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1308a30f8f8cSSatish Balay   /* send it on its way */
1309a30f8f8cSSatish Balay   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1310a30f8f8cSSatish Balay   /* do local part */
1311a30f8f8cSSatish Balay   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1312a30f8f8cSSatish Balay   /* receive remote parts: note this assumes the values are not actually */
1313a30f8f8cSSatish Balay   /* inserted in yy until the next line, which is true for my implementation*/
1314a30f8f8cSSatish Balay   /* but is not perhaps always true. */
1315a30f8f8cSSatish Balay   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1316a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1317a30f8f8cSSatish Balay }
1318a30f8f8cSSatish Balay 
1319a30f8f8cSSatish Balay #undef __FUNC__
1320a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatMultTransposeAdd_MPISBAIJ"
1321a30f8f8cSSatish Balay int MatMultTransposeAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1322a30f8f8cSSatish Balay {
1323a30f8f8cSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1324a30f8f8cSSatish Balay   int         ierr;
1325a30f8f8cSSatish Balay 
1326a30f8f8cSSatish Balay   PetscFunctionBegin;
1327a30f8f8cSSatish Balay   /* do nondiagonal part */
1328a30f8f8cSSatish Balay   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1329a30f8f8cSSatish Balay   /* send it on its way */
1330a30f8f8cSSatish Balay   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1331a30f8f8cSSatish Balay   /* do local part */
1332a30f8f8cSSatish Balay   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1333a30f8f8cSSatish Balay   /* receive remote parts: note this assumes the values are not actually */
1334a30f8f8cSSatish Balay   /* inserted in yy until the next line, which is true for my implementation*/
1335a30f8f8cSSatish Balay   /* but is not perhaps always true. */
1336a30f8f8cSSatish Balay   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1337a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1338a30f8f8cSSatish Balay }
1339a30f8f8cSSatish Balay 
1340a30f8f8cSSatish Balay /*
1341a30f8f8cSSatish Balay   This only works correctly for square matrices where the subblock A->A is the
1342a30f8f8cSSatish Balay    diagonal block
1343a30f8f8cSSatish Balay */
1344a30f8f8cSSatish Balay #undef __FUNC__
1345a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatGetDiagonal_MPISBAIJ"
1346a30f8f8cSSatish Balay int MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
1347a30f8f8cSSatish Balay {
1348a30f8f8cSSatish Balay   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1349a30f8f8cSSatish Balay   int         ierr;
1350a30f8f8cSSatish Balay 
1351a30f8f8cSSatish Balay   PetscFunctionBegin;
1352a30f8f8cSSatish Balay   /* if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block"); */
1353a30f8f8cSSatish Balay   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1354a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1355a30f8f8cSSatish Balay }
1356a30f8f8cSSatish Balay 
1357a30f8f8cSSatish Balay #undef __FUNC__
1358a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatScale_MPISBAIJ"
1359a30f8f8cSSatish Balay int MatScale_MPISBAIJ(Scalar *aa,Mat A)
1360a30f8f8cSSatish Balay {
1361a30f8f8cSSatish Balay   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1362a30f8f8cSSatish Balay   int         ierr;
1363a30f8f8cSSatish Balay 
1364a30f8f8cSSatish Balay   PetscFunctionBegin;
1365a30f8f8cSSatish Balay   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
1366a30f8f8cSSatish Balay   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
1367a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1368a30f8f8cSSatish Balay }
1369a30f8f8cSSatish Balay 
1370a30f8f8cSSatish Balay #undef __FUNC__
1371a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatGetSize_MPISBAIJ"
1372a30f8f8cSSatish Balay int MatGetSize_MPISBAIJ(Mat matin,int *m,int *n)
1373a30f8f8cSSatish Balay {
1374a30f8f8cSSatish Balay   Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data;
1375a30f8f8cSSatish Balay 
1376a30f8f8cSSatish Balay   PetscFunctionBegin;
1377a30f8f8cSSatish Balay   if (m) *m = mat->M;
1378a30f8f8cSSatish Balay   if (n) *n = mat->N;
1379a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1380a30f8f8cSSatish Balay }
1381a30f8f8cSSatish Balay 
1382a30f8f8cSSatish Balay #undef __FUNC__
1383a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatGetLocalSize_MPISBAIJ"
1384a30f8f8cSSatish Balay int MatGetLocalSize_MPISBAIJ(Mat matin,int *m,int *n)
1385a30f8f8cSSatish Balay {
1386a30f8f8cSSatish Balay   Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data;
1387a30f8f8cSSatish Balay 
1388a30f8f8cSSatish Balay   PetscFunctionBegin;
1389a30f8f8cSSatish Balay   *m = mat->m; *n = mat->n;
1390a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1391a30f8f8cSSatish Balay }
1392a30f8f8cSSatish Balay 
1393a30f8f8cSSatish Balay #undef __FUNC__
1394a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatGetOwnershipRange_MPISBAIJ"
1395a30f8f8cSSatish Balay int MatGetOwnershipRange_MPISBAIJ(Mat matin,int *m,int *n)
1396a30f8f8cSSatish Balay {
1397a30f8f8cSSatish Balay   Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data;
1398a30f8f8cSSatish Balay 
1399a30f8f8cSSatish Balay   PetscFunctionBegin;
1400a30f8f8cSSatish Balay   if (m) *m = mat->rstart*mat->bs;
1401a30f8f8cSSatish Balay   if (n) *n = mat->rend*mat->bs;
1402a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1403a30f8f8cSSatish Balay }
1404a30f8f8cSSatish Balay 
1405a30f8f8cSSatish Balay #undef __FUNC__
1406a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatGetRow_MPISBAIJ"
1407a30f8f8cSSatish Balay int MatGetRow_MPISBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
1408a30f8f8cSSatish Balay {
1409a30f8f8cSSatish Balay   Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data;
1410a30f8f8cSSatish Balay   Scalar     *vworkA,*vworkB,**pvA,**pvB,*v_p;
1411a30f8f8cSSatish Balay   int        bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB;
1412a30f8f8cSSatish Balay   int        nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs;
1413a30f8f8cSSatish Balay   int        *cmap,*idx_p,cstart = mat->cstart;
1414a30f8f8cSSatish Balay 
1415a30f8f8cSSatish Balay   PetscFunctionBegin;
1416a30f8f8cSSatish Balay   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active");
1417a30f8f8cSSatish Balay   mat->getrowactive = PETSC_TRUE;
1418a30f8f8cSSatish Balay 
1419a30f8f8cSSatish Balay   if (!mat->rowvalues && (idx || v)) {
1420a30f8f8cSSatish Balay     /*
1421a30f8f8cSSatish Balay         allocate enough space to hold information from the longest row.
1422a30f8f8cSSatish Balay     */
1423a30f8f8cSSatish Balay     Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
1424a30f8f8cSSatish Balay     Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
1425a30f8f8cSSatish Balay     int     max = 1,mbs = mat->mbs,tmp;
1426a30f8f8cSSatish Balay     for (i=0; i<mbs; i++) {
1427a30f8f8cSSatish Balay       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
1428a30f8f8cSSatish Balay       if (max < tmp) { max = tmp; }
1429a30f8f8cSSatish Balay     }
1430a30f8f8cSSatish Balay     mat->rowvalues = (Scalar*)PetscMalloc(max*bs2*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues);
1431a30f8f8cSSatish Balay     mat->rowindices = (int*)(mat->rowvalues + max*bs2);
1432a30f8f8cSSatish Balay   }
1433a30f8f8cSSatish Balay 
1434a30f8f8cSSatish Balay   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,0,"Only local rows")
1435a30f8f8cSSatish Balay   lrow = row - brstart;  /* local row index */
1436a30f8f8cSSatish Balay 
1437a30f8f8cSSatish Balay   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1438a30f8f8cSSatish Balay   if (!v)   {pvA = 0; pvB = 0;}
1439a30f8f8cSSatish Balay   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1440a30f8f8cSSatish Balay   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1441a30f8f8cSSatish Balay   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1442a30f8f8cSSatish Balay   nztot = nzA + nzB;
1443a30f8f8cSSatish Balay 
1444a30f8f8cSSatish Balay   cmap  = mat->garray;
1445a30f8f8cSSatish Balay   if (v  || idx) {
1446a30f8f8cSSatish Balay     if (nztot) {
1447a30f8f8cSSatish Balay       /* Sort by increasing column numbers, assuming A and B already sorted */
1448a30f8f8cSSatish Balay       int imark = -1;
1449a30f8f8cSSatish Balay       if (v) {
1450a30f8f8cSSatish Balay         *v = v_p = mat->rowvalues;
1451a30f8f8cSSatish Balay         for (i=0; i<nzB; i++) {
1452a30f8f8cSSatish Balay           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1453a30f8f8cSSatish Balay           else break;
1454a30f8f8cSSatish Balay         }
1455a30f8f8cSSatish Balay         imark = i;
1456a30f8f8cSSatish Balay         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1457a30f8f8cSSatish Balay         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1458a30f8f8cSSatish Balay       }
1459a30f8f8cSSatish Balay       if (idx) {
1460a30f8f8cSSatish Balay         *idx = idx_p = mat->rowindices;
1461a30f8f8cSSatish Balay         if (imark > -1) {
1462a30f8f8cSSatish Balay           for (i=0; i<imark; i++) {
1463a30f8f8cSSatish Balay             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1464a30f8f8cSSatish Balay           }
1465a30f8f8cSSatish Balay         } else {
1466a30f8f8cSSatish Balay           for (i=0; i<nzB; i++) {
1467a30f8f8cSSatish Balay             if (cmap[cworkB[i]/bs] < cstart)
1468a30f8f8cSSatish Balay               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1469a30f8f8cSSatish Balay             else break;
1470a30f8f8cSSatish Balay           }
1471a30f8f8cSSatish Balay           imark = i;
1472a30f8f8cSSatish Balay         }
1473a30f8f8cSSatish Balay         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1474a30f8f8cSSatish Balay         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1475a30f8f8cSSatish Balay       }
1476a30f8f8cSSatish Balay     } else {
1477a30f8f8cSSatish Balay       if (idx) *idx = 0;
1478a30f8f8cSSatish Balay       if (v)   *v   = 0;
1479a30f8f8cSSatish Balay     }
1480a30f8f8cSSatish Balay   }
1481a30f8f8cSSatish Balay   *nz = nztot;
1482a30f8f8cSSatish Balay   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1483a30f8f8cSSatish Balay   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1484a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1485a30f8f8cSSatish Balay }
1486a30f8f8cSSatish Balay 
1487a30f8f8cSSatish Balay #undef __FUNC__
1488a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatRestoreRow_MPISBAIJ"
1489a30f8f8cSSatish Balay int MatRestoreRow_MPISBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1490a30f8f8cSSatish Balay {
1491a30f8f8cSSatish Balay   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1492a30f8f8cSSatish Balay 
1493a30f8f8cSSatish Balay   PetscFunctionBegin;
1494a30f8f8cSSatish Balay   if (baij->getrowactive == PETSC_FALSE) {
1495a30f8f8cSSatish Balay     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called");
1496a30f8f8cSSatish Balay   }
1497a30f8f8cSSatish Balay   baij->getrowactive = PETSC_FALSE;
1498a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1499a30f8f8cSSatish Balay }
1500a30f8f8cSSatish Balay 
1501a30f8f8cSSatish Balay #undef __FUNC__
1502a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatGetBlockSize_MPISBAIJ"
1503a30f8f8cSSatish Balay int MatGetBlockSize_MPISBAIJ(Mat mat,int *bs)
1504a30f8f8cSSatish Balay {
1505a30f8f8cSSatish Balay   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1506a30f8f8cSSatish Balay 
1507a30f8f8cSSatish Balay   PetscFunctionBegin;
1508a30f8f8cSSatish Balay   *bs = baij->bs;
1509a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1510a30f8f8cSSatish Balay }
1511a30f8f8cSSatish Balay 
1512a30f8f8cSSatish Balay #undef __FUNC__
1513a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatZeroEntries_MPISBAIJ"
1514a30f8f8cSSatish Balay int MatZeroEntries_MPISBAIJ(Mat A)
1515a30f8f8cSSatish Balay {
1516a30f8f8cSSatish Balay   Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data;
1517a30f8f8cSSatish Balay   int         ierr;
1518a30f8f8cSSatish Balay 
1519a30f8f8cSSatish Balay   PetscFunctionBegin;
1520a30f8f8cSSatish Balay   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1521a30f8f8cSSatish Balay   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1522a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1523a30f8f8cSSatish Balay }
1524a30f8f8cSSatish Balay 
1525a30f8f8cSSatish Balay #undef __FUNC__
1526a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatGetInfo_MPISBAIJ"
1527a30f8f8cSSatish Balay int MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1528a30f8f8cSSatish Balay {
1529a30f8f8cSSatish Balay   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data;
1530a30f8f8cSSatish Balay   Mat         A = a->A,B = a->B;
1531a30f8f8cSSatish Balay   int         ierr;
1532a30f8f8cSSatish Balay   PetscReal   isend[5],irecv[5];
1533a30f8f8cSSatish Balay 
1534a30f8f8cSSatish Balay   PetscFunctionBegin;
1535a30f8f8cSSatish Balay   info->block_size     = (double)a->bs;
1536a30f8f8cSSatish Balay   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1537a30f8f8cSSatish Balay   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1538a30f8f8cSSatish Balay   isend[3] = info->memory;  isend[4] = info->mallocs;
1539a30f8f8cSSatish Balay   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1540a30f8f8cSSatish Balay   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1541a30f8f8cSSatish Balay   isend[3] += info->memory;  isend[4] += info->mallocs;
1542a30f8f8cSSatish Balay   if (flag == MAT_LOCAL) {
1543a30f8f8cSSatish Balay     info->nz_used      = isend[0];
1544a30f8f8cSSatish Balay     info->nz_allocated = isend[1];
1545a30f8f8cSSatish Balay     info->nz_unneeded  = isend[2];
1546a30f8f8cSSatish Balay     info->memory       = isend[3];
1547a30f8f8cSSatish Balay     info->mallocs      = isend[4];
1548a30f8f8cSSatish Balay   } else if (flag == MAT_GLOBAL_MAX) {
1549a30f8f8cSSatish Balay     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr);
1550a30f8f8cSSatish Balay     info->nz_used      = irecv[0];
1551a30f8f8cSSatish Balay     info->nz_allocated = irecv[1];
1552a30f8f8cSSatish Balay     info->nz_unneeded  = irecv[2];
1553a30f8f8cSSatish Balay     info->memory       = irecv[3];
1554a30f8f8cSSatish Balay     info->mallocs      = irecv[4];
1555a30f8f8cSSatish Balay   } else if (flag == MAT_GLOBAL_SUM) {
1556a30f8f8cSSatish Balay     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr);
1557a30f8f8cSSatish Balay     info->nz_used      = irecv[0];
1558a30f8f8cSSatish Balay     info->nz_allocated = irecv[1];
1559a30f8f8cSSatish Balay     info->nz_unneeded  = irecv[2];
1560a30f8f8cSSatish Balay     info->memory       = irecv[3];
1561a30f8f8cSSatish Balay     info->mallocs      = irecv[4];
1562a30f8f8cSSatish Balay   } else {
1563a30f8f8cSSatish Balay     SETERRQ1(1,1,"Unknown MatInfoType argument %d",flag);
1564a30f8f8cSSatish Balay   }
1565a30f8f8cSSatish Balay   info->rows_global       = (double)a->M;
1566a30f8f8cSSatish Balay   info->columns_global    = (double)a->N;
1567a30f8f8cSSatish Balay   info->rows_local        = (double)a->m;
1568a30f8f8cSSatish Balay   info->columns_local     = (double)a->N;
1569a30f8f8cSSatish Balay   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1570a30f8f8cSSatish Balay   info->fill_ratio_needed = 0;
1571a30f8f8cSSatish Balay   info->factor_mallocs    = 0;
1572a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1573a30f8f8cSSatish Balay }
1574a30f8f8cSSatish Balay 
1575a30f8f8cSSatish Balay #undef __FUNC__
1576a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatSetOption_MPISBAIJ"
1577a30f8f8cSSatish Balay int MatSetOption_MPISBAIJ(Mat A,MatOption op)
1578a30f8f8cSSatish Balay {
1579a30f8f8cSSatish Balay   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1580a30f8f8cSSatish Balay   int         ierr;
1581a30f8f8cSSatish Balay 
1582a30f8f8cSSatish Balay   PetscFunctionBegin;
1583a30f8f8cSSatish Balay   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
1584a30f8f8cSSatish Balay       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
1585a30f8f8cSSatish Balay       op == MAT_COLUMNS_UNSORTED ||
1586a30f8f8cSSatish Balay       op == MAT_COLUMNS_SORTED ||
1587a30f8f8cSSatish Balay       op == MAT_NEW_NONZERO_ALLOCATION_ERR ||
1588a30f8f8cSSatish Balay       op == MAT_KEEP_ZEROED_ROWS ||
1589a30f8f8cSSatish Balay       op == MAT_NEW_NONZERO_LOCATION_ERR) {
1590a30f8f8cSSatish Balay         ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1591a30f8f8cSSatish Balay         ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1592a30f8f8cSSatish Balay   } else if (op == MAT_ROW_ORIENTED) {
1593a30f8f8cSSatish Balay         a->roworiented = PETSC_TRUE;
1594a30f8f8cSSatish Balay         ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1595a30f8f8cSSatish Balay         ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1596a30f8f8cSSatish Balay   } else if (op == MAT_ROWS_SORTED ||
1597a30f8f8cSSatish Balay              op == MAT_ROWS_UNSORTED ||
1598a30f8f8cSSatish Balay              op == MAT_SYMMETRIC ||
1599a30f8f8cSSatish Balay              op == MAT_STRUCTURALLY_SYMMETRIC ||
1600a30f8f8cSSatish Balay              op == MAT_YES_NEW_DIAGONALS ||
1601a30f8f8cSSatish Balay              op == MAT_USE_HASH_TABLE) {
1602a30f8f8cSSatish Balay     PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
1603a30f8f8cSSatish Balay   } else if (op == MAT_COLUMN_ORIENTED) {
1604a30f8f8cSSatish Balay     a->roworiented = PETSC_FALSE;
1605a30f8f8cSSatish Balay     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1606a30f8f8cSSatish Balay     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1607a30f8f8cSSatish Balay   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
1608a30f8f8cSSatish Balay     a->donotstash = PETSC_TRUE;
1609a30f8f8cSSatish Balay   } else if (op == MAT_NO_NEW_DIAGONALS) {
1610a30f8f8cSSatish Balay     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
1611a30f8f8cSSatish Balay   } else if (op == MAT_USE_HASH_TABLE) {
1612a30f8f8cSSatish Balay     a->ht_flag = PETSC_TRUE;
1613a30f8f8cSSatish Balay   } else {
1614a30f8f8cSSatish Balay     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
1615a30f8f8cSSatish Balay   }
1616a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1617a30f8f8cSSatish Balay }
1618a30f8f8cSSatish Balay 
1619a30f8f8cSSatish Balay #undef __FUNC__
1620a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatTranspose_MPISBAIJ("
1621a30f8f8cSSatish Balay int MatTranspose_MPISBAIJ(Mat A,Mat *matout)
1622a30f8f8cSSatish Balay {
1623a30f8f8cSSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data;
1624a30f8f8cSSatish Balay   Mat_SeqBAIJ *Aloc;
1625a30f8f8cSSatish Balay   Mat         B;
1626a30f8f8cSSatish Balay   int         ierr,M=baij->M,N=baij->N,*ai,*aj,i,*rvals,j,k,col;
1627a30f8f8cSSatish Balay   int         bs=baij->bs,mbs=baij->mbs;
1628a30f8f8cSSatish Balay   MatScalar   *a;
1629a30f8f8cSSatish Balay 
1630a30f8f8cSSatish Balay   PetscFunctionBegin;
1631a30f8f8cSSatish Balay   if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place");
1632a30f8f8cSSatish Balay   ierr = MatCreateMPIBAIJ(A->comm,baij->bs,baij->n,baij->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);
1633a30f8f8cSSatish Balay 
1634a30f8f8cSSatish Balay   /* copy over the A part */
1635a30f8f8cSSatish Balay   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1636a30f8f8cSSatish Balay   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1637a30f8f8cSSatish Balay   rvals = (int*)PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals);
1638a30f8f8cSSatish Balay 
1639a30f8f8cSSatish Balay   for (i=0; i<mbs; i++) {
1640a30f8f8cSSatish Balay     rvals[0] = bs*(baij->rstart + i);
1641a30f8f8cSSatish Balay     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1642a30f8f8cSSatish Balay     for (j=ai[i]; j<ai[i+1]; j++) {
1643a30f8f8cSSatish Balay       col = (baij->cstart+aj[j])*bs;
1644a30f8f8cSSatish Balay       for (k=0; k<bs; k++) {
1645a30f8f8cSSatish Balay         ierr = MatSetValues_MPISBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1646a30f8f8cSSatish Balay         col++; a += bs;
1647a30f8f8cSSatish Balay       }
1648a30f8f8cSSatish Balay     }
1649a30f8f8cSSatish Balay   }
1650a30f8f8cSSatish Balay   /* copy over the B part */
1651a30f8f8cSSatish Balay   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1652a30f8f8cSSatish Balay   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1653a30f8f8cSSatish Balay   for (i=0; i<mbs; i++) {
1654a30f8f8cSSatish Balay     rvals[0] = bs*(baij->rstart + i);
1655a30f8f8cSSatish Balay     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1656a30f8f8cSSatish Balay     for (j=ai[i]; j<ai[i+1]; j++) {
1657a30f8f8cSSatish Balay       col = baij->garray[aj[j]]*bs;
1658a30f8f8cSSatish Balay       for (k=0; k<bs; k++) {
1659a30f8f8cSSatish Balay         ierr = MatSetValues_MPISBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1660a30f8f8cSSatish Balay         col++; a += bs;
1661a30f8f8cSSatish Balay       }
1662a30f8f8cSSatish Balay     }
1663a30f8f8cSSatish Balay   }
1664a30f8f8cSSatish Balay   ierr = PetscFree(rvals);CHKERRQ(ierr);
1665a30f8f8cSSatish Balay   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1666a30f8f8cSSatish Balay   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1667a30f8f8cSSatish Balay 
1668a30f8f8cSSatish Balay   if (matout) {
1669a30f8f8cSSatish Balay     *matout = B;
1670a30f8f8cSSatish Balay   } else {
1671a30f8f8cSSatish Balay     PetscOps *Abops;
1672a30f8f8cSSatish Balay     MatOps   Aops;
1673a30f8f8cSSatish Balay 
1674a30f8f8cSSatish Balay     /* This isn't really an in-place transpose .... but free data structures from baij */
1675a30f8f8cSSatish Balay     ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
1676a30f8f8cSSatish Balay     ierr = MatDestroy(baij->A);CHKERRQ(ierr);
1677a30f8f8cSSatish Balay     ierr = MatDestroy(baij->B);CHKERRQ(ierr);
1678a30f8f8cSSatish Balay #if defined (PETSC_USE_CTABLE)
1679a30f8f8cSSatish Balay     if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
1680a30f8f8cSSatish Balay #else
1681a30f8f8cSSatish Balay     if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
1682a30f8f8cSSatish Balay #endif
1683a30f8f8cSSatish Balay     if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
1684a30f8f8cSSatish Balay     if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
1685a30f8f8cSSatish Balay     if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
1686a30f8f8cSSatish Balay     ierr = PetscFree(baij);CHKERRQ(ierr);
1687a30f8f8cSSatish Balay 
1688a30f8f8cSSatish Balay     /*
1689a30f8f8cSSatish Balay        This is horrible, horrible code. We need to keep the
1690a30f8f8cSSatish Balay       A pointers for the bops and ops but copy everything
1691a30f8f8cSSatish Balay       else from C.
1692a30f8f8cSSatish Balay     */
1693a30f8f8cSSatish Balay     Abops   = A->bops;
1694a30f8f8cSSatish Balay     Aops    = A->ops;
1695a30f8f8cSSatish Balay     ierr    = PetscMemcpy(A,B,sizeof(struct _p_Mat));CHKERRQ(ierr);
1696a30f8f8cSSatish Balay     A->bops = Abops;
1697a30f8f8cSSatish Balay     A->ops  = Aops;
1698a30f8f8cSSatish Balay 
1699a30f8f8cSSatish Balay     PetscHeaderDestroy(B);
1700a30f8f8cSSatish Balay   }
1701a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1702a30f8f8cSSatish Balay }
1703a30f8f8cSSatish Balay 
1704a30f8f8cSSatish Balay #undef __FUNC__
1705a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatDiagonalScale_MPISBAIJ"
1706a30f8f8cSSatish Balay int MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1707a30f8f8cSSatish Balay {
1708a30f8f8cSSatish Balay   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1709a30f8f8cSSatish Balay   Mat         a = baij->A,b = baij->B;
1710a30f8f8cSSatish Balay   int         ierr,s1,s2,s3;
1711a30f8f8cSSatish Balay 
1712a30f8f8cSSatish Balay   PetscFunctionBegin;
1713a30f8f8cSSatish Balay   if (ll != rr) {
1714a30f8f8cSSatish Balay     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"For symmetric format, left and right scaling vectors must be same\n");
1715a30f8f8cSSatish Balay   }
1716a30f8f8cSSatish Balay   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1717a30f8f8cSSatish Balay   if (rr) {
1718a30f8f8cSSatish Balay     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1719a30f8f8cSSatish Balay     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"right vector non-conforming local size");
1720a30f8f8cSSatish Balay     /* Overlap communication with computation. */
1721a30f8f8cSSatish Balay     ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1722a30f8f8cSSatish Balay     /*} if (ll) { */
1723a30f8f8cSSatish Balay     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1724a30f8f8cSSatish Balay     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"left vector non-conforming local size");
1725a30f8f8cSSatish Balay     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1726a30f8f8cSSatish Balay     /* } */
1727a30f8f8cSSatish Balay   /* scale  the diagonal block */
1728a30f8f8cSSatish Balay   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1729a30f8f8cSSatish Balay 
1730a30f8f8cSSatish Balay   /* if (rr) { */
1731a30f8f8cSSatish Balay     /* Do a scatter end and then right scale the off-diagonal block */
1732a30f8f8cSSatish Balay     ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1733a30f8f8cSSatish Balay     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1734a30f8f8cSSatish Balay   }
1735a30f8f8cSSatish Balay 
1736a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1737a30f8f8cSSatish Balay }
1738a30f8f8cSSatish Balay 
1739a30f8f8cSSatish Balay #undef __FUNC__
1740a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatZeroRows_MPISBAIJ"
1741a30f8f8cSSatish Balay int MatZeroRows_MPISBAIJ(Mat A,IS is,Scalar *diag)
1742a30f8f8cSSatish Balay {
1743a30f8f8cSSatish Balay   Mat_MPISBAIJ    *l = (Mat_MPISBAIJ*)A->data;
1744a30f8f8cSSatish Balay   int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
1745a30f8f8cSSatish Balay   int            *procs,*nprocs,j,found,idx,nsends,*work,row;
1746a30f8f8cSSatish Balay   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
1747a30f8f8cSSatish Balay   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
1748a30f8f8cSSatish Balay   int            *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs;
1749a30f8f8cSSatish Balay   MPI_Comm       comm = A->comm;
1750a30f8f8cSSatish Balay   MPI_Request    *send_waits,*recv_waits;
1751a30f8f8cSSatish Balay   MPI_Status     recv_status,*send_status;
1752a30f8f8cSSatish Balay   IS             istmp;
1753a30f8f8cSSatish Balay 
1754a30f8f8cSSatish Balay   PetscFunctionBegin;
1755a30f8f8cSSatish Balay   ierr = ISGetSize(is,&N);CHKERRQ(ierr);
1756a30f8f8cSSatish Balay   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
1757a30f8f8cSSatish Balay 
1758a30f8f8cSSatish Balay   /*  first count number of contributors to each processor */
1759a30f8f8cSSatish Balay   nprocs = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(nprocs);
1760a30f8f8cSSatish Balay   ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
1761a30f8f8cSSatish Balay   procs  = nprocs + size;
1762a30f8f8cSSatish Balay   owner  = (int*)PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note*/
1763a30f8f8cSSatish Balay   for (i=0; i<N; i++) {
1764a30f8f8cSSatish Balay     idx   = rows[i];
1765a30f8f8cSSatish Balay     found = 0;
1766a30f8f8cSSatish Balay     for (j=0; j<size; j++) {
1767a30f8f8cSSatish Balay       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
1768a30f8f8cSSatish Balay         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
1769a30f8f8cSSatish Balay       }
1770a30f8f8cSSatish Balay     }
1771a30f8f8cSSatish Balay     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
1772a30f8f8cSSatish Balay   }
1773a30f8f8cSSatish Balay   nsends = 0;  for (i=0; i<size; i++) { nsends += procs[i];}
1774a30f8f8cSSatish Balay 
1775a30f8f8cSSatish Balay   /* inform other processors of number of messages and max length*/
1776a30f8f8cSSatish Balay   work   = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(work);
1777a30f8f8cSSatish Balay   ierr   = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr);
1778a30f8f8cSSatish Balay   nmax   = work[rank];
1779a30f8f8cSSatish Balay   nrecvs = work[size+rank];
1780a30f8f8cSSatish Balay   ierr = PetscFree(work);CHKERRQ(ierr);
1781a30f8f8cSSatish Balay 
1782a30f8f8cSSatish Balay   /* post receives:   */
1783a30f8f8cSSatish Balay   rvalues = (int*)PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
1784a30f8f8cSSatish Balay   recv_waits = (MPI_Request*)PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
1785a30f8f8cSSatish Balay   for (i=0; i<nrecvs; i++) {
1786a30f8f8cSSatish Balay     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
1787a30f8f8cSSatish Balay   }
1788a30f8f8cSSatish Balay 
1789a30f8f8cSSatish Balay   /* do sends:
1790a30f8f8cSSatish Balay      1) starts[i] gives the starting index in svalues for stuff going to
1791a30f8f8cSSatish Balay      the ith processor
1792a30f8f8cSSatish Balay   */
1793a30f8f8cSSatish Balay   svalues    = (int*)PetscMalloc((N+1)*sizeof(int));CHKPTRQ(svalues);
1794a30f8f8cSSatish Balay   send_waits = (MPI_Request*)PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
1795a30f8f8cSSatish Balay   starts     = (int*)PetscMalloc((size+1)*sizeof(int));CHKPTRQ(starts);
1796a30f8f8cSSatish Balay   starts[0]  = 0;
1797a30f8f8cSSatish Balay   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
1798a30f8f8cSSatish Balay   for (i=0; i<N; i++) {
1799a30f8f8cSSatish Balay     svalues[starts[owner[i]]++] = rows[i];
1800a30f8f8cSSatish Balay   }
1801a30f8f8cSSatish Balay   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
1802a30f8f8cSSatish Balay 
1803a30f8f8cSSatish Balay   starts[0] = 0;
1804a30f8f8cSSatish Balay   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
1805a30f8f8cSSatish Balay   count = 0;
1806a30f8f8cSSatish Balay   for (i=0; i<size; i++) {
1807a30f8f8cSSatish Balay     if (procs[i]) {
1808a30f8f8cSSatish Balay       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
1809a30f8f8cSSatish Balay     }
1810a30f8f8cSSatish Balay   }
1811a30f8f8cSSatish Balay   ierr = PetscFree(starts);CHKERRQ(ierr);
1812a30f8f8cSSatish Balay 
1813a30f8f8cSSatish Balay   base = owners[rank]*bs;
1814a30f8f8cSSatish Balay 
1815a30f8f8cSSatish Balay   /*  wait on receives */
1816a30f8f8cSSatish Balay   lens   = (int*)PetscMalloc(2*(nrecvs+1)*sizeof(int));CHKPTRQ(lens);
1817a30f8f8cSSatish Balay   source = lens + nrecvs;
1818a30f8f8cSSatish Balay   count  = nrecvs; slen = 0;
1819a30f8f8cSSatish Balay   while (count) {
1820a30f8f8cSSatish Balay     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
1821a30f8f8cSSatish Balay     /* unpack receives into our local space */
1822a30f8f8cSSatish Balay     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
1823a30f8f8cSSatish Balay     source[imdex]  = recv_status.MPI_SOURCE;
1824a30f8f8cSSatish Balay     lens[imdex]    = n;
1825a30f8f8cSSatish Balay     slen          += n;
1826a30f8f8cSSatish Balay     count--;
1827a30f8f8cSSatish Balay   }
1828a30f8f8cSSatish Balay   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1829a30f8f8cSSatish Balay 
1830a30f8f8cSSatish Balay   /* move the data into the send scatter */
1831a30f8f8cSSatish Balay   lrows = (int*)PetscMalloc((slen+1)*sizeof(int));CHKPTRQ(lrows);
1832a30f8f8cSSatish Balay   count = 0;
1833a30f8f8cSSatish Balay   for (i=0; i<nrecvs; i++) {
1834a30f8f8cSSatish Balay     values = rvalues + i*nmax;
1835a30f8f8cSSatish Balay     for (j=0; j<lens[i]; j++) {
1836a30f8f8cSSatish Balay       lrows[count++] = values[j] - base;
1837a30f8f8cSSatish Balay     }
1838a30f8f8cSSatish Balay   }
1839a30f8f8cSSatish Balay   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1840a30f8f8cSSatish Balay   ierr = PetscFree(lens);CHKERRQ(ierr);
1841a30f8f8cSSatish Balay   ierr = PetscFree(owner);CHKERRQ(ierr);
1842a30f8f8cSSatish Balay   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1843a30f8f8cSSatish Balay 
1844a30f8f8cSSatish Balay   /* actually zap the local rows */
1845a30f8f8cSSatish Balay   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
1846a30f8f8cSSatish Balay   PLogObjectParent(A,istmp);
1847a30f8f8cSSatish Balay 
1848a30f8f8cSSatish Balay   /*
1849a30f8f8cSSatish Balay         Zero the required rows. If the "diagonal block" of the matrix
1850a30f8f8cSSatish Balay      is square and the user wishes to set the diagonal we use seperate
1851a30f8f8cSSatish Balay      code so that MatSetValues() is not called for each diagonal allocating
1852a30f8f8cSSatish Balay      new memory, thus calling lots of mallocs and slowing things down.
1853a30f8f8cSSatish Balay 
1854a30f8f8cSSatish Balay        Contributed by: Mathew Knepley
1855a30f8f8cSSatish Balay   */
1856a30f8f8cSSatish Balay   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1857c8407628SSatish Balay   ierr = MatZeroRows_SeqSBAIJ(l->B,istmp,0);CHKERRQ(ierr);
1858a30f8f8cSSatish Balay   if (diag && (l->A->M == l->A->N)) {
1859a30f8f8cSSatish Balay     ierr = MatZeroRows_SeqSBAIJ(l->A,istmp,diag);CHKERRQ(ierr);
1860a30f8f8cSSatish Balay   } else if (diag) {
1861a30f8f8cSSatish Balay     ierr = MatZeroRows_SeqSBAIJ(l->A,istmp,0);CHKERRQ(ierr);
1862a30f8f8cSSatish Balay     if (((Mat_SeqSBAIJ*)l->A->data)->nonew) {
1863a30f8f8cSSatish Balay       SETERRQ(PETSC_ERR_SUP,0,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1864a30f8f8cSSatish Balay MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1865a30f8f8cSSatish Balay     }
1866a30f8f8cSSatish Balay     for (i=0; i<slen; i++) {
1867a30f8f8cSSatish Balay       row  = lrows[i] + rstart_bs;
1868a30f8f8cSSatish Balay       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
1869a30f8f8cSSatish Balay     }
1870a30f8f8cSSatish Balay     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1871a30f8f8cSSatish Balay     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1872a30f8f8cSSatish Balay   } else {
1873a30f8f8cSSatish Balay     ierr = MatZeroRows_SeqSBAIJ(l->A,istmp,0);CHKERRQ(ierr);
1874a30f8f8cSSatish Balay   }
1875a30f8f8cSSatish Balay 
1876a30f8f8cSSatish Balay   ierr = ISDestroy(istmp);CHKERRQ(ierr);
1877a30f8f8cSSatish Balay   ierr = PetscFree(lrows);CHKERRQ(ierr);
1878a30f8f8cSSatish Balay 
1879a30f8f8cSSatish Balay   /* wait on sends */
1880a30f8f8cSSatish Balay   if (nsends) {
1881a30f8f8cSSatish Balay     send_status = (MPI_Status*)PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
1882a30f8f8cSSatish Balay     ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1883a30f8f8cSSatish Balay     ierr        = PetscFree(send_status);CHKERRQ(ierr);
1884a30f8f8cSSatish Balay   }
1885a30f8f8cSSatish Balay   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1886a30f8f8cSSatish Balay   ierr = PetscFree(svalues);CHKERRQ(ierr);
1887a30f8f8cSSatish Balay 
1888a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1889a30f8f8cSSatish Balay }
1890a30f8f8cSSatish Balay 
1891a30f8f8cSSatish Balay #undef __FUNC__
1892a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatPrintHelp_MPISBAIJ"
1893a30f8f8cSSatish Balay int MatPrintHelp_MPISBAIJ(Mat A)
1894a30f8f8cSSatish Balay {
1895a30f8f8cSSatish Balay   Mat_MPISBAIJ *a   = (Mat_MPISBAIJ*)A->data;
1896a30f8f8cSSatish Balay   MPI_Comm    comm = A->comm;
1897a30f8f8cSSatish Balay   static int  called = 0;
1898a30f8f8cSSatish Balay   int         ierr;
1899a30f8f8cSSatish Balay 
1900a30f8f8cSSatish Balay   PetscFunctionBegin;
1901a30f8f8cSSatish Balay   if (!a->rank) {
1902a30f8f8cSSatish Balay     ierr = MatPrintHelp_SeqSBAIJ(a->A);CHKERRQ(ierr);
1903a30f8f8cSSatish Balay   }
1904a30f8f8cSSatish Balay   if (called) {PetscFunctionReturn(0);} else called = 1;
1905a30f8f8cSSatish Balay   ierr = (*PetscHelpPrintf)(comm," Options for MATMPISBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
1906a30f8f8cSSatish Balay   ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
1907a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1908a30f8f8cSSatish Balay }
1909a30f8f8cSSatish Balay 
1910a30f8f8cSSatish Balay #undef __FUNC__
1911a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatSetUnfactored_MPISBAIJ"
1912a30f8f8cSSatish Balay int MatSetUnfactored_MPISBAIJ(Mat A)
1913a30f8f8cSSatish Balay {
1914a30f8f8cSSatish Balay   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*)A->data;
1915a30f8f8cSSatish Balay   int         ierr;
1916a30f8f8cSSatish Balay 
1917a30f8f8cSSatish Balay   PetscFunctionBegin;
1918a30f8f8cSSatish Balay   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1919a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1920a30f8f8cSSatish Balay }
1921a30f8f8cSSatish Balay 
1922a30f8f8cSSatish Balay static int MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *);
1923a30f8f8cSSatish Balay 
1924a30f8f8cSSatish Balay #undef __FUNC__
1925a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatEqual_MPISBAIJ"
1926a30f8f8cSSatish Balay int MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag)
1927a30f8f8cSSatish Balay {
1928a30f8f8cSSatish Balay   Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1929a30f8f8cSSatish Balay   Mat         a,b,c,d;
1930a30f8f8cSSatish Balay   PetscTruth  flg;
1931a30f8f8cSSatish Balay   int         ierr;
1932a30f8f8cSSatish Balay 
1933a30f8f8cSSatish Balay   PetscFunctionBegin;
1934a30f8f8cSSatish Balay   if (B->type != MATMPISBAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type");
1935a30f8f8cSSatish Balay   a = matA->A; b = matA->B;
1936a30f8f8cSSatish Balay   c = matB->A; d = matB->B;
1937a30f8f8cSSatish Balay 
1938a30f8f8cSSatish Balay   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1939a30f8f8cSSatish Balay   if (flg == PETSC_TRUE) {
1940a30f8f8cSSatish Balay     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1941a30f8f8cSSatish Balay   }
1942a30f8f8cSSatish Balay   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1943a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1944a30f8f8cSSatish Balay }
1945a30f8f8cSSatish Balay 
1946a30f8f8cSSatish Balay /* -------------------------------------------------------------------*/
1947a30f8f8cSSatish Balay static struct _MatOps MatOps_Values = {
1948a30f8f8cSSatish Balay   MatSetValues_MPISBAIJ,
1949a30f8f8cSSatish Balay   MatGetRow_MPISBAIJ,
1950a30f8f8cSSatish Balay   MatRestoreRow_MPISBAIJ,
1951a30f8f8cSSatish Balay   MatMult_MPISBAIJ,
1952a30f8f8cSSatish Balay   MatMultAdd_MPISBAIJ,
1953a30f8f8cSSatish Balay   MatMultTranspose_MPISBAIJ,
1954a30f8f8cSSatish Balay   MatMultTransposeAdd_MPISBAIJ,
1955a30f8f8cSSatish Balay   0,
1956a30f8f8cSSatish Balay   0,
1957a30f8f8cSSatish Balay   0,
1958a30f8f8cSSatish Balay   0,
1959a30f8f8cSSatish Balay   0,
1960a30f8f8cSSatish Balay   0,
1961a30f8f8cSSatish Balay   0,
1962a30f8f8cSSatish Balay   MatTranspose_MPISBAIJ,
1963a30f8f8cSSatish Balay   MatGetInfo_MPISBAIJ,
1964a30f8f8cSSatish Balay   MatEqual_MPISBAIJ,
1965a30f8f8cSSatish Balay   MatGetDiagonal_MPISBAIJ,
1966a30f8f8cSSatish Balay   MatDiagonalScale_MPISBAIJ,
1967a30f8f8cSSatish Balay   MatNorm_MPISBAIJ,
1968a30f8f8cSSatish Balay   MatAssemblyBegin_MPISBAIJ,
1969a30f8f8cSSatish Balay   MatAssemblyEnd_MPISBAIJ,
1970a30f8f8cSSatish Balay   0,
1971a30f8f8cSSatish Balay   MatSetOption_MPISBAIJ,
1972a30f8f8cSSatish Balay   MatZeroEntries_MPISBAIJ,
1973a30f8f8cSSatish Balay   MatZeroRows_MPISBAIJ,
1974a30f8f8cSSatish Balay   0,
1975a30f8f8cSSatish Balay   0,
1976a30f8f8cSSatish Balay   0,
1977a30f8f8cSSatish Balay   0,
1978a30f8f8cSSatish Balay   MatGetSize_MPISBAIJ,
1979a30f8f8cSSatish Balay   MatGetLocalSize_MPISBAIJ,
1980a30f8f8cSSatish Balay   MatGetOwnershipRange_MPISBAIJ,
1981a30f8f8cSSatish Balay   0,
1982a30f8f8cSSatish Balay   0,
1983a30f8f8cSSatish Balay   0,
1984a30f8f8cSSatish Balay   0,
1985a30f8f8cSSatish Balay   MatDuplicate_MPISBAIJ,
1986a30f8f8cSSatish Balay   0,
1987a30f8f8cSSatish Balay   0,
1988a30f8f8cSSatish Balay   0,
1989a30f8f8cSSatish Balay   0,
1990a30f8f8cSSatish Balay   0,
1991a30f8f8cSSatish Balay   MatGetSubMatrices_MPISBAIJ,
1992a30f8f8cSSatish Balay   MatIncreaseOverlap_MPISBAIJ,
1993a30f8f8cSSatish Balay   MatGetValues_MPISBAIJ,
1994a30f8f8cSSatish Balay   0,
1995a30f8f8cSSatish Balay   MatPrintHelp_MPISBAIJ,
1996a30f8f8cSSatish Balay   MatScale_MPISBAIJ,
1997a30f8f8cSSatish Balay   0,
1998a30f8f8cSSatish Balay   0,
1999a30f8f8cSSatish Balay   0,
2000a30f8f8cSSatish Balay   MatGetBlockSize_MPISBAIJ,
2001a30f8f8cSSatish Balay   0,
2002a30f8f8cSSatish Balay   0,
2003a30f8f8cSSatish Balay   0,
2004a30f8f8cSSatish Balay   0,
2005a30f8f8cSSatish Balay   0,
2006a30f8f8cSSatish Balay   0,
2007a30f8f8cSSatish Balay   MatSetUnfactored_MPISBAIJ,
2008a30f8f8cSSatish Balay   0,
2009a30f8f8cSSatish Balay   MatSetValuesBlocked_MPISBAIJ,
2010a30f8f8cSSatish Balay   0,
2011a30f8f8cSSatish Balay   0,
2012a30f8f8cSSatish Balay   0,
2013a30f8f8cSSatish Balay   MatGetMaps_Petsc};
2014a30f8f8cSSatish Balay 
2015a30f8f8cSSatish Balay 
2016a30f8f8cSSatish Balay EXTERN_C_BEGIN
2017a30f8f8cSSatish Balay #undef __FUNC__
2018a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatGetDiagonalBlock_MPISBAIJ"
2019a30f8f8cSSatish Balay int MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
2020a30f8f8cSSatish Balay {
2021a30f8f8cSSatish Balay   PetscFunctionBegin;
2022a30f8f8cSSatish Balay   *a      = ((Mat_MPISBAIJ *)A->data)->A;
2023a30f8f8cSSatish Balay   *iscopy = PETSC_FALSE;
2024a30f8f8cSSatish Balay   PetscFunctionReturn(0);
2025a30f8f8cSSatish Balay }
2026a30f8f8cSSatish Balay EXTERN_C_END
2027a30f8f8cSSatish Balay 
2028a30f8f8cSSatish Balay #undef __FUNC__
2029a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatCreateMPISBAIJ"
2030a30f8f8cSSatish Balay /*@C
2031a30f8f8cSSatish Balay    MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
2032a30f8f8cSSatish Balay    (block compressed row).  For good matrix assembly performance
2033a30f8f8cSSatish Balay    the user should preallocate the matrix storage by setting the parameters
2034a30f8f8cSSatish Balay    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2035a30f8f8cSSatish Balay    performance can be increased by more than a factor of 50.
2036a30f8f8cSSatish Balay 
2037a30f8f8cSSatish Balay    Collective on MPI_Comm
2038a30f8f8cSSatish Balay 
2039a30f8f8cSSatish Balay    Input Parameters:
2040a30f8f8cSSatish Balay +  comm - MPI communicator
2041a30f8f8cSSatish Balay .  bs   - size of blockk
2042a30f8f8cSSatish Balay .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2043a30f8f8cSSatish Balay            This value should be the same as the local size used in creating the
2044a30f8f8cSSatish Balay            y vector for the matrix-vector product y = Ax.
2045a30f8f8cSSatish Balay .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2046a30f8f8cSSatish Balay            This value should be the same as the local size used in creating the
2047a30f8f8cSSatish Balay            x vector for the matrix-vector product y = Ax.
2048a30f8f8cSSatish Balay .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2049a30f8f8cSSatish Balay .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2050a30f8f8cSSatish Balay .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2051a30f8f8cSSatish Balay            submatrix  (same for all local rows)
2052a30f8f8cSSatish Balay .  d_nnz - array containing the number of block nonzeros in the various block rows
2053a30f8f8cSSatish Balay            of the in diagonal portion of the local (possibly different for each block
2054a30f8f8cSSatish Balay            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
2055a30f8f8cSSatish Balay .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2056a30f8f8cSSatish Balay            submatrix (same for all local rows).
2057a30f8f8cSSatish Balay -  o_nnz - array containing the number of nonzeros in the various block rows of the
2058a30f8f8cSSatish Balay            off-diagonal portion of the local submatrix (possibly different for
2059a30f8f8cSSatish Balay            each block row) or PETSC_NULL.
2060a30f8f8cSSatish Balay 
2061a30f8f8cSSatish Balay    Output Parameter:
2062a30f8f8cSSatish Balay .  A - the matrix
2063a30f8f8cSSatish Balay 
2064a30f8f8cSSatish Balay    Options Database Keys:
2065a30f8f8cSSatish Balay .   -mat_no_unroll - uses code that does not unroll the loops in the
2066a30f8f8cSSatish Balay                      block calculations (much slower)
2067a30f8f8cSSatish Balay .   -mat_block_size - size of the blocks to use
2068a30f8f8cSSatish Balay .   -mat_mpi - use the parallel matrix data structures even on one processor
2069a30f8f8cSSatish Balay                (defaults to using SeqBAIJ format on one processor)
2070a30f8f8cSSatish Balay 
2071a30f8f8cSSatish Balay    Notes:
2072a30f8f8cSSatish Balay    The user MUST specify either the local or global matrix dimensions
2073a30f8f8cSSatish Balay    (possibly both).
2074a30f8f8cSSatish Balay 
2075a30f8f8cSSatish Balay    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
2076a30f8f8cSSatish Balay    than it must be used on all processors that share the object for that argument.
2077a30f8f8cSSatish Balay 
2078a30f8f8cSSatish Balay    Storage Information:
2079a30f8f8cSSatish Balay    For a square global matrix we define each processor's diagonal portion
2080a30f8f8cSSatish Balay    to be its local rows and the corresponding columns (a square submatrix);
2081a30f8f8cSSatish Balay    each processor's off-diagonal portion encompasses the remainder of the
2082a30f8f8cSSatish Balay    local matrix (a rectangular submatrix).
2083a30f8f8cSSatish Balay 
2084a30f8f8cSSatish Balay    The user can specify preallocated storage for the diagonal part of
2085a30f8f8cSSatish Balay    the local submatrix with either d_nz or d_nnz (not both).  Set
2086a30f8f8cSSatish Balay    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2087a30f8f8cSSatish Balay    memory allocation.  Likewise, specify preallocated storage for the
2088a30f8f8cSSatish Balay    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2089a30f8f8cSSatish Balay 
2090a30f8f8cSSatish Balay    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2091a30f8f8cSSatish Balay    the figure below we depict these three local rows and all columns (0-11).
2092a30f8f8cSSatish Balay 
2093a30f8f8cSSatish Balay .vb
2094a30f8f8cSSatish Balay            0 1 2 3 4 5 6 7 8 9 10 11
2095a30f8f8cSSatish Balay           -------------------
2096a30f8f8cSSatish Balay    row 3  |  o o o d d d o o o o o o
2097a30f8f8cSSatish Balay    row 4  |  o o o d d d o o o o o o
2098a30f8f8cSSatish Balay    row 5  |  o o o d d d o o o o o o
2099a30f8f8cSSatish Balay           -------------------
2100a30f8f8cSSatish Balay .ve
2101a30f8f8cSSatish Balay 
2102a30f8f8cSSatish Balay    Thus, any entries in the d locations are stored in the d (diagonal)
2103a30f8f8cSSatish Balay    submatrix, and any entries in the o locations are stored in the
2104a30f8f8cSSatish Balay    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
2105a30f8f8cSSatish Balay    stored simply in the MATSEQBAIJ format for compressed row storage.
2106a30f8f8cSSatish Balay 
2107a30f8f8cSSatish Balay    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2108a30f8f8cSSatish Balay    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2109a30f8f8cSSatish Balay    In general, for PDE problems in which most nonzeros are near the diagonal,
2110a30f8f8cSSatish Balay    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2111a30f8f8cSSatish Balay    or you will get TERRIBLE performance; see the users' manual chapter on
2112a30f8f8cSSatish Balay    matrices.
2113a30f8f8cSSatish Balay 
2114a30f8f8cSSatish Balay    Level: intermediate
2115a30f8f8cSSatish Balay 
2116a30f8f8cSSatish Balay .keywords: matrix, block, aij, compressed row, sparse, parallel
2117a30f8f8cSSatish Balay 
2118a30f8f8cSSatish Balay .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPISBAIJ()
2119a30f8f8cSSatish Balay @*/
2120a30f8f8cSSatish Balay 
2121a30f8f8cSSatish Balay int MatCreateMPISBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
2122a30f8f8cSSatish Balay {
2123a30f8f8cSSatish Balay   Mat          B;
2124a30f8f8cSSatish Balay   Mat_MPISBAIJ  *b;
2125a30f8f8cSSatish Balay   int          ierr,i,sum[1],work[1],mbs,nbs,Mbs=PETSC_DECIDE,size;
2126a30f8f8cSSatish Balay   PetscTruth   flag1,flag2,flg;
2127a30f8f8cSSatish Balay 
2128a30f8f8cSSatish Balay   PetscFunctionBegin;
2129a30f8f8cSSatish Balay   if (M != N || m != n){ /* N and n are not used after this */
2130a30f8f8cSSatish Balay     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"For symmetric format, set M=N and m=n");
2131a30f8f8cSSatish Balay   }
2132a30f8f8cSSatish Balay   ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2133a30f8f8cSSatish Balay 
2134a30f8f8cSSatish Balay   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Invalid block size specified, must be positive");
2135a30f8f8cSSatish Balay   if (d_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nz cannot be less than -2: value %d",d_nz);
2136a30f8f8cSSatish Balay   if (o_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nz cannot be less than -2: value %d",o_nz);
2137a30f8f8cSSatish Balay   if (d_nnz) {
2138a30f8f8cSSatish Balay     for (i=0; i<m/bs; i++) {
2139a30f8f8cSSatish Balay       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nnz cannot be less than -1: local row %d value %d",i,d_nnz[i]);
2140a30f8f8cSSatish Balay     }
2141a30f8f8cSSatish Balay   }
2142a30f8f8cSSatish Balay   if (o_nnz) {
2143a30f8f8cSSatish Balay     for (i=0; i<m/bs; i++) {
2144a30f8f8cSSatish Balay       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nnz cannot be less than -1: local row %d value %d",i,o_nnz[i]);
2145a30f8f8cSSatish Balay     }
2146a30f8f8cSSatish Balay   }
2147a30f8f8cSSatish Balay 
2148a30f8f8cSSatish Balay   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2149a30f8f8cSSatish Balay   ierr = OptionsHasName(PETSC_NULL,"-mat_mpisbaij",&flag1);CHKERRQ(ierr);
2150a30f8f8cSSatish Balay   ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2);CHKERRQ(ierr);
2151a30f8f8cSSatish Balay   if (!flag1 && !flag2 && size == 1) {
2152a30f8f8cSSatish Balay     if (M == PETSC_DECIDE) M = m;
2153a30f8f8cSSatish Balay     ierr = MatCreateSeqSBAIJ(comm,bs,M,M,d_nz,d_nnz,A);CHKERRQ(ierr);
2154a30f8f8cSSatish Balay     PetscFunctionReturn(0);
2155a30f8f8cSSatish Balay   }
2156a30f8f8cSSatish Balay 
2157a30f8f8cSSatish Balay   *A = 0;
2158a30f8f8cSSatish Balay   PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPISBAIJ,"Mat",comm,MatDestroy,MatView);
2159a30f8f8cSSatish Balay   PLogObjectCreate(B);
2160a30f8f8cSSatish Balay   B->data = (void*)(b = PetscNew(Mat_MPISBAIJ));CHKPTRQ(b);
2161a30f8f8cSSatish Balay   ierr    = PetscMemzero(b,sizeof(Mat_MPISBAIJ));CHKERRQ(ierr);
2162a30f8f8cSSatish Balay   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2163a30f8f8cSSatish Balay 
2164a30f8f8cSSatish Balay   B->ops->destroy    = MatDestroy_MPISBAIJ;
2165a30f8f8cSSatish Balay   B->ops->view       = MatView_MPISBAIJ;
2166a30f8f8cSSatish Balay   B->mapping    = 0;
2167a30f8f8cSSatish Balay   B->factor     = 0;
2168a30f8f8cSSatish Balay   B->assembled  = PETSC_FALSE;
2169a30f8f8cSSatish Balay 
2170a30f8f8cSSatish Balay   B->insertmode = NOT_SET_VALUES;
2171a30f8f8cSSatish Balay   ierr = MPI_Comm_rank(comm,&b->rank);CHKERRQ(ierr);
2172a30f8f8cSSatish Balay   ierr = MPI_Comm_size(comm,&b->size);CHKERRQ(ierr);
2173a30f8f8cSSatish Balay 
2174a30f8f8cSSatish Balay   if (m == PETSC_DECIDE && (d_nnz || o_nnz)) {
2175a30f8f8cSSatish Balay     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
2176a30f8f8cSSatish Balay   }
2177a30f8f8cSSatish Balay   if (M == PETSC_DECIDE && m == PETSC_DECIDE) {
2178a30f8f8cSSatish Balay     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either M or m should be specified");
2179a30f8f8cSSatish Balay   }
2180a30f8f8cSSatish Balay   if (M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE;
2181a30f8f8cSSatish Balay 
2182a30f8f8cSSatish Balay   if (M == PETSC_DECIDE) {
2183a30f8f8cSSatish Balay     work[0] = m; mbs = m/bs;
2184a30f8f8cSSatish Balay     ierr = MPI_Allreduce(work,sum,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
2185a30f8f8cSSatish Balay     M = sum[0]; Mbs = M/bs;
2186a30f8f8cSSatish Balay   } else { /* M is specified */
2187a30f8f8cSSatish Balay     Mbs = M/bs;
2188a30f8f8cSSatish Balay     if (Mbs*bs != M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global rows must be divisible by blocksize");
2189a30f8f8cSSatish Balay     mbs = Mbs/b->size + ((Mbs % b->size) > b->rank);
2190a30f8f8cSSatish Balay     m   = mbs*bs;
2191a30f8f8cSSatish Balay   }
2192a30f8f8cSSatish Balay 
2193a30f8f8cSSatish Balay   if (mbs*bs != m) {
2194a30f8f8cSSatish Balay     SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of local rows/cols must be divisible by blocksize");
2195a30f8f8cSSatish Balay   }
2196a30f8f8cSSatish Balay 
2197a30f8f8cSSatish Balay   b->m = m; B->m = m;
2198a30f8f8cSSatish Balay   b->n = m; B->n = m;
2199a30f8f8cSSatish Balay   b->N = M; B->N = M;
2200a30f8f8cSSatish Balay   b->M = M;
2201a30f8f8cSSatish Balay   B->M = M;
2202a30f8f8cSSatish Balay   b->bs  = bs;
2203a30f8f8cSSatish Balay   b->bs2 = bs*bs;
2204a30f8f8cSSatish Balay   b->mbs = mbs;
2205a30f8f8cSSatish Balay   b->nbs = mbs;
2206a30f8f8cSSatish Balay   b->Mbs = Mbs;
2207a30f8f8cSSatish Balay   b->Nbs = Mbs;
2208a30f8f8cSSatish Balay 
2209a30f8f8cSSatish Balay   /* the information in the maps duplicates the information computed below, eventually
2210a30f8f8cSSatish Balay      we should remove the duplicate information that is not contained in the maps */
2211a30f8f8cSSatish Balay   ierr = MapCreateMPI(B->comm,m,M,&B->rmap);CHKERRQ(ierr);
2212a30f8f8cSSatish Balay   ierr = MapCreateMPI(B->comm,m,M,&B->cmap);CHKERRQ(ierr);
2213a30f8f8cSSatish Balay 
2214a30f8f8cSSatish Balay   /* build local table of row and column ownerships */
2215a30f8f8cSSatish Balay   b->rowners = (int*)PetscMalloc(3*(b->size+2)*sizeof(int));CHKPTRQ(b->rowners);
2216a30f8f8cSSatish Balay   PLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ));
2217a30f8f8cSSatish Balay   b->cowners    = b->rowners + b->size + 2;
2218a30f8f8cSSatish Balay   b->rowners_bs = b->cowners + b->size + 2;
2219a30f8f8cSSatish Balay   ierr = MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2220a30f8f8cSSatish Balay   b->rowners[0]    = 0;
2221a30f8f8cSSatish Balay   for (i=2; i<=b->size; i++) {
2222a30f8f8cSSatish Balay     b->rowners[i] += b->rowners[i-1];
2223a30f8f8cSSatish Balay   }
2224a30f8f8cSSatish Balay   for (i=0; i<=b->size; i++) {
2225a30f8f8cSSatish Balay     b->rowners_bs[i] = b->rowners[i]*bs;
2226a30f8f8cSSatish Balay   }
2227a30f8f8cSSatish Balay   b->rstart    = b->rowners[b->rank];
2228a30f8f8cSSatish Balay   b->rend      = b->rowners[b->rank+1];
2229a30f8f8cSSatish Balay   b->rstart_bs = b->rstart * bs;
2230a30f8f8cSSatish Balay   b->rend_bs   = b->rend * bs;
2231a30f8f8cSSatish Balay 
2232a30f8f8cSSatish Balay   b->cstart    = b->rstart;
2233a30f8f8cSSatish Balay   b->cend      = b->rend;
2234a30f8f8cSSatish Balay   b->cstart_bs = b->cstart * bs;
2235a30f8f8cSSatish Balay   b->cend_bs   = b->cend * bs;
2236a30f8f8cSSatish Balay 
2237a30f8f8cSSatish Balay 
2238a30f8f8cSSatish Balay   if (d_nz == PETSC_DEFAULT) d_nz = 5;
2239a30f8f8cSSatish Balay   ierr = MatCreateSeqSBAIJ(PETSC_COMM_SELF,bs,m,m,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
2240a30f8f8cSSatish Balay   PLogObjectParent(B,b->A);
2241a30f8f8cSSatish Balay   if (o_nz == PETSC_DEFAULT) o_nz = 0;
2242a30f8f8cSSatish Balay   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,M,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
2243a30f8f8cSSatish Balay   PLogObjectParent(B,b->B);
2244a30f8f8cSSatish Balay 
2245a30f8f8cSSatish Balay   /* build cache for off array entries formed */
2246a30f8f8cSSatish Balay   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
2247a30f8f8cSSatish Balay   ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);
2248a30f8f8cSSatish Balay   b->donotstash  = PETSC_FALSE;
2249a30f8f8cSSatish Balay   b->colmap      = PETSC_NULL;
2250a30f8f8cSSatish Balay   b->garray      = PETSC_NULL;
2251a30f8f8cSSatish Balay   b->roworiented = PETSC_TRUE;
2252a30f8f8cSSatish Balay 
2253a30f8f8cSSatish Balay #if defined(PEYSC_USE_MAT_SINGLE)
2254a30f8f8cSSatish Balay   /* stuff for MatSetValues_XXX in single precision */
2255a30f8f8cSSatish Balay   b->lensetvalues     = 0;
2256a30f8f8cSSatish Balay   b->setvaluescopy    = PETSC_NULL;
2257a30f8f8cSSatish Balay #endif
2258a30f8f8cSSatish Balay 
2259a30f8f8cSSatish Balay   /* stuff used in block assembly */
2260a30f8f8cSSatish Balay   b->barray       = 0;
2261a30f8f8cSSatish Balay 
2262a30f8f8cSSatish Balay   /* stuff used for matrix vector multiply */
2263a30f8f8cSSatish Balay   b->lvec         = 0;
2264a30f8f8cSSatish Balay   b->Mvctx        = 0;
2265a30f8f8cSSatish Balay 
2266a30f8f8cSSatish Balay   /* stuff for MatGetRow() */
2267a30f8f8cSSatish Balay   b->rowindices   = 0;
2268a30f8f8cSSatish Balay   b->rowvalues    = 0;
2269a30f8f8cSSatish Balay   b->getrowactive = PETSC_FALSE;
2270a30f8f8cSSatish Balay 
2271a30f8f8cSSatish Balay   /* hash table stuff */
2272a30f8f8cSSatish Balay   b->ht           = 0;
2273a30f8f8cSSatish Balay   b->hd           = 0;
2274a30f8f8cSSatish Balay   b->ht_size      = 0;
2275a30f8f8cSSatish Balay   b->ht_flag      = PETSC_FALSE;
2276a30f8f8cSSatish Balay   b->ht_fact      = 0;
2277a30f8f8cSSatish Balay   b->ht_total_ct  = 0;
2278a30f8f8cSSatish Balay   b->ht_insert_ct = 0;
2279a30f8f8cSSatish Balay 
2280a30f8f8cSSatish Balay   *A = B;
2281a30f8f8cSSatish Balay   ierr = OptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
2282a30f8f8cSSatish Balay   if (flg) {
2283a30f8f8cSSatish Balay     double fact = 1.39;
2284a30f8f8cSSatish Balay     ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
2285a30f8f8cSSatish Balay     ierr = OptionsGetDouble(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
2286a30f8f8cSSatish Balay     if (fact <= 1.0) fact = 1.39;
2287a30f8f8cSSatish Balay     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
2288a30f8f8cSSatish Balay     PLogInfo(0,"MatCreateMPISBAIJ:Hash table Factor used %5.2f\n",fact);
2289a30f8f8cSSatish Balay   }
2290a30f8f8cSSatish Balay   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2291a30f8f8cSSatish Balay                                      "MatStoreValues_MPISBAIJ",
2292a30f8f8cSSatish Balay                                      MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
2293a30f8f8cSSatish Balay   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2294a30f8f8cSSatish Balay                                      "MatRetrieveValues_MPISBAIJ",
2295a30f8f8cSSatish Balay                                      MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
2296a30f8f8cSSatish Balay   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
2297a30f8f8cSSatish Balay                                      "MatGetDiagonalBlock_MPISBAIJ",
2298a30f8f8cSSatish Balay                                      MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
2299a30f8f8cSSatish Balay   PetscFunctionReturn(0);
2300a30f8f8cSSatish Balay }
2301a30f8f8cSSatish Balay 
2302a30f8f8cSSatish Balay 
2303a30f8f8cSSatish Balay #undef __FUNC__
2304a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatDuplicate_MPISBAIJ"
2305a30f8f8cSSatish Balay static int MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2306a30f8f8cSSatish Balay {
2307a30f8f8cSSatish Balay   Mat         mat;
2308a30f8f8cSSatish Balay   Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2309a30f8f8cSSatish Balay   int         ierr,len=0;
2310a30f8f8cSSatish Balay   PetscTruth  flg;
2311a30f8f8cSSatish Balay 
2312a30f8f8cSSatish Balay   PetscFunctionBegin;
2313a30f8f8cSSatish Balay   *newmat       = 0;
2314a30f8f8cSSatish Balay   PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPISBAIJ,"Mat",matin->comm,MatDestroy,MatView);
2315a30f8f8cSSatish Balay   PLogObjectCreate(mat);
2316a30f8f8cSSatish Balay   mat->data         = (void*)(a = PetscNew(Mat_MPISBAIJ));CHKPTRQ(a);
2317a30f8f8cSSatish Balay   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2318a30f8f8cSSatish Balay   mat->ops->destroy = MatDestroy_MPISBAIJ;
2319a30f8f8cSSatish Balay   mat->ops->view    = MatView_MPISBAIJ;
2320a30f8f8cSSatish Balay   mat->factor       = matin->factor;
2321a30f8f8cSSatish Balay   mat->assembled    = PETSC_TRUE;
2322a30f8f8cSSatish Balay   mat->insertmode   = NOT_SET_VALUES;
2323a30f8f8cSSatish Balay 
2324a30f8f8cSSatish Balay   a->m = mat->m   = oldmat->m;
2325a30f8f8cSSatish Balay   a->n = mat->n   = oldmat->n;
2326a30f8f8cSSatish Balay   a->M = mat->M   = oldmat->M;
2327a30f8f8cSSatish Balay   a->N = mat->N   = oldmat->N;
2328a30f8f8cSSatish Balay 
2329a30f8f8cSSatish Balay   a->bs  = oldmat->bs;
2330a30f8f8cSSatish Balay   a->bs2 = oldmat->bs2;
2331a30f8f8cSSatish Balay   a->mbs = oldmat->mbs;
2332a30f8f8cSSatish Balay   a->nbs = oldmat->nbs;
2333a30f8f8cSSatish Balay   a->Mbs = oldmat->Mbs;
2334a30f8f8cSSatish Balay   a->Nbs = oldmat->Nbs;
2335a30f8f8cSSatish Balay 
2336a30f8f8cSSatish Balay   a->rstart       = oldmat->rstart;
2337a30f8f8cSSatish Balay   a->rend         = oldmat->rend;
2338a30f8f8cSSatish Balay   a->cstart       = oldmat->cstart;
2339a30f8f8cSSatish Balay   a->cend         = oldmat->cend;
2340a30f8f8cSSatish Balay   a->size         = oldmat->size;
2341a30f8f8cSSatish Balay   a->rank         = oldmat->rank;
2342a30f8f8cSSatish Balay   a->donotstash   = oldmat->donotstash;
2343a30f8f8cSSatish Balay   a->roworiented  = oldmat->roworiented;
2344a30f8f8cSSatish Balay   a->rowindices   = 0;
2345a30f8f8cSSatish Balay   a->rowvalues    = 0;
2346a30f8f8cSSatish Balay   a->getrowactive = PETSC_FALSE;
2347a30f8f8cSSatish Balay   a->barray       = 0;
2348a30f8f8cSSatish Balay   a->rstart_bs    = oldmat->rstart_bs;
2349a30f8f8cSSatish Balay   a->rend_bs      = oldmat->rend_bs;
2350a30f8f8cSSatish Balay   a->cstart_bs    = oldmat->cstart_bs;
2351a30f8f8cSSatish Balay   a->cend_bs      = oldmat->cend_bs;
2352a30f8f8cSSatish Balay 
2353a30f8f8cSSatish Balay   /* hash table stuff */
2354a30f8f8cSSatish Balay   a->ht           = 0;
2355a30f8f8cSSatish Balay   a->hd           = 0;
2356a30f8f8cSSatish Balay   a->ht_size      = 0;
2357a30f8f8cSSatish Balay   a->ht_flag      = oldmat->ht_flag;
2358a30f8f8cSSatish Balay   a->ht_fact      = oldmat->ht_fact;
2359a30f8f8cSSatish Balay   a->ht_total_ct  = 0;
2360a30f8f8cSSatish Balay   a->ht_insert_ct = 0;
2361a30f8f8cSSatish Balay 
2362a30f8f8cSSatish Balay 
2363a30f8f8cSSatish Balay   a->rowners = (int*)PetscMalloc(3*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners);
2364a30f8f8cSSatish Balay   PLogObjectMemory(mat,3*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ));
2365a30f8f8cSSatish Balay   a->cowners    = a->rowners + a->size + 2;
2366a30f8f8cSSatish Balay   a->rowners_bs = a->cowners + a->size + 2;
2367a30f8f8cSSatish Balay   ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr);
2368a30f8f8cSSatish Balay   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
2369a30f8f8cSSatish Balay   ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr);
2370a30f8f8cSSatish Balay   if (oldmat->colmap) {
2371a30f8f8cSSatish Balay #if defined (PETSC_USE_CTABLE)
2372a30f8f8cSSatish Balay   ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2373a30f8f8cSSatish Balay #else
2374a30f8f8cSSatish Balay     a->colmap = (int*)PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap);
2375a30f8f8cSSatish Balay     PLogObjectMemory(mat,(a->Nbs)*sizeof(int));
2376a30f8f8cSSatish Balay     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr);
2377a30f8f8cSSatish Balay #endif
2378a30f8f8cSSatish Balay   } else a->colmap = 0;
2379a30f8f8cSSatish Balay   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2380a30f8f8cSSatish Balay     a->garray = (int*)PetscMalloc(len*sizeof(int));CHKPTRQ(a->garray);
2381a30f8f8cSSatish Balay     PLogObjectMemory(mat,len*sizeof(int));
2382a30f8f8cSSatish Balay     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr);
2383a30f8f8cSSatish Balay   } else a->garray = 0;
2384a30f8f8cSSatish Balay 
2385a30f8f8cSSatish Balay   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2386a30f8f8cSSatish Balay   PLogObjectParent(mat,a->lvec);
2387a30f8f8cSSatish Balay   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2388a30f8f8cSSatish Balay 
2389a30f8f8cSSatish Balay   PLogObjectParent(mat,a->Mvctx);
2390a30f8f8cSSatish Balay   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2391a30f8f8cSSatish Balay   PLogObjectParent(mat,a->A);
2392a30f8f8cSSatish Balay   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2393a30f8f8cSSatish Balay   PLogObjectParent(mat,a->B);
2394a30f8f8cSSatish Balay   ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr);
2395a30f8f8cSSatish Balay   ierr = FListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr);
2396a30f8f8cSSatish Balay   if (flg) {
2397a30f8f8cSSatish Balay     ierr = MatPrintHelp(mat);CHKERRQ(ierr);
2398a30f8f8cSSatish Balay   }
2399a30f8f8cSSatish Balay   *newmat = mat;
2400a30f8f8cSSatish Balay   PetscFunctionReturn(0);
2401a30f8f8cSSatish Balay }
2402a30f8f8cSSatish Balay 
2403a30f8f8cSSatish Balay #include "petscsys.h"
2404a30f8f8cSSatish Balay 
2405a30f8f8cSSatish Balay #undef __FUNC__
2406a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatLoad_MPISBAIJ"
2407a30f8f8cSSatish Balay int MatLoad_MPISBAIJ(Viewer viewer,MatType type,Mat *newmat)
2408a30f8f8cSSatish Balay {
2409a30f8f8cSSatish Balay   Mat          A;
2410a30f8f8cSSatish Balay   int          i,nz,ierr,j,rstart,rend,fd;
2411a30f8f8cSSatish Balay   Scalar       *vals,*buf;
2412a30f8f8cSSatish Balay   MPI_Comm     comm = ((PetscObject)viewer)->comm;
2413a30f8f8cSSatish Balay   MPI_Status   status;
2414a30f8f8cSSatish Balay   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
2415a30f8f8cSSatish Balay   int          *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf;
2416a30f8f8cSSatish Balay   int          tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows;
2417a30f8f8cSSatish Balay   int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2418a30f8f8cSSatish Balay   int          dcount,kmax,k,nzcount,tmp;
2419a30f8f8cSSatish Balay 
2420a30f8f8cSSatish Balay   PetscFunctionBegin;
2421a30f8f8cSSatish Balay   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2422a30f8f8cSSatish Balay 
2423a30f8f8cSSatish Balay   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2424a30f8f8cSSatish Balay   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2425a30f8f8cSSatish Balay   if (!rank) {
2426a30f8f8cSSatish Balay     ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2427a30f8f8cSSatish Balay     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2428a30f8f8cSSatish Balay     if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
2429a30f8f8cSSatish Balay     if (header[3] < 0) {
2430a30f8f8cSSatish Balay       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as MPISBAIJ");
2431a30f8f8cSSatish Balay     }
2432a30f8f8cSSatish Balay   }
2433a30f8f8cSSatish Balay 
2434a30f8f8cSSatish Balay   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
2435a30f8f8cSSatish Balay   M = header[1]; N = header[2];
2436a30f8f8cSSatish Balay 
2437a30f8f8cSSatish Balay   if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices");
2438a30f8f8cSSatish Balay 
2439a30f8f8cSSatish Balay   /*
2440a30f8f8cSSatish Balay      This code adds extra rows to make sure the number of rows is
2441a30f8f8cSSatish Balay      divisible by the blocksize
2442a30f8f8cSSatish Balay   */
2443a30f8f8cSSatish Balay   Mbs        = M/bs;
2444a30f8f8cSSatish Balay   extra_rows = bs - M + bs*(Mbs);
2445a30f8f8cSSatish Balay   if (extra_rows == bs) extra_rows = 0;
2446a30f8f8cSSatish Balay   else                  Mbs++;
2447a30f8f8cSSatish Balay   if (extra_rows &&!rank) {
2448a30f8f8cSSatish Balay     PLogInfo(0,"MatLoad_MPISBAIJ:Padding loaded matrix to match blocksize\n");
2449a30f8f8cSSatish Balay   }
2450a30f8f8cSSatish Balay 
2451a30f8f8cSSatish Balay   /* determine ownership of all rows */
2452a30f8f8cSSatish Balay   mbs = Mbs/size + ((Mbs % size) > rank);
2453a30f8f8cSSatish Balay   m   = mbs * bs;
2454a30f8f8cSSatish Balay   rowners = (int*)PetscMalloc(2*(size+2)*sizeof(int));CHKPTRQ(rowners);
2455a30f8f8cSSatish Balay   browners = rowners + size + 1;
2456a30f8f8cSSatish Balay   ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2457a30f8f8cSSatish Balay   rowners[0] = 0;
2458a30f8f8cSSatish Balay   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2459a30f8f8cSSatish Balay   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2460a30f8f8cSSatish Balay   rstart = rowners[rank];
2461a30f8f8cSSatish Balay   rend   = rowners[rank+1];
2462a30f8f8cSSatish Balay 
2463a30f8f8cSSatish Balay   /* distribute row lengths to all processors */
2464a30f8f8cSSatish Balay   locrowlens = (int*)PetscMalloc((rend-rstart)*bs*sizeof(int));CHKPTRQ(locrowlens);
2465a30f8f8cSSatish Balay   if (!rank) {
2466a30f8f8cSSatish Balay     rowlengths = (int*)PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths);
2467a30f8f8cSSatish Balay     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2468a30f8f8cSSatish Balay     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2469a30f8f8cSSatish Balay     sndcounts = (int*)PetscMalloc(size*sizeof(int));CHKPTRQ(sndcounts);
2470a30f8f8cSSatish Balay     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2471a30f8f8cSSatish Balay     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2472a30f8f8cSSatish Balay     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2473a30f8f8cSSatish Balay   } else {
2474a30f8f8cSSatish Balay     ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2475a30f8f8cSSatish Balay   }
2476a30f8f8cSSatish Balay 
2477a30f8f8cSSatish Balay   if (!rank) {
2478a30f8f8cSSatish Balay     /* calculate the number of nonzeros on each processor */
2479a30f8f8cSSatish Balay     procsnz = (int*)PetscMalloc(size*sizeof(int));CHKPTRQ(procsnz);
2480a30f8f8cSSatish Balay     ierr    = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
2481a30f8f8cSSatish Balay     for (i=0; i<size; i++) {
2482a30f8f8cSSatish Balay       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2483a30f8f8cSSatish Balay         procsnz[i] += rowlengths[j];
2484a30f8f8cSSatish Balay       }
2485a30f8f8cSSatish Balay     }
2486a30f8f8cSSatish Balay     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2487a30f8f8cSSatish Balay 
2488a30f8f8cSSatish Balay     /* determine max buffer needed and allocate it */
2489a30f8f8cSSatish Balay     maxnz = 0;
2490a30f8f8cSSatish Balay     for (i=0; i<size; i++) {
2491a30f8f8cSSatish Balay       maxnz = PetscMax(maxnz,procsnz[i]);
2492a30f8f8cSSatish Balay     }
2493a30f8f8cSSatish Balay     cols = (int*)PetscMalloc(maxnz*sizeof(int));CHKPTRQ(cols);
2494a30f8f8cSSatish Balay 
2495a30f8f8cSSatish Balay     /* read in my part of the matrix column indices  */
2496a30f8f8cSSatish Balay     nz = procsnz[0];
2497a30f8f8cSSatish Balay     ibuf = (int*)PetscMalloc(nz*sizeof(int));CHKPTRQ(ibuf);
2498a30f8f8cSSatish Balay     mycols = ibuf;
2499a30f8f8cSSatish Balay     if (size == 1)  nz -= extra_rows;
2500a30f8f8cSSatish Balay     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2501a30f8f8cSSatish Balay     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2502a30f8f8cSSatish Balay 
2503a30f8f8cSSatish Balay     /* read in every ones (except the last) and ship off */
2504a30f8f8cSSatish Balay     for (i=1; i<size-1; i++) {
2505a30f8f8cSSatish Balay       nz   = procsnz[i];
2506a30f8f8cSSatish Balay       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2507a30f8f8cSSatish Balay       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
2508a30f8f8cSSatish Balay     }
2509a30f8f8cSSatish Balay     /* read in the stuff for the last proc */
2510a30f8f8cSSatish Balay     if (size != 1) {
2511a30f8f8cSSatish Balay       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2512a30f8f8cSSatish Balay       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2513a30f8f8cSSatish Balay       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2514a30f8f8cSSatish Balay       ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
2515a30f8f8cSSatish Balay     }
2516a30f8f8cSSatish Balay     ierr = PetscFree(cols);CHKERRQ(ierr);
2517a30f8f8cSSatish Balay   } else {
2518a30f8f8cSSatish Balay     /* determine buffer space needed for message */
2519a30f8f8cSSatish Balay     nz = 0;
2520a30f8f8cSSatish Balay     for (i=0; i<m; i++) {
2521a30f8f8cSSatish Balay       nz += locrowlens[i];
2522a30f8f8cSSatish Balay     }
2523a30f8f8cSSatish Balay     ibuf   = (int*)PetscMalloc(nz*sizeof(int));CHKPTRQ(ibuf);
2524a30f8f8cSSatish Balay     mycols = ibuf;
2525a30f8f8cSSatish Balay     /* receive message of column indices*/
2526a30f8f8cSSatish Balay     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2527a30f8f8cSSatish Balay     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2528a30f8f8cSSatish Balay     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
2529a30f8f8cSSatish Balay   }
2530a30f8f8cSSatish Balay 
2531a30f8f8cSSatish Balay   /* loop over local rows, determining number of off diagonal entries */
2532a30f8f8cSSatish Balay   dlens  = (int*)PetscMalloc(2*(rend-rstart+1)*sizeof(int));CHKPTRQ(dlens);
2533a30f8f8cSSatish Balay   odlens = dlens + (rend-rstart);
2534a30f8f8cSSatish Balay   mask   = (int*)PetscMalloc(3*Mbs*sizeof(int));CHKPTRQ(mask);
2535a30f8f8cSSatish Balay   ierr   = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr);
2536a30f8f8cSSatish Balay   masked1 = mask    + Mbs;
2537a30f8f8cSSatish Balay   masked2 = masked1 + Mbs;
2538a30f8f8cSSatish Balay   rowcount = 0; nzcount = 0;
2539a30f8f8cSSatish Balay   for (i=0; i<mbs; i++) {
2540a30f8f8cSSatish Balay     dcount  = 0;
2541a30f8f8cSSatish Balay     odcount = 0;
2542a30f8f8cSSatish Balay     for (j=0; j<bs; j++) {
2543a30f8f8cSSatish Balay       kmax = locrowlens[rowcount];
2544a30f8f8cSSatish Balay       for (k=0; k<kmax; k++) {
2545a30f8f8cSSatish Balay         tmp = mycols[nzcount++]/bs;
2546a30f8f8cSSatish Balay         if (!mask[tmp]) {
2547a30f8f8cSSatish Balay           mask[tmp] = 1;
2548a30f8f8cSSatish Balay           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
2549a30f8f8cSSatish Balay           else masked1[dcount++] = tmp;
2550a30f8f8cSSatish Balay         }
2551a30f8f8cSSatish Balay       }
2552a30f8f8cSSatish Balay       rowcount++;
2553a30f8f8cSSatish Balay     }
2554a30f8f8cSSatish Balay 
2555a30f8f8cSSatish Balay     dlens[i]  = dcount;
2556a30f8f8cSSatish Balay     odlens[i] = odcount;
2557a30f8f8cSSatish Balay 
2558a30f8f8cSSatish Balay     /* zero out the mask elements we set */
2559a30f8f8cSSatish Balay     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2560a30f8f8cSSatish Balay     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2561a30f8f8cSSatish Balay   }
2562a30f8f8cSSatish Balay 
2563a30f8f8cSSatish Balay   /* create our matrix */
2564a30f8f8cSSatish Balay   ierr = MatCreateMPISBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);CHKERRQ(ierr);
2565a30f8f8cSSatish Balay   A = *newmat;
2566a30f8f8cSSatish Balay   MatSetOption(A,MAT_COLUMNS_SORTED);
2567a30f8f8cSSatish Balay 
2568a30f8f8cSSatish Balay   if (!rank) {
2569a30f8f8cSSatish Balay     buf = (Scalar*)PetscMalloc(maxnz*sizeof(Scalar));CHKPTRQ(buf);
2570a30f8f8cSSatish Balay     /* read in my part of the matrix numerical values  */
2571a30f8f8cSSatish Balay     nz = procsnz[0];
2572a30f8f8cSSatish Balay     vals = buf;
2573a30f8f8cSSatish Balay     mycols = ibuf;
2574a30f8f8cSSatish Balay     if (size == 1)  nz -= extra_rows;
2575a30f8f8cSSatish Balay     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2576a30f8f8cSSatish Balay     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2577a30f8f8cSSatish Balay 
2578a30f8f8cSSatish Balay     /* insert into matrix */
2579a30f8f8cSSatish Balay     jj      = rstart*bs;
2580a30f8f8cSSatish Balay     for (i=0; i<m; i++) {
2581a30f8f8cSSatish Balay       ierr = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2582a30f8f8cSSatish Balay       mycols += locrowlens[i];
2583a30f8f8cSSatish Balay       vals   += locrowlens[i];
2584a30f8f8cSSatish Balay       jj++;
2585a30f8f8cSSatish Balay     }
2586a30f8f8cSSatish Balay     /* read in other processors (except the last one) and ship out */
2587a30f8f8cSSatish Balay     for (i=1; i<size-1; i++) {
2588a30f8f8cSSatish Balay       nz   = procsnz[i];
2589a30f8f8cSSatish Balay       vals = buf;
2590a30f8f8cSSatish Balay       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2591a30f8f8cSSatish Balay       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2592a30f8f8cSSatish Balay     }
2593a30f8f8cSSatish Balay     /* the last proc */
2594a30f8f8cSSatish Balay     if (size != 1){
2595a30f8f8cSSatish Balay       nz   = procsnz[i] - extra_rows;
2596a30f8f8cSSatish Balay       vals = buf;
2597a30f8f8cSSatish Balay       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2598a30f8f8cSSatish Balay       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2599a30f8f8cSSatish Balay       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
2600a30f8f8cSSatish Balay     }
2601a30f8f8cSSatish Balay     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2602a30f8f8cSSatish Balay   } else {
2603a30f8f8cSSatish Balay     /* receive numeric values */
2604a30f8f8cSSatish Balay     buf = (Scalar*)PetscMalloc(nz*sizeof(Scalar));CHKPTRQ(buf);
2605a30f8f8cSSatish Balay 
2606a30f8f8cSSatish Balay     /* receive message of values*/
2607a30f8f8cSSatish Balay     vals   = buf;
2608a30f8f8cSSatish Balay     mycols = ibuf;
2609a30f8f8cSSatish Balay     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2610a30f8f8cSSatish Balay     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2611a30f8f8cSSatish Balay     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
2612a30f8f8cSSatish Balay 
2613a30f8f8cSSatish Balay     /* insert into matrix */
2614a30f8f8cSSatish Balay     jj      = rstart*bs;
2615a30f8f8cSSatish Balay     for (i=0; i<m; i++) {
2616a30f8f8cSSatish Balay       ierr    = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2617a30f8f8cSSatish Balay       mycols += locrowlens[i];
2618a30f8f8cSSatish Balay       vals   += locrowlens[i];
2619a30f8f8cSSatish Balay       jj++;
2620a30f8f8cSSatish Balay     }
2621a30f8f8cSSatish Balay   }
2622a30f8f8cSSatish Balay   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2623a30f8f8cSSatish Balay   ierr = PetscFree(buf);CHKERRQ(ierr);
2624a30f8f8cSSatish Balay   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2625a30f8f8cSSatish Balay   ierr = PetscFree(rowners);CHKERRQ(ierr);
2626a30f8f8cSSatish Balay   ierr = PetscFree(dlens);CHKERRQ(ierr);
2627a30f8f8cSSatish Balay   ierr = PetscFree(mask);CHKERRQ(ierr);
2628a30f8f8cSSatish Balay   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2629a30f8f8cSSatish Balay   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2630a30f8f8cSSatish Balay   PetscFunctionReturn(0);
2631a30f8f8cSSatish Balay }
2632a30f8f8cSSatish Balay 
2633a30f8f8cSSatish Balay #undef __FUNC__
2634a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatMPISBAIJSetHashTableFactor"
2635a30f8f8cSSatish Balay /*@
2636a30f8f8cSSatish Balay    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2637a30f8f8cSSatish Balay 
2638a30f8f8cSSatish Balay    Input Parameters:
2639a30f8f8cSSatish Balay .  mat  - the matrix
2640a30f8f8cSSatish Balay .  fact - factor
2641a30f8f8cSSatish Balay 
2642a30f8f8cSSatish Balay    Collective on Mat
2643a30f8f8cSSatish Balay 
2644a30f8f8cSSatish Balay    Level: advanced
2645a30f8f8cSSatish Balay 
2646a30f8f8cSSatish Balay   Notes:
2647a30f8f8cSSatish Balay    This can also be set by the command line option: -mat_use_hash_table fact
2648a30f8f8cSSatish Balay 
2649a30f8f8cSSatish Balay .keywords: matrix, hashtable, factor, HT
2650a30f8f8cSSatish Balay 
2651a30f8f8cSSatish Balay .seealso: MatSetOption()
2652a30f8f8cSSatish Balay @*/
2653a30f8f8cSSatish Balay int MatMPISBAIJSetHashTableFactor(Mat mat,PetscReal fact)
2654a30f8f8cSSatish Balay {
2655a30f8f8cSSatish Balay   Mat_MPIBAIJ *baij;
2656a30f8f8cSSatish Balay 
2657a30f8f8cSSatish Balay   PetscFunctionBegin;
2658a30f8f8cSSatish Balay   PetscValidHeaderSpecific(mat,MAT_COOKIE);
2659a30f8f8cSSatish Balay   if (mat->type != MATMPIBAIJ) {
2660a30f8f8cSSatish Balay     SETERRQ(PETSC_ERR_ARG_WRONG,1,"Incorrect matrix type. Use MPIBAIJ only.");
2661a30f8f8cSSatish Balay   }
2662a30f8f8cSSatish Balay   baij = (Mat_MPIBAIJ*)mat->data;
2663a30f8f8cSSatish Balay   baij->ht_fact = fact;
2664a30f8f8cSSatish Balay   PetscFunctionReturn(0);
2665a30f8f8cSSatish Balay }
2666