xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision de8b6608baf6aee60dffeb7dae859cca4fb467b0)
1*de8b6608SHong Zhang /*$Id: mpisbaij.c,v 1.7 2000/07/19 16:34:18 hzhang Exp hzhang $*/
2a30f8f8cSSatish Balay 
3a30f8f8cSSatish Balay #include "src/mat/impls/baij/mpi/mpibaij.h"
4a30f8f8cSSatish Balay #include "src/vec/vecimpl.h"
5a30f8f8cSSatish Balay #include "mpisbaij.h"
6a30f8f8cSSatish Balay #include "src/mat/impls/sbaij/seq/sbaij.h"
7a30f8f8cSSatish Balay 
8a30f8f8cSSatish Balay extern int MatSetUpMultiply_MPISBAIJ(Mat);
9a30f8f8cSSatish Balay extern int DisAssemble_MPISBAIJ(Mat);
10a30f8f8cSSatish Balay extern int MatIncreaseOverlap_MPISBAIJ(Mat,int,IS *,int);
11a30f8f8cSSatish Balay extern int MatGetSubMatrices_MPISBAIJ(Mat,int,IS *,IS *,MatReuse,Mat **);
12a30f8f8cSSatish Balay extern int MatGetValues_SeqSBAIJ(Mat,int,int *,int,int *,Scalar *);
13a30f8f8cSSatish Balay extern int MatSetValues_SeqSBAIJ(Mat,int,int *,int,int *,Scalar *,InsertMode);
14a30f8f8cSSatish Balay extern int MatSetValuesBlocked_SeqSBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
15a30f8f8cSSatish Balay extern int MatGetRow_SeqSBAIJ(Mat,int,int*,int**,Scalar**);
16a30f8f8cSSatish Balay extern int MatRestoreRow_SeqSBAIJ(Mat,int,int*,int**,Scalar**);
17a30f8f8cSSatish Balay extern int MatPrintHelp_SeqSBAIJ(Mat);
18a30f8f8cSSatish Balay extern int MatZeroRows_SeqSBAIJ(Mat,IS,Scalar*);
19a30f8f8cSSatish Balay 
20a30f8f8cSSatish Balay /*  UGLY, ugly, ugly
21a30f8f8cSSatish Balay    When MatScalar == Scalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does
22a30f8f8cSSatish Balay    not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and
23a30f8f8cSSatish Balay    inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ()
24a30f8f8cSSatish Balay    converts the entries into single precision and then calls ..._MatScalar() to put them
25a30f8f8cSSatish Balay    into the single precision data structures.
26a30f8f8cSSatish Balay */
27a30f8f8cSSatish Balay #if defined(PETSC_USE_MAT_SINGLE)
28a30f8f8cSSatish Balay extern int MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
29a30f8f8cSSatish Balay extern int MatSetValues_MPISBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
30a30f8f8cSSatish Balay extern int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
31a30f8f8cSSatish Balay extern int MatSetValues_MPISBAIJ_HT_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
32a30f8f8cSSatish Balay extern int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
33a30f8f8cSSatish Balay #else
34a30f8f8cSSatish Balay #define MatSetValuesBlocked_SeqSBAIJ_MatScalar      MatSetValuesBlocked_SeqSBAIJ
35a30f8f8cSSatish Balay #define MatSetValues_MPISBAIJ_MatScalar             MatSetValues_MPISBAIJ
36a30f8f8cSSatish Balay #define MatSetValuesBlocked_MPISBAIJ_MatScalar      MatSetValuesBlocked_MPISBAIJ
37a30f8f8cSSatish Balay #define MatSetValues_MPISBAIJ_HT_MatScalar          MatSetValues_MPISBAIJ_HT
38a30f8f8cSSatish Balay #define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar   MatSetValuesBlocked_MPISBAIJ_HT
39a30f8f8cSSatish Balay #endif
40a30f8f8cSSatish Balay 
41a30f8f8cSSatish Balay EXTERN_C_BEGIN
42a30f8f8cSSatish Balay #undef __FUNC__
43a30f8f8cSSatish Balay #define __FUNC__ /*<a name="MatStoreValues_MPIBAIJ"></a>*/"MatStoreValues_MPIBAIJ"
44a30f8f8cSSatish Balay int MatStoreValues_MPISBAIJ(Mat mat)
45a30f8f8cSSatish Balay {
46a30f8f8cSSatish Balay   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
47a30f8f8cSSatish Balay   int         ierr;
48a30f8f8cSSatish Balay 
49a30f8f8cSSatish Balay   PetscFunctionBegin;
50a30f8f8cSSatish Balay   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
51a30f8f8cSSatish Balay   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
52a30f8f8cSSatish Balay   PetscFunctionReturn(0);
53a30f8f8cSSatish Balay }
54a30f8f8cSSatish Balay EXTERN_C_END
55a30f8f8cSSatish Balay 
56a30f8f8cSSatish Balay EXTERN_C_BEGIN
57a30f8f8cSSatish Balay #undef __FUNC__
58a30f8f8cSSatish Balay #define __FUNC__ /*<a name="MatRetrieveValues_MPIBAIJ"></a>*/"MatRetrieveValues_MPIBAIJ"
59a30f8f8cSSatish Balay int MatRetrieveValues_MPISBAIJ(Mat mat)
60a30f8f8cSSatish Balay {
61a30f8f8cSSatish Balay   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
62a30f8f8cSSatish Balay   int         ierr;
63a30f8f8cSSatish Balay 
64a30f8f8cSSatish Balay   PetscFunctionBegin;
65a30f8f8cSSatish Balay   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
66a30f8f8cSSatish Balay   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
67a30f8f8cSSatish Balay   PetscFunctionReturn(0);
68a30f8f8cSSatish Balay }
69a30f8f8cSSatish Balay EXTERN_C_END
70a30f8f8cSSatish Balay 
71a30f8f8cSSatish Balay /*
72a30f8f8cSSatish Balay      Local utility routine that creates a mapping from the global column
73a30f8f8cSSatish Balay    number to the local number in the off-diagonal part of the local
74a30f8f8cSSatish Balay    storage of the matrix.  This is done in a non scable way since the
75a30f8f8cSSatish Balay    length of colmap equals the global matrix length.
76a30f8f8cSSatish Balay */
77a30f8f8cSSatish Balay #undef __FUNC__
78a30f8f8cSSatish Balay #define __FUNC__ /*<a name="CreateColmap_MPISBAIJ_Private"></a>*/"CreateColmap_MPISBAIJ_Private"
79a30f8f8cSSatish Balay static int CreateColmap_MPISBAIJ_Private(Mat mat)
80a30f8f8cSSatish Balay {
81a30f8f8cSSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
82a30f8f8cSSatish Balay   Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data;
83a30f8f8cSSatish Balay   int         nbs = B->nbs,i,bs=B->bs,ierr;
84a30f8f8cSSatish Balay 
85a30f8f8cSSatish Balay   PetscFunctionBegin;
86a30f8f8cSSatish Balay #if defined (PETSC_USE_CTABLE)
87a30f8f8cSSatish Balay   ierr = PetscTableCreate(baij->nbs/5,&baij->colmap);CHKERRQ(ierr);
88a30f8f8cSSatish Balay   for (i=0; i<nbs; i++){
89a30f8f8cSSatish Balay     ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
90a30f8f8cSSatish Balay   }
91a30f8f8cSSatish Balay #else
92a30f8f8cSSatish Balay   baij->colmap = (int*)PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap);
93a30f8f8cSSatish Balay   PLogObjectMemory(mat,baij->Nbs*sizeof(int));
94a30f8f8cSSatish Balay   ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));CHKERRQ(ierr);
95a30f8f8cSSatish Balay   for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
96a30f8f8cSSatish Balay #endif
97a30f8f8cSSatish Balay   PetscFunctionReturn(0);
98a30f8f8cSSatish Balay }
99a30f8f8cSSatish Balay 
100a30f8f8cSSatish Balay #define CHUNKSIZE  10
101a30f8f8cSSatish Balay 
102a30f8f8cSSatish Balay #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
103a30f8f8cSSatish Balay { \
104a30f8f8cSSatish Balay  \
105a30f8f8cSSatish Balay     brow = row/bs;  \
106a30f8f8cSSatish Balay     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
107a30f8f8cSSatish Balay     rmax = aimax[brow]; nrow = ailen[brow]; \
108a30f8f8cSSatish Balay       bcol = col/bs; \
109a30f8f8cSSatish Balay       ridx = row % bs; cidx = col % bs; \
110a30f8f8cSSatish Balay       low = 0; high = nrow; \
111a30f8f8cSSatish Balay       while (high-low > 3) { \
112a30f8f8cSSatish Balay         t = (low+high)/2; \
113a30f8f8cSSatish Balay         if (rp[t] > bcol) high = t; \
114a30f8f8cSSatish Balay         else              low  = t; \
115a30f8f8cSSatish Balay       } \
116a30f8f8cSSatish Balay       for (_i=low; _i<high; _i++) { \
117a30f8f8cSSatish Balay         if (rp[_i] > bcol) break; \
118a30f8f8cSSatish Balay         if (rp[_i] == bcol) { \
119a30f8f8cSSatish Balay           bap  = ap +  bs2*_i + bs*cidx + ridx; \
120a30f8f8cSSatish Balay           if (addv == ADD_VALUES) *bap += value;  \
121a30f8f8cSSatish Balay           else                    *bap  = value;  \
122a30f8f8cSSatish Balay           goto a_noinsert; \
123a30f8f8cSSatish Balay         } \
124a30f8f8cSSatish Balay       } \
125a30f8f8cSSatish Balay       if (a->nonew == 1) goto a_noinsert; \
126a30f8f8cSSatish Balay       else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
127a30f8f8cSSatish Balay       if (nrow >= rmax) { \
128a30f8f8cSSatish Balay         /* there is no extra room in row, therefore enlarge */ \
129a30f8f8cSSatish Balay         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
130a30f8f8cSSatish Balay         MatScalar *new_a; \
131a30f8f8cSSatish Balay  \
132a30f8f8cSSatish Balay         if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
133a30f8f8cSSatish Balay  \
134a30f8f8cSSatish Balay         /* malloc new storage space */ \
135a30f8f8cSSatish Balay         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \
136a30f8f8cSSatish Balay         new_a   = (MatScalar*)PetscMalloc(len);CHKPTRQ(new_a); \
137a30f8f8cSSatish Balay         new_j   = (int*)(new_a + bs2*new_nz); \
138a30f8f8cSSatish Balay         new_i   = new_j + new_nz; \
139a30f8f8cSSatish Balay  \
140a30f8f8cSSatish Balay         /* copy over old data into new slots */ \
141a30f8f8cSSatish Balay         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \
142a30f8f8cSSatish Balay         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
143a30f8f8cSSatish Balay         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
144a30f8f8cSSatish Balay         len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
145a30f8f8cSSatish Balay         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
146a30f8f8cSSatish Balay         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
147a30f8f8cSSatish Balay         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));CHKERRQ(ierr); \
148a30f8f8cSSatish Balay         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
149a30f8f8cSSatish Balay                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
150a30f8f8cSSatish Balay         /* free up old matrix storage */ \
151a30f8f8cSSatish Balay         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
152a30f8f8cSSatish Balay         if (!a->singlemalloc) { \
153a30f8f8cSSatish Balay           ierr = PetscFree(a->i);CHKERRQ(ierr); \
154a30f8f8cSSatish Balay           ierr = PetscFree(a->j);CHKERRQ(ierr);\
155a30f8f8cSSatish Balay         } \
156a30f8f8cSSatish Balay         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
157a30f8f8cSSatish Balay         a->singlemalloc = PETSC_TRUE; \
158a30f8f8cSSatish Balay  \
159a30f8f8cSSatish Balay         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
160a30f8f8cSSatish Balay         rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
161a30f8f8cSSatish Balay         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
162a30f8f8cSSatish Balay         a->s_maxnz += bs2*CHUNKSIZE; \
163a30f8f8cSSatish Balay         a->reallocs++; \
164a30f8f8cSSatish Balay         a->s_nz++; \
165a30f8f8cSSatish Balay       } \
166a30f8f8cSSatish Balay       N = nrow++ - 1;  \
167a30f8f8cSSatish Balay       /* shift up all the later entries in this row */ \
168a30f8f8cSSatish Balay       for (ii=N; ii>=_i; ii--) { \
169a30f8f8cSSatish Balay         rp[ii+1] = rp[ii]; \
170a30f8f8cSSatish Balay         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
171a30f8f8cSSatish Balay       } \
172a30f8f8cSSatish Balay       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
173a30f8f8cSSatish Balay       rp[_i]                      = bcol;  \
174a30f8f8cSSatish Balay       ap[bs2*_i + bs*cidx + ridx] = value;  \
175a30f8f8cSSatish Balay       a_noinsert:; \
176a30f8f8cSSatish Balay     ailen[brow] = nrow; \
177a30f8f8cSSatish Balay }
178a30f8f8cSSatish Balay #ifndef MatSetValues_SeqBAIJ_B_Private
179a30f8f8cSSatish Balay #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
180a30f8f8cSSatish Balay { \
181a30f8f8cSSatish Balay     brow = row/bs;  \
182a30f8f8cSSatish Balay     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
183a30f8f8cSSatish Balay     rmax = bimax[brow]; nrow = bilen[brow]; \
184a30f8f8cSSatish Balay       bcol = col/bs; \
185a30f8f8cSSatish Balay       ridx = row % bs; cidx = col % bs; \
186a30f8f8cSSatish Balay       low = 0; high = nrow; \
187a30f8f8cSSatish Balay       while (high-low > 3) { \
188a30f8f8cSSatish Balay         t = (low+high)/2; \
189a30f8f8cSSatish Balay         if (rp[t] > bcol) high = t; \
190a30f8f8cSSatish Balay         else              low  = t; \
191a30f8f8cSSatish Balay       } \
192a30f8f8cSSatish Balay       for (_i=low; _i<high; _i++) { \
193a30f8f8cSSatish Balay         if (rp[_i] > bcol) break; \
194a30f8f8cSSatish Balay         if (rp[_i] == bcol) { \
195a30f8f8cSSatish Balay           bap  = ap +  bs2*_i + bs*cidx + ridx; \
196a30f8f8cSSatish Balay           if (addv == ADD_VALUES) *bap += value;  \
197a30f8f8cSSatish Balay           else                    *bap  = value;  \
198a30f8f8cSSatish Balay           goto b_noinsert; \
199a30f8f8cSSatish Balay         } \
200a30f8f8cSSatish Balay       } \
201a30f8f8cSSatish Balay       if (b->nonew == 1) goto b_noinsert; \
202a30f8f8cSSatish Balay       else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
203a30f8f8cSSatish Balay       if (nrow >= rmax) { \
204a30f8f8cSSatish Balay         /* there is no extra room in row, therefore enlarge */ \
205a30f8f8cSSatish Balay         int       new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
206a30f8f8cSSatish Balay         MatScalar *new_a; \
207a30f8f8cSSatish Balay  \
208a30f8f8cSSatish Balay         if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
209a30f8f8cSSatish Balay  \
210a30f8f8cSSatish Balay         /* malloc new storage space */ \
211a30f8f8cSSatish Balay         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \
212a30f8f8cSSatish Balay         new_a   = (MatScalar*)PetscMalloc(len);CHKPTRQ(new_a); \
213a30f8f8cSSatish Balay         new_j   = (int*)(new_a + bs2*new_nz); \
214a30f8f8cSSatish Balay         new_i   = new_j + new_nz; \
215a30f8f8cSSatish Balay  \
216a30f8f8cSSatish Balay         /* copy over old data into new slots */ \
217a30f8f8cSSatish Balay         for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \
218a30f8f8cSSatish Balay         for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
219a30f8f8cSSatish Balay         ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
220a30f8f8cSSatish Balay         len  = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
221a30f8f8cSSatish Balay         ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
222a30f8f8cSSatish Balay         ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
223a30f8f8cSSatish Balay         ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
224a30f8f8cSSatish Balay         ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
225a30f8f8cSSatish Balay                     ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
226a30f8f8cSSatish Balay         /* free up old matrix storage */ \
227a30f8f8cSSatish Balay         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
228a30f8f8cSSatish Balay         if (!b->singlemalloc) { \
229a30f8f8cSSatish Balay           ierr = PetscFree(b->i);CHKERRQ(ierr); \
230a30f8f8cSSatish Balay           ierr = PetscFree(b->j);CHKERRQ(ierr); \
231a30f8f8cSSatish Balay         } \
232a30f8f8cSSatish Balay         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
233a30f8f8cSSatish Balay         b->singlemalloc = PETSC_TRUE; \
234a30f8f8cSSatish Balay  \
235a30f8f8cSSatish Balay         rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
236a30f8f8cSSatish Balay         rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
237a30f8f8cSSatish Balay         PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
238a30f8f8cSSatish Balay         b->maxnz += bs2*CHUNKSIZE; \
239a30f8f8cSSatish Balay         b->reallocs++; \
240a30f8f8cSSatish Balay         b->nz++; \
241a30f8f8cSSatish Balay       } \
242a30f8f8cSSatish Balay       N = nrow++ - 1;  \
243a30f8f8cSSatish Balay       /* shift up all the later entries in this row */ \
244a30f8f8cSSatish Balay       for (ii=N; ii>=_i; ii--) { \
245a30f8f8cSSatish Balay         rp[ii+1] = rp[ii]; \
246a30f8f8cSSatish Balay         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
247a30f8f8cSSatish Balay       } \
248a30f8f8cSSatish Balay       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
249a30f8f8cSSatish Balay       rp[_i]                      = bcol;  \
250a30f8f8cSSatish Balay       ap[bs2*_i + bs*cidx + ridx] = value;  \
251a30f8f8cSSatish Balay       b_noinsert:; \
252a30f8f8cSSatish Balay     bilen[brow] = nrow; \
253a30f8f8cSSatish Balay }
254a30f8f8cSSatish Balay #endif
255a30f8f8cSSatish Balay 
256a30f8f8cSSatish Balay #if defined(PETSC_USE_MAT_SINGLE)
257a30f8f8cSSatish Balay #undef __FUNC__
258a30f8f8cSSatish Balay #define __FUNC__ /*<a name="MatSetValues_MPISBAIJ"></a>*/"MatSetValues_MPISBAIJ"
259a30f8f8cSSatish Balay int MatSetValues_MPISBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
260a30f8f8cSSatish Balay {
261a30f8f8cSSatish Balay   Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)mat->data;
262a30f8f8cSSatish Balay   int         ierr,i,N = m*n;
263a30f8f8cSSatish Balay   MatScalar   *vsingle;
264a30f8f8cSSatish Balay 
265a30f8f8cSSatish Balay   PetscFunctionBegin;
266a30f8f8cSSatish Balay   if (N > b->setvalueslen) {
267a30f8f8cSSatish Balay     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
268a30f8f8cSSatish Balay     b->setvaluescopy = (MatScalar*)PetscMalloc(N*sizeof(MatScalar));CHKPTRQ(b->setvaluescopy);
269a30f8f8cSSatish Balay     b->setvalueslen  = N;
270a30f8f8cSSatish Balay   }
271a30f8f8cSSatish Balay   vsingle = b->setvaluescopy;
272a30f8f8cSSatish Balay 
273a30f8f8cSSatish Balay   for (i=0; i<N; i++) {
274a30f8f8cSSatish Balay     vsingle[i] = v[i];
275a30f8f8cSSatish Balay   }
276a30f8f8cSSatish Balay   ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
277a30f8f8cSSatish Balay   PetscFunctionReturn(0);
278a30f8f8cSSatish Balay }
279a30f8f8cSSatish Balay 
280a30f8f8cSSatish Balay #undef __FUNC__
281a30f8f8cSSatish Balay #define __FUNC__ /*<a name="MatSetValuesBlocked_MPISBAIJ"></a>*/"MatSetValuesBlocked_MPISBAIJ"
282a30f8f8cSSatish Balay int MatSetValuesBlocked_MPISBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
283a30f8f8cSSatish Balay {
284a30f8f8cSSatish Balay   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
285a30f8f8cSSatish Balay   int         ierr,i,N = m*n*b->bs2;
286a30f8f8cSSatish Balay   MatScalar   *vsingle;
287a30f8f8cSSatish Balay 
288a30f8f8cSSatish Balay   PetscFunctionBegin;
289a30f8f8cSSatish Balay   if (N > b->setvalueslen) {
290a30f8f8cSSatish Balay     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
291a30f8f8cSSatish Balay     b->setvaluescopy = (MatScalar*)PetscMalloc(N*sizeof(MatScalar));CHKPTRQ(b->setvaluescopy);
292a30f8f8cSSatish Balay     b->setvalueslen  = N;
293a30f8f8cSSatish Balay   }
294a30f8f8cSSatish Balay   vsingle = b->setvaluescopy;
295a30f8f8cSSatish Balay   for (i=0; i<N; i++) {
296a30f8f8cSSatish Balay     vsingle[i] = v[i];
297a30f8f8cSSatish Balay   }
298a30f8f8cSSatish Balay   ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
299a30f8f8cSSatish Balay   PetscFunctionReturn(0);
300a30f8f8cSSatish Balay }
301a30f8f8cSSatish Balay 
302a30f8f8cSSatish Balay #undef __FUNC__
303a30f8f8cSSatish Balay #define __FUNC__ /*<a name="MatSetValues_MPISBAIJ_HT"></a>*/"MatSetValues_MPISBAIJ_HT"
304a30f8f8cSSatish Balay int MatSetValues_MPISBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
305a30f8f8cSSatish Balay {
306a30f8f8cSSatish Balay   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
307a30f8f8cSSatish Balay   int         ierr,i,N = m*n;
308a30f8f8cSSatish Balay   MatScalar   *vsingle;
309a30f8f8cSSatish Balay 
310a30f8f8cSSatish Balay   PetscFunctionBegin;
311a30f8f8cSSatish Balay   if (N > b->setvalueslen) {
312a30f8f8cSSatish Balay     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
313a30f8f8cSSatish Balay     b->setvaluescopy = (MatScalar*)PetscMalloc(N*sizeof(MatScalar));CHKPTRQ(b->setvaluescopy);
314a30f8f8cSSatish Balay     b->setvalueslen  = N;
315a30f8f8cSSatish Balay   }
316a30f8f8cSSatish Balay   vsingle = b->setvaluescopy;
317a30f8f8cSSatish Balay   for (i=0; i<N; i++) {
318a30f8f8cSSatish Balay     vsingle[i] = v[i];
319a30f8f8cSSatish Balay   }
320a30f8f8cSSatish Balay   ierr = MatSetValues_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
321a30f8f8cSSatish Balay   PetscFunctionReturn(0);
322a30f8f8cSSatish Balay }
323a30f8f8cSSatish Balay 
324a30f8f8cSSatish Balay #undef __FUNC__
325a30f8f8cSSatish Balay #define __FUNC__ /*<a name="MatSetValuesBlocked_MPISBAIJ_HT"></a>*/"MatSetValuesBlocked_MPISBAIJ_HT"
326a30f8f8cSSatish Balay int MatSetValuesBlocked_MPISBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
327a30f8f8cSSatish Balay {
328a30f8f8cSSatish Balay   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
329a30f8f8cSSatish Balay   int         ierr,i,N = m*n*b->bs2;
330a30f8f8cSSatish Balay   MatScalar   *vsingle;
331a30f8f8cSSatish Balay 
332a30f8f8cSSatish Balay   PetscFunctionBegin;
333a30f8f8cSSatish Balay   if (N > b->setvalueslen) {
334a30f8f8cSSatish Balay     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
335a30f8f8cSSatish Balay     b->setvaluescopy = (MatScalar*)PetscMalloc(N*sizeof(MatScalar));CHKPTRQ(b->setvaluescopy);
336a30f8f8cSSatish Balay     b->setvalueslen  = N;
337a30f8f8cSSatish Balay   }
338a30f8f8cSSatish Balay   vsingle = b->setvaluescopy;
339a30f8f8cSSatish Balay   for (i=0; i<N; i++) {
340a30f8f8cSSatish Balay     vsingle[i] = v[i];
341a30f8f8cSSatish Balay   }
342a30f8f8cSSatish Balay   ierr = MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
343a30f8f8cSSatish Balay   PetscFunctionReturn(0);
344a30f8f8cSSatish Balay }
345a30f8f8cSSatish Balay #endif
346a30f8f8cSSatish Balay 
347a30f8f8cSSatish Balay /* Only add/insert a(i,j) with i<=j (blocks).
348a30f8f8cSSatish Balay    Any a(i,j) with i>j input by user is ingored.
349a30f8f8cSSatish Balay */
350a30f8f8cSSatish Balay #undef __FUNC__
351a30f8f8cSSatish Balay #define __FUNC__ /*<a name="MatSetValues_MPIBAIJ"></a>*/"MatSetValues_MPIBAIJ"
352a30f8f8cSSatish Balay int MatSetValues_MPISBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
353a30f8f8cSSatish Balay {
354a30f8f8cSSatish Balay   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
355a30f8f8cSSatish Balay   MatScalar   value;
356a30f8f8cSSatish Balay   int         ierr,i,j,row,col;
357a30f8f8cSSatish Balay   int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
358a30f8f8cSSatish Balay   int         rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
359a30f8f8cSSatish Balay   int         cend_orig=baij->cend_bs,bs=baij->bs;
360a30f8f8cSSatish Balay 
361a30f8f8cSSatish Balay   /* Some Variables required in the macro */
362a30f8f8cSSatish Balay   Mat         A = baij->A;
363a30f8f8cSSatish Balay   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data;
364a30f8f8cSSatish Balay   int         *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
365a30f8f8cSSatish Balay   MatScalar   *aa=a->a;
366a30f8f8cSSatish Balay 
367a30f8f8cSSatish Balay   Mat         B = baij->B;
368a30f8f8cSSatish Balay   Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data;
369a30f8f8cSSatish Balay   int         *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
370a30f8f8cSSatish Balay   MatScalar   *ba=b->a;
371a30f8f8cSSatish Balay 
372a30f8f8cSSatish Balay   int         *rp,ii,nrow,_i,rmax,N,brow,bcol;
373a30f8f8cSSatish Balay   int         low,high,t,ridx,cidx,bs2=a->bs2;
374a30f8f8cSSatish Balay   MatScalar   *ap,*bap;
375a30f8f8cSSatish Balay 
376a30f8f8cSSatish Balay   /* for stash */
377f65c83cfSHong Zhang   int         n_loc, *in_loc=0;
378f65c83cfSHong Zhang   MatScalar   *v_loc=0;
379a30f8f8cSSatish Balay 
380a30f8f8cSSatish Balay   PetscFunctionBegin;
381a30f8f8cSSatish Balay 
382a30f8f8cSSatish Balay   if(!baij->donotstash){
383a30f8f8cSSatish Balay     in_loc = (int*)PetscMalloc(n*sizeof(int)); CHKPTRQ(in_loc);
384a30f8f8cSSatish Balay     v_loc  = (MatScalar*)PetscMalloc(n*sizeof(MatScalar)); CHKPTRQ(v_loc);
385a30f8f8cSSatish Balay   }
386a30f8f8cSSatish Balay 
387a30f8f8cSSatish Balay   for (i=0; i<m; i++) {
388a30f8f8cSSatish Balay     if (im[i] < 0) continue;
389a30f8f8cSSatish Balay #if defined(PETSC_USE_BOPT_g)
390a30f8f8cSSatish Balay     if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
391a30f8f8cSSatish Balay #endif
392a30f8f8cSSatish Balay     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
393a30f8f8cSSatish Balay       row = im[i] - rstart_orig;              /* local row index */
394a30f8f8cSSatish Balay       for (j=0; j<n; j++) {
395f65c83cfSHong Zhang         if (im[i]/bs > in[j]/bs) continue;    /* ignore lower triangular blocks */
396a30f8f8cSSatish Balay         if (in[j] >= cstart_orig && in[j] < cend_orig){  /* diag entry (A) */
397a30f8f8cSSatish Balay           col = in[j] - cstart_orig;          /* local col index */
398a30f8f8cSSatish Balay           brow = row/bs; bcol = col/bs;
399a30f8f8cSSatish Balay           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
400a30f8f8cSSatish Balay           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
401a30f8f8cSSatish Balay           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
402a30f8f8cSSatish Balay           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
403a30f8f8cSSatish Balay         } else if (in[j] < 0) continue;
404a30f8f8cSSatish Balay #if defined(PETSC_USE_BOPT_g)
405a30f8f8cSSatish Balay         else if (in[j] >= baij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Col too large");}
406a30f8f8cSSatish Balay #endif
407a30f8f8cSSatish Balay         else {  /* off-diag entry (B) */
408a30f8f8cSSatish Balay           if (mat->was_assembled) {
409a30f8f8cSSatish Balay             if (!baij->colmap) {
410a30f8f8cSSatish Balay               ierr = CreateColmap_MPISBAIJ_Private(mat);CHKERRQ(ierr);
411a30f8f8cSSatish Balay             }
412a30f8f8cSSatish Balay #if defined (PETSC_USE_CTABLE)
413a30f8f8cSSatish Balay             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
414a30f8f8cSSatish Balay             col  = col - 1 + in[j]%bs;
415a30f8f8cSSatish Balay #else
416a30f8f8cSSatish Balay             col = baij->colmap[in[j]/bs] - 1 + in[j]%bs;
417a30f8f8cSSatish Balay #endif
418a30f8f8cSSatish Balay             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
419a30f8f8cSSatish Balay               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
420a30f8f8cSSatish Balay               col =  in[j];
421a30f8f8cSSatish Balay               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
422a30f8f8cSSatish Balay               B = baij->B;
423a30f8f8cSSatish Balay               b = (Mat_SeqBAIJ*)(B)->data;
424a30f8f8cSSatish Balay               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
425a30f8f8cSSatish Balay               ba=b->a;
426a30f8f8cSSatish Balay             }
427a30f8f8cSSatish Balay           } else col = in[j];
428a30f8f8cSSatish Balay           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
429a30f8f8cSSatish Balay           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
430a30f8f8cSSatish Balay           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
431a30f8f8cSSatish Balay         }
432a30f8f8cSSatish Balay       }
433a30f8f8cSSatish Balay     } else {  /* off processor entry */
434a30f8f8cSSatish Balay       if (!baij->donotstash) {
435a30f8f8cSSatish Balay         n_loc = 0;
436a30f8f8cSSatish Balay         for (j=0; j<n; j++){
437f65c83cfSHong Zhang           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
438a30f8f8cSSatish Balay           in_loc[n_loc] = in[j];
439a30f8f8cSSatish Balay           if (roworiented) {
440a30f8f8cSSatish Balay             v_loc[n_loc] = v[i*n+j];
441a30f8f8cSSatish Balay           } else {
442a30f8f8cSSatish Balay             v_loc[n_loc] = v[j*m+i];
443a30f8f8cSSatish Balay           }
444a30f8f8cSSatish Balay           n_loc++;
445a30f8f8cSSatish Balay         }
446a30f8f8cSSatish Balay         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr);
447a30f8f8cSSatish Balay       }
448a30f8f8cSSatish Balay     }
449a30f8f8cSSatish Balay   }
450a30f8f8cSSatish Balay 
451a30f8f8cSSatish Balay   if(!baij->donotstash){
452a30f8f8cSSatish Balay     ierr = PetscFree(in_loc);CHKERRQ(ierr);
453a30f8f8cSSatish Balay     ierr = PetscFree(v_loc);CHKERRQ(ierr);
454a30f8f8cSSatish Balay   }
455a30f8f8cSSatish Balay   PetscFunctionReturn(0);
456a30f8f8cSSatish Balay }
457a30f8f8cSSatish Balay 
458a30f8f8cSSatish Balay #undef __FUNC__
459a30f8f8cSSatish Balay #define __FUNC__ /*<a name="MatSetValuesBlocked_MPISBAIJ"></a>*/"MatSetValuesBlocked_MPISBAIJ"
460a30f8f8cSSatish Balay int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
461a30f8f8cSSatish Balay {
462a30f8f8cSSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
463a30f8f8cSSatish Balay   MatScalar   *value,*barray=baij->barray;
464a30f8f8cSSatish Balay   int         ierr,i,j,ii,jj,row,col;
465a30f8f8cSSatish Balay   int         roworiented = baij->roworiented,rstart=baij->rstart ;
466a30f8f8cSSatish Balay   int         rend=baij->rend,cstart=baij->cstart,stepval;
467a30f8f8cSSatish Balay   int         cend=baij->cend,bs=baij->bs,bs2=baij->bs2;
468a30f8f8cSSatish Balay 
469a30f8f8cSSatish Balay   PetscFunctionBegin;
470a30f8f8cSSatish Balay   if(!barray) {
471a30f8f8cSSatish Balay     baij->barray = barray = (MatScalar*)PetscMalloc(bs2*sizeof(MatScalar));CHKPTRQ(barray);
472a30f8f8cSSatish Balay   }
473a30f8f8cSSatish Balay 
474a30f8f8cSSatish Balay   if (roworiented) {
475a30f8f8cSSatish Balay     stepval = (n-1)*bs;
476a30f8f8cSSatish Balay   } else {
477a30f8f8cSSatish Balay     stepval = (m-1)*bs;
478a30f8f8cSSatish Balay   }
479a30f8f8cSSatish Balay   for (i=0; i<m; i++) {
480a30f8f8cSSatish Balay     if (im[i] < 0) continue;
481a30f8f8cSSatish Balay #if defined(PETSC_USE_BOPT_g)
482a30f8f8cSSatish Balay     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large, row %d max %d",im[i],baij->Mbs);
483a30f8f8cSSatish Balay #endif
484a30f8f8cSSatish Balay     if (im[i] >= rstart && im[i] < rend) {
485a30f8f8cSSatish Balay       row = im[i] - rstart;
486a30f8f8cSSatish Balay       for (j=0; j<n; j++) {
487a30f8f8cSSatish Balay         /* If NumCol = 1 then a copy is not required */
488a30f8f8cSSatish Balay         if ((roworiented) && (n == 1)) {
489a30f8f8cSSatish Balay           barray = v + i*bs2;
490a30f8f8cSSatish Balay         } else if((!roworiented) && (m == 1)) {
491a30f8f8cSSatish Balay           barray = v + j*bs2;
492a30f8f8cSSatish Balay         } else { /* Here a copy is required */
493a30f8f8cSSatish Balay           if (roworiented) {
494a30f8f8cSSatish Balay             value = v + i*(stepval+bs)*bs + j*bs;
495a30f8f8cSSatish Balay           } else {
496a30f8f8cSSatish Balay             value = v + j*(stepval+bs)*bs + i*bs;
497a30f8f8cSSatish Balay           }
498a30f8f8cSSatish Balay           for (ii=0; ii<bs; ii++,value+=stepval) {
499a30f8f8cSSatish Balay             for (jj=0; jj<bs; jj++) {
500a30f8f8cSSatish Balay               *barray++  = *value++;
501a30f8f8cSSatish Balay             }
502a30f8f8cSSatish Balay           }
503a30f8f8cSSatish Balay           barray -=bs2;
504a30f8f8cSSatish Balay         }
505a30f8f8cSSatish Balay 
506a30f8f8cSSatish Balay         if (in[j] >= cstart && in[j] < cend){
507a30f8f8cSSatish Balay           col  = in[j] - cstart;
508a30f8f8cSSatish Balay           ierr = MatSetValuesBlocked_SeqSBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
509a30f8f8cSSatish Balay         }
510a30f8f8cSSatish Balay         else if (in[j] < 0) continue;
511a30f8f8cSSatish Balay #if defined(PETSC_USE_BOPT_g)
512a30f8f8cSSatish Balay         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large, col %d max %d",in[j],baij->Nbs);}
513a30f8f8cSSatish Balay #endif
514a30f8f8cSSatish Balay         else {
515a30f8f8cSSatish Balay           if (mat->was_assembled) {
516a30f8f8cSSatish Balay             if (!baij->colmap) {
517a30f8f8cSSatish Balay               ierr = CreateColmap_MPISBAIJ_Private(mat);CHKERRQ(ierr);
518a30f8f8cSSatish Balay             }
519a30f8f8cSSatish Balay 
520a30f8f8cSSatish Balay #if defined(PETSC_USE_BOPT_g)
521a30f8f8cSSatish Balay #if defined (PETSC_USE_CTABLE)
522a30f8f8cSSatish Balay             { int data;
523a30f8f8cSSatish Balay               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
524a30f8f8cSSatish Balay               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");
525a30f8f8cSSatish Balay             }
526a30f8f8cSSatish Balay #else
527a30f8f8cSSatish Balay             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");
528a30f8f8cSSatish Balay #endif
529a30f8f8cSSatish Balay #endif
530a30f8f8cSSatish Balay #if defined (PETSC_USE_CTABLE)
531a30f8f8cSSatish Balay 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
532a30f8f8cSSatish Balay             col  = (col - 1)/bs;
533a30f8f8cSSatish Balay #else
534a30f8f8cSSatish Balay             col = (baij->colmap[in[j]] - 1)/bs;
535a30f8f8cSSatish Balay #endif
536a30f8f8cSSatish Balay             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
537c8407628SSatish Balay               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
538a30f8f8cSSatish Balay               col =  in[j];
539a30f8f8cSSatish Balay             }
540a30f8f8cSSatish Balay           }
541a30f8f8cSSatish Balay           else col = in[j];
542a30f8f8cSSatish Balay           ierr = MatSetValuesBlocked_SeqSBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
543a30f8f8cSSatish Balay         }
544a30f8f8cSSatish Balay       }
545a30f8f8cSSatish Balay     } else {
546a30f8f8cSSatish Balay       if (!baij->donotstash) {
547a30f8f8cSSatish Balay         if (roworiented) {
548a30f8f8cSSatish Balay           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
549a30f8f8cSSatish Balay         } else {
550a30f8f8cSSatish Balay           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
551a30f8f8cSSatish Balay         }
552a30f8f8cSSatish Balay       }
553a30f8f8cSSatish Balay     }
554a30f8f8cSSatish Balay   }
555a30f8f8cSSatish Balay   PetscFunctionReturn(0);
556a30f8f8cSSatish Balay }
557a30f8f8cSSatish Balay 
558a30f8f8cSSatish Balay #define HASH_KEY 0.6180339887
559a30f8f8cSSatish Balay #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
560a30f8f8cSSatish Balay /* #define HASH(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
561a30f8f8cSSatish Balay /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
562a30f8f8cSSatish Balay #undef __FUNC__
563a30f8f8cSSatish Balay #define __FUNC__ /*<a name="MatSetValues_MPISBAIJ_HT_MatScalar"></a>*/"MatSetValues_MPISBAIJ_HT_MatScalar"
564a30f8f8cSSatish Balay int MatSetValues_MPISBAIJ_HT_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
565a30f8f8cSSatish Balay {
566a30f8f8cSSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
567a30f8f8cSSatish Balay   int         ierr,i,j,row,col;
568a30f8f8cSSatish Balay   int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
569a30f8f8cSSatish Balay   int         rend_orig=baij->rend_bs,Nbs=baij->Nbs;
570a30f8f8cSSatish Balay   int         h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx;
571a30f8f8cSSatish Balay   PetscReal   tmp;
572a30f8f8cSSatish Balay   MatScalar   **HD = baij->hd,value;
573a30f8f8cSSatish Balay #if defined(PETSC_USE_BOPT_g)
574a30f8f8cSSatish Balay   int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
575a30f8f8cSSatish Balay #endif
576a30f8f8cSSatish Balay 
577a30f8f8cSSatish Balay   PetscFunctionBegin;
578a30f8f8cSSatish Balay 
579a30f8f8cSSatish Balay   for (i=0; i<m; i++) {
580a30f8f8cSSatish Balay #if defined(PETSC_USE_BOPT_g)
581a30f8f8cSSatish Balay     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
582a30f8f8cSSatish Balay     if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
583a30f8f8cSSatish Balay #endif
584a30f8f8cSSatish Balay       row = im[i];
585a30f8f8cSSatish Balay     if (row >= rstart_orig && row < rend_orig) {
586a30f8f8cSSatish Balay       for (j=0; j<n; j++) {
587a30f8f8cSSatish Balay         col = in[j];
588a30f8f8cSSatish Balay         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
589a30f8f8cSSatish Balay         /* Look up into the Hash Table */
590a30f8f8cSSatish Balay         key = (row/bs)*Nbs+(col/bs)+1;
591a30f8f8cSSatish Balay         h1  = HASH(size,key,tmp);
592a30f8f8cSSatish Balay 
593a30f8f8cSSatish Balay 
594a30f8f8cSSatish Balay         idx = h1;
595a30f8f8cSSatish Balay #if defined(PETSC_USE_BOPT_g)
596a30f8f8cSSatish Balay         insert_ct++;
597a30f8f8cSSatish Balay         total_ct++;
598a30f8f8cSSatish Balay         if (HT[idx] != key) {
599a30f8f8cSSatish Balay           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
600a30f8f8cSSatish Balay           if (idx == size) {
601a30f8f8cSSatish Balay             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
602a30f8f8cSSatish Balay             if (idx == h1) {
603a30f8f8cSSatish Balay               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
604a30f8f8cSSatish Balay             }
605a30f8f8cSSatish Balay           }
606a30f8f8cSSatish Balay         }
607a30f8f8cSSatish Balay #else
608a30f8f8cSSatish Balay         if (HT[idx] != key) {
609a30f8f8cSSatish Balay           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
610a30f8f8cSSatish Balay           if (idx == size) {
611a30f8f8cSSatish Balay             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
612a30f8f8cSSatish Balay             if (idx == h1) {
613a30f8f8cSSatish Balay               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
614a30f8f8cSSatish Balay             }
615a30f8f8cSSatish Balay           }
616a30f8f8cSSatish Balay         }
617a30f8f8cSSatish Balay #endif
618a30f8f8cSSatish Balay         /* A HASH table entry is found, so insert the values at the correct address */
619a30f8f8cSSatish Balay         if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
620a30f8f8cSSatish Balay         else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
621a30f8f8cSSatish Balay       }
622a30f8f8cSSatish Balay     } else {
623a30f8f8cSSatish Balay       if (!baij->donotstash) {
624a30f8f8cSSatish Balay         if (roworiented) {
625a30f8f8cSSatish Balay           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
626a30f8f8cSSatish Balay         } else {
627a30f8f8cSSatish Balay           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
628a30f8f8cSSatish Balay         }
629a30f8f8cSSatish Balay       }
630a30f8f8cSSatish Balay     }
631a30f8f8cSSatish Balay   }
632a30f8f8cSSatish Balay #if defined(PETSC_USE_BOPT_g)
633a30f8f8cSSatish Balay   baij->ht_total_ct = total_ct;
634a30f8f8cSSatish Balay   baij->ht_insert_ct = insert_ct;
635a30f8f8cSSatish Balay #endif
636a30f8f8cSSatish Balay   PetscFunctionReturn(0);
637a30f8f8cSSatish Balay }
638a30f8f8cSSatish Balay 
639a30f8f8cSSatish Balay #undef __FUNC__
640a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatSetValuesBlocked_MPISBAIJ_HT_MatScalar"
641a30f8f8cSSatish Balay int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
642a30f8f8cSSatish Balay {
643a30f8f8cSSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
644a30f8f8cSSatish Balay   int         ierr,i,j,ii,jj,row,col;
645a30f8f8cSSatish Balay   int         roworiented = baij->roworiented,rstart=baij->rstart ;
646a30f8f8cSSatish Balay   int         rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2;
647a30f8f8cSSatish Balay   int         h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
648a30f8f8cSSatish Balay   PetscReal   tmp;
649a30f8f8cSSatish Balay   MatScalar   **HD = baij->hd,*baij_a;
650a30f8f8cSSatish Balay   MatScalar   *v_t,*value;
651a30f8f8cSSatish Balay #if defined(PETSC_USE_BOPT_g)
652a30f8f8cSSatish Balay   int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
653a30f8f8cSSatish Balay #endif
654a30f8f8cSSatish Balay 
655a30f8f8cSSatish Balay   PetscFunctionBegin;
656a30f8f8cSSatish Balay 
657a30f8f8cSSatish Balay   if (roworiented) {
658a30f8f8cSSatish Balay     stepval = (n-1)*bs;
659a30f8f8cSSatish Balay   } else {
660a30f8f8cSSatish Balay     stepval = (m-1)*bs;
661a30f8f8cSSatish Balay   }
662a30f8f8cSSatish Balay   for (i=0; i<m; i++) {
663a30f8f8cSSatish Balay #if defined(PETSC_USE_BOPT_g)
664a30f8f8cSSatish Balay     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
665a30f8f8cSSatish Balay     if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
666a30f8f8cSSatish Balay #endif
667a30f8f8cSSatish Balay     row   = im[i];
668a30f8f8cSSatish Balay     v_t   = v + i*bs2;
669a30f8f8cSSatish Balay     if (row >= rstart && row < rend) {
670a30f8f8cSSatish Balay       for (j=0; j<n; j++) {
671a30f8f8cSSatish Balay         col = in[j];
672a30f8f8cSSatish Balay 
673a30f8f8cSSatish Balay         /* Look up into the Hash Table */
674a30f8f8cSSatish Balay         key = row*Nbs+col+1;
675a30f8f8cSSatish Balay         h1  = HASH(size,key,tmp);
676a30f8f8cSSatish Balay 
677a30f8f8cSSatish Balay         idx = h1;
678a30f8f8cSSatish Balay #if defined(PETSC_USE_BOPT_g)
679a30f8f8cSSatish Balay         total_ct++;
680a30f8f8cSSatish Balay         insert_ct++;
681a30f8f8cSSatish Balay        if (HT[idx] != key) {
682a30f8f8cSSatish Balay           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
683a30f8f8cSSatish Balay           if (idx == size) {
684a30f8f8cSSatish Balay             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
685a30f8f8cSSatish Balay             if (idx == h1) {
686a30f8f8cSSatish Balay               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
687a30f8f8cSSatish Balay             }
688a30f8f8cSSatish Balay           }
689a30f8f8cSSatish Balay         }
690a30f8f8cSSatish Balay #else
691a30f8f8cSSatish Balay         if (HT[idx] != key) {
692a30f8f8cSSatish Balay           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
693a30f8f8cSSatish Balay           if (idx == size) {
694a30f8f8cSSatish Balay             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
695a30f8f8cSSatish Balay             if (idx == h1) {
696a30f8f8cSSatish Balay               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
697a30f8f8cSSatish Balay             }
698a30f8f8cSSatish Balay           }
699a30f8f8cSSatish Balay         }
700a30f8f8cSSatish Balay #endif
701a30f8f8cSSatish Balay         baij_a = HD[idx];
702a30f8f8cSSatish Balay         if (roworiented) {
703a30f8f8cSSatish Balay           /*value = v + i*(stepval+bs)*bs + j*bs;*/
704a30f8f8cSSatish Balay           /* value = v + (i*(stepval+bs)+j)*bs; */
705a30f8f8cSSatish Balay           value = v_t;
706a30f8f8cSSatish Balay           v_t  += bs;
707a30f8f8cSSatish Balay           if (addv == ADD_VALUES) {
708a30f8f8cSSatish Balay             for (ii=0; ii<bs; ii++,value+=stepval) {
709a30f8f8cSSatish Balay               for (jj=ii; jj<bs2; jj+=bs) {
710a30f8f8cSSatish Balay                 baij_a[jj]  += *value++;
711a30f8f8cSSatish Balay               }
712a30f8f8cSSatish Balay             }
713a30f8f8cSSatish Balay           } else {
714a30f8f8cSSatish Balay             for (ii=0; ii<bs; ii++,value+=stepval) {
715a30f8f8cSSatish Balay               for (jj=ii; jj<bs2; jj+=bs) {
716a30f8f8cSSatish Balay                 baij_a[jj]  = *value++;
717a30f8f8cSSatish Balay               }
718a30f8f8cSSatish Balay             }
719a30f8f8cSSatish Balay           }
720a30f8f8cSSatish Balay         } else {
721a30f8f8cSSatish Balay           value = v + j*(stepval+bs)*bs + i*bs;
722a30f8f8cSSatish Balay           if (addv == ADD_VALUES) {
723a30f8f8cSSatish Balay             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
724a30f8f8cSSatish Balay               for (jj=0; jj<bs; jj++) {
725a30f8f8cSSatish Balay                 baij_a[jj]  += *value++;
726a30f8f8cSSatish Balay               }
727a30f8f8cSSatish Balay             }
728a30f8f8cSSatish Balay           } else {
729a30f8f8cSSatish Balay             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
730a30f8f8cSSatish Balay               for (jj=0; jj<bs; jj++) {
731a30f8f8cSSatish Balay                 baij_a[jj]  = *value++;
732a30f8f8cSSatish Balay               }
733a30f8f8cSSatish Balay             }
734a30f8f8cSSatish Balay           }
735a30f8f8cSSatish Balay         }
736a30f8f8cSSatish Balay       }
737a30f8f8cSSatish Balay     } else {
738a30f8f8cSSatish Balay       if (!baij->donotstash) {
739a30f8f8cSSatish Balay         if (roworiented) {
740a30f8f8cSSatish Balay           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
741a30f8f8cSSatish Balay         } else {
742a30f8f8cSSatish Balay           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
743a30f8f8cSSatish Balay         }
744a30f8f8cSSatish Balay       }
745a30f8f8cSSatish Balay     }
746a30f8f8cSSatish Balay   }
747a30f8f8cSSatish Balay #if defined(PETSC_USE_BOPT_g)
748a30f8f8cSSatish Balay   baij->ht_total_ct = total_ct;
749a30f8f8cSSatish Balay   baij->ht_insert_ct = insert_ct;
750a30f8f8cSSatish Balay #endif
751a30f8f8cSSatish Balay   PetscFunctionReturn(0);
752a30f8f8cSSatish Balay }
753a30f8f8cSSatish Balay 
754a30f8f8cSSatish Balay #undef __FUNC__
755a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatGetValues_MPISBAIJ"
756a30f8f8cSSatish Balay int MatGetValues_MPISBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
757a30f8f8cSSatish Balay {
758a30f8f8cSSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
759a30f8f8cSSatish Balay   int        bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
760a30f8f8cSSatish Balay   int        bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;
761a30f8f8cSSatish Balay 
762a30f8f8cSSatish Balay   PetscFunctionBegin;
763a30f8f8cSSatish Balay   for (i=0; i<m; i++) {
764a30f8f8cSSatish Balay     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
765a30f8f8cSSatish Balay     if (idxm[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
766a30f8f8cSSatish Balay     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
767a30f8f8cSSatish Balay       row = idxm[i] - bsrstart;
768a30f8f8cSSatish Balay       for (j=0; j<n; j++) {
769a30f8f8cSSatish Balay         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
770a30f8f8cSSatish Balay         if (idxn[j] >= baij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
771a30f8f8cSSatish Balay         if (idxn[j] >= bscstart && idxn[j] < bscend){
772a30f8f8cSSatish Balay           col = idxn[j] - bscstart;
773c8407628SSatish Balay           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
774a30f8f8cSSatish Balay         } else {
775a30f8f8cSSatish Balay           if (!baij->colmap) {
776a30f8f8cSSatish Balay             ierr = CreateColmap_MPISBAIJ_Private(mat);CHKERRQ(ierr);
777a30f8f8cSSatish Balay           }
778a30f8f8cSSatish Balay #if defined (PETSC_USE_CTABLE)
779a30f8f8cSSatish Balay           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
780a30f8f8cSSatish Balay           data --;
781a30f8f8cSSatish Balay #else
782a30f8f8cSSatish Balay           data = baij->colmap[idxn[j]/bs]-1;
783a30f8f8cSSatish Balay #endif
784a30f8f8cSSatish Balay           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
785a30f8f8cSSatish Balay           else {
786a30f8f8cSSatish Balay             col  = data + idxn[j]%bs;
787c8407628SSatish Balay             ierr = MatGetValues_SeqSBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
788a30f8f8cSSatish Balay           }
789a30f8f8cSSatish Balay         }
790a30f8f8cSSatish Balay       }
791a30f8f8cSSatish Balay     } else {
792a30f8f8cSSatish Balay       SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
793a30f8f8cSSatish Balay     }
794a30f8f8cSSatish Balay   }
795a30f8f8cSSatish Balay  PetscFunctionReturn(0);
796a30f8f8cSSatish Balay }
797a30f8f8cSSatish Balay 
798a30f8f8cSSatish Balay #undef __FUNC__
799a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatNorm_MPISBAIJ"
800a30f8f8cSSatish Balay int MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
801a30f8f8cSSatish Balay {
802a30f8f8cSSatish Balay   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
803bc0ba53dSHong Zhang   /* Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ*)baij->A->data; */
804bc0ba53dSHong Zhang   /* Mat_SeqBAIJ  *bmat = (Mat_SeqBAIJ*)baij->B->data; */
805a30f8f8cSSatish Balay   int        ierr;
806a30f8f8cSSatish Balay   PetscReal  sum[2],*lnorm2;
807a30f8f8cSSatish Balay 
808a30f8f8cSSatish Balay   PetscFunctionBegin;
809a30f8f8cSSatish Balay   if (baij->size == 1) {
810a30f8f8cSSatish Balay     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
811a30f8f8cSSatish Balay   } else {
812a30f8f8cSSatish Balay     if (type == NORM_FROBENIUS) {
813ee237329SHong Zhang       lnorm2 = (double*)PetscMalloc(2*sizeof(double));CHKPTRQ(lnorm2);
814a30f8f8cSSatish Balay       ierr =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
815a30f8f8cSSatish Balay       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* squar power of norm(A) */
816a30f8f8cSSatish Balay       ierr =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
817a30f8f8cSSatish Balay       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* squar power of norm(B) */
818a30f8f8cSSatish Balay       /*
819a30f8f8cSSatish Balay       ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
820a30f8f8cSSatish Balay       PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d], lnorm2=%g, %g\n",rank,lnorm2[0],lnorm2[1]);
821a30f8f8cSSatish Balay       */
822a30f8f8cSSatish Balay       ierr = MPI_Allreduce(lnorm2,&sum,2,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
823a30f8f8cSSatish Balay       /*
824a30f8f8cSSatish Balay       PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d], sum=%g, %g\n",rank,sum[0],sum[1]);
825a30f8f8cSSatish Balay       PetscSynchronizedFlush(PETSC_COMM_WORLD); */
826a30f8f8cSSatish Balay 
827a30f8f8cSSatish Balay       *norm = sqrt(sum[0] + 2*sum[1]);
828a30f8f8cSSatish Balay       ierr = PetscFree(lnorm2);CHKERRQ(ierr);
829a30f8f8cSSatish Balay     } else {
830a30f8f8cSSatish Balay       SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet");
831a30f8f8cSSatish Balay     }
832a30f8f8cSSatish Balay   }
833a30f8f8cSSatish Balay   PetscFunctionReturn(0);
834a30f8f8cSSatish Balay }
835a30f8f8cSSatish Balay 
836a30f8f8cSSatish Balay /*
837a30f8f8cSSatish Balay   Creates the hash table, and sets the table
838a30f8f8cSSatish Balay   This table is created only once.
839a30f8f8cSSatish Balay   If new entried need to be added to the matrix
840a30f8f8cSSatish Balay   then the hash table has to be destroyed and
841a30f8f8cSSatish Balay   recreated.
842a30f8f8cSSatish Balay */
843a30f8f8cSSatish Balay #undef __FUNC__
844a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatCreateHashTable_MPISBAIJ_Private"
845a30f8f8cSSatish Balay int MatCreateHashTable_MPISBAIJ_Private(Mat mat,PetscReal factor)
846a30f8f8cSSatish Balay {
847a30f8f8cSSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
848a30f8f8cSSatish Balay   Mat         A = baij->A,B=baij->B;
849a30f8f8cSSatish Balay   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
850a30f8f8cSSatish Balay   int         i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
851a30f8f8cSSatish Balay   int         size,bs2=baij->bs2,rstart=baij->rstart,ierr;
852a30f8f8cSSatish Balay   int         cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs;
853a30f8f8cSSatish Balay   int         *HT,key;
854a30f8f8cSSatish Balay   MatScalar   **HD;
855a30f8f8cSSatish Balay   PetscReal   tmp;
856a30f8f8cSSatish Balay #if defined(PETSC_USE_BOPT_g)
857a30f8f8cSSatish Balay   int         ct=0,max=0;
858a30f8f8cSSatish Balay #endif
859a30f8f8cSSatish Balay 
860a30f8f8cSSatish Balay   PetscFunctionBegin;
861a30f8f8cSSatish Balay   baij->ht_size=(int)(factor*nz);
862a30f8f8cSSatish Balay   size = baij->ht_size;
863a30f8f8cSSatish Balay 
864a30f8f8cSSatish Balay   if (baij->ht) {
865a30f8f8cSSatish Balay     PetscFunctionReturn(0);
866a30f8f8cSSatish Balay   }
867a30f8f8cSSatish Balay 
868a30f8f8cSSatish Balay   /* Allocate Memory for Hash Table */
869a30f8f8cSSatish Balay   baij->hd = (MatScalar**)PetscMalloc((size)*(sizeof(int)+sizeof(MatScalar*))+1);CHKPTRQ(baij->hd);
870a30f8f8cSSatish Balay   baij->ht = (int*)(baij->hd + size);
871a30f8f8cSSatish Balay   HD = baij->hd;
872a30f8f8cSSatish Balay   HT = baij->ht;
873a30f8f8cSSatish Balay 
874a30f8f8cSSatish Balay 
875a30f8f8cSSatish Balay   ierr = PetscMemzero(HD,size*(sizeof(int)+sizeof(Scalar*)));CHKERRQ(ierr);
876a30f8f8cSSatish Balay 
877a30f8f8cSSatish Balay 
878a30f8f8cSSatish Balay   /* Loop Over A */
879a30f8f8cSSatish Balay   for (i=0; i<a->mbs; i++) {
880a30f8f8cSSatish Balay     for (j=ai[i]; j<ai[i+1]; j++) {
881a30f8f8cSSatish Balay       row = i+rstart;
882a30f8f8cSSatish Balay       col = aj[j]+cstart;
883a30f8f8cSSatish Balay 
884a30f8f8cSSatish Balay       key = row*Nbs + col + 1;
885a30f8f8cSSatish Balay       h1  = HASH(size,key,tmp);
886a30f8f8cSSatish Balay       for (k=0; k<size; k++){
887a30f8f8cSSatish Balay         if (HT[(h1+k)%size] == 0.0) {
888a30f8f8cSSatish Balay           HT[(h1+k)%size] = key;
889a30f8f8cSSatish Balay           HD[(h1+k)%size] = a->a + j*bs2;
890a30f8f8cSSatish Balay           break;
891a30f8f8cSSatish Balay #if defined(PETSC_USE_BOPT_g)
892a30f8f8cSSatish Balay         } else {
893a30f8f8cSSatish Balay           ct++;
894a30f8f8cSSatish Balay #endif
895a30f8f8cSSatish Balay         }
896a30f8f8cSSatish Balay       }
897a30f8f8cSSatish Balay #if defined(PETSC_USE_BOPT_g)
898a30f8f8cSSatish Balay       if (k> max) max = k;
899a30f8f8cSSatish Balay #endif
900a30f8f8cSSatish Balay     }
901a30f8f8cSSatish Balay   }
902a30f8f8cSSatish Balay   /* Loop Over B */
903a30f8f8cSSatish Balay   for (i=0; i<b->mbs; i++) {
904a30f8f8cSSatish Balay     for (j=bi[i]; j<bi[i+1]; j++) {
905a30f8f8cSSatish Balay       row = i+rstart;
906a30f8f8cSSatish Balay       col = garray[bj[j]];
907a30f8f8cSSatish Balay       key = row*Nbs + col + 1;
908a30f8f8cSSatish Balay       h1  = HASH(size,key,tmp);
909a30f8f8cSSatish Balay       for (k=0; k<size; k++){
910a30f8f8cSSatish Balay         if (HT[(h1+k)%size] == 0.0) {
911a30f8f8cSSatish Balay           HT[(h1+k)%size] = key;
912a30f8f8cSSatish Balay           HD[(h1+k)%size] = b->a + j*bs2;
913a30f8f8cSSatish Balay           break;
914a30f8f8cSSatish Balay #if defined(PETSC_USE_BOPT_g)
915a30f8f8cSSatish Balay         } else {
916a30f8f8cSSatish Balay           ct++;
917a30f8f8cSSatish Balay #endif
918a30f8f8cSSatish Balay         }
919a30f8f8cSSatish Balay       }
920a30f8f8cSSatish Balay #if defined(PETSC_USE_BOPT_g)
921a30f8f8cSSatish Balay       if (k> max) max = k;
922a30f8f8cSSatish Balay #endif
923a30f8f8cSSatish Balay     }
924a30f8f8cSSatish Balay   }
925a30f8f8cSSatish Balay 
926a30f8f8cSSatish Balay   /* Print Summary */
927a30f8f8cSSatish Balay #if defined(PETSC_USE_BOPT_g)
928a30f8f8cSSatish Balay   for (i=0,j=0; i<size; i++) {
929a30f8f8cSSatish Balay     if (HT[i]) {j++;}
930a30f8f8cSSatish Balay   }
931a30f8f8cSSatish Balay   PLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n",(j== 0)? 0.0:((PetscReal)(ct+j))/j,max);
932a30f8f8cSSatish Balay #endif
933a30f8f8cSSatish Balay   PetscFunctionReturn(0);
934a30f8f8cSSatish Balay }
935a30f8f8cSSatish Balay 
936a30f8f8cSSatish Balay #undef __FUNC__
937a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatAssemblyBegin_MPISBAIJ"
938a30f8f8cSSatish Balay int MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
939a30f8f8cSSatish Balay {
940a30f8f8cSSatish Balay   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
941a30f8f8cSSatish Balay   int         ierr,nstash,reallocs;
942a30f8f8cSSatish Balay   InsertMode  addv;
943a30f8f8cSSatish Balay 
944a30f8f8cSSatish Balay   PetscFunctionBegin;
945a30f8f8cSSatish Balay   if (baij->donotstash) {
946a30f8f8cSSatish Balay     PetscFunctionReturn(0);
947a30f8f8cSSatish Balay   }
948a30f8f8cSSatish Balay 
949a30f8f8cSSatish Balay   /* make sure all processors are either in INSERTMODE or ADDMODE */
950a30f8f8cSSatish Balay   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
951a30f8f8cSSatish Balay   if (addv == (ADD_VALUES|INSERT_VALUES)) {
952a30f8f8cSSatish Balay     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added");
953a30f8f8cSSatish Balay   }
954a30f8f8cSSatish Balay   mat->insertmode = addv; /* in case this processor had no cache */
955a30f8f8cSSatish Balay 
956a30f8f8cSSatish Balay   ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
957a30f8f8cSSatish Balay   ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
958a30f8f8cSSatish Balay   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
959a30f8f8cSSatish Balay   PLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Stash has %d entries,uses %d mallocs.\n",nstash,reallocs);
960a30f8f8cSSatish Balay   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
961a30f8f8cSSatish Balay   PLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
962a30f8f8cSSatish Balay   PetscFunctionReturn(0);
963a30f8f8cSSatish Balay }
964a30f8f8cSSatish Balay 
965a30f8f8cSSatish Balay #undef __FUNC__
966a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatAssemblyEnd_MPISBAIJ"
967a30f8f8cSSatish Balay int MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
968a30f8f8cSSatish Balay {
969a30f8f8cSSatish Balay   Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data;
970a30f8f8cSSatish Balay   Mat_SeqSBAIJ  *a=(Mat_SeqSBAIJ*)baij->A->data;
971a30f8f8cSSatish Balay   Mat_SeqBAIJ  *b=(Mat_SeqBAIJ*)baij->B->data;
972a30f8f8cSSatish Balay   int         i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2;
973a30f8f8cSSatish Balay   int         *row,*col,other_disassembled;
974a30f8f8cSSatish Balay   PetscTruth  r1,r2,r3;
975a30f8f8cSSatish Balay   MatScalar   *val;
976a30f8f8cSSatish Balay   InsertMode  addv = mat->insertmode;
977a30f8f8cSSatish Balay   int         rank;
978a30f8f8cSSatish Balay 
979a30f8f8cSSatish Balay   PetscFunctionBegin;
980a30f8f8cSSatish Balay   /* remove 2 line below later */
981a30f8f8cSSatish Balay   ierr = MPI_Comm_rank(PETSC_COMM_WORLD, &rank);CHKERRQ(ierr);
982a30f8f8cSSatish Balay 
983a30f8f8cSSatish Balay   if (!baij->donotstash) {
984a30f8f8cSSatish Balay     while (1) {
985a30f8f8cSSatish Balay       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
986a30f8f8cSSatish Balay       /*
987a30f8f8cSSatish Balay       PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d]: in AssemblyEnd, stash, flg=%d\n",rank,flg);
988a30f8f8cSSatish Balay       PetscSynchronizedFlush(PETSC_COMM_WORLD);
989a30f8f8cSSatish Balay       */
990a30f8f8cSSatish Balay       if (!flg) break;
991a30f8f8cSSatish Balay 
992a30f8f8cSSatish Balay       for (i=0; i<n;) {
993a30f8f8cSSatish Balay         /* Now identify the consecutive vals belonging to the same row */
994a30f8f8cSSatish Balay         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
995a30f8f8cSSatish Balay         if (j < n) ncols = j-i;
996a30f8f8cSSatish Balay         else       ncols = n-i;
997a30f8f8cSSatish Balay         /* Now assemble all these values with a single function call */
998a30f8f8cSSatish Balay         ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
999a30f8f8cSSatish Balay         i = j;
1000a30f8f8cSSatish Balay       }
1001a30f8f8cSSatish Balay     }
1002a30f8f8cSSatish Balay     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
1003a30f8f8cSSatish Balay     /* Now process the block-stash. Since the values are stashed column-oriented,
1004a30f8f8cSSatish Balay        set the roworiented flag to column oriented, and after MatSetValues()
1005a30f8f8cSSatish Balay        restore the original flags */
1006a30f8f8cSSatish Balay     r1 = baij->roworiented;
1007a30f8f8cSSatish Balay     r2 = a->roworiented;
1008a30f8f8cSSatish Balay     r3 = b->roworiented;
1009a30f8f8cSSatish Balay     baij->roworiented = PETSC_FALSE;
1010a30f8f8cSSatish Balay     a->roworiented    = PETSC_FALSE;
1011a30f8f8cSSatish Balay     b->roworiented    = PETSC_FALSE;
1012a30f8f8cSSatish Balay     while (1) {
1013a30f8f8cSSatish Balay       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
1014a30f8f8cSSatish Balay       if (!flg) break;
1015a30f8f8cSSatish Balay 
1016a30f8f8cSSatish Balay       for (i=0; i<n;) {
1017a30f8f8cSSatish Balay         /* Now identify the consecutive vals belonging to the same row */
1018a30f8f8cSSatish Balay         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
1019a30f8f8cSSatish Balay         if (j < n) ncols = j-i;
1020a30f8f8cSSatish Balay         else       ncols = n-i;
1021a30f8f8cSSatish Balay         ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
1022a30f8f8cSSatish Balay         i = j;
1023a30f8f8cSSatish Balay       }
1024a30f8f8cSSatish Balay     }
1025a30f8f8cSSatish Balay     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
1026a30f8f8cSSatish Balay     baij->roworiented = r1;
1027a30f8f8cSSatish Balay     a->roworiented    = r2;
1028a30f8f8cSSatish Balay     b->roworiented    = r3;
1029a30f8f8cSSatish Balay   }
1030a30f8f8cSSatish Balay 
1031a30f8f8cSSatish Balay   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
1032a30f8f8cSSatish Balay   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
1033a30f8f8cSSatish Balay 
1034a30f8f8cSSatish Balay   /* determine if any processor has disassembled, if so we must
1035a30f8f8cSSatish Balay      also disassemble ourselfs, in order that we may reassemble. */
1036a30f8f8cSSatish Balay   /*
1037a30f8f8cSSatish Balay      if nonzero structure of submatrix B cannot change then we know that
1038a30f8f8cSSatish Balay      no processor disassembled thus we can skip this stuff
1039a30f8f8cSSatish Balay   */
1040a30f8f8cSSatish Balay   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
1041a30f8f8cSSatish Balay     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
1042a30f8f8cSSatish Balay     if (mat->was_assembled && !other_disassembled) {
1043c8407628SSatish Balay       ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
1044a30f8f8cSSatish Balay     }
1045a30f8f8cSSatish Balay   }
1046a30f8f8cSSatish Balay 
1047a30f8f8cSSatish Balay   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
1048a30f8f8cSSatish Balay     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr);
1049a30f8f8cSSatish Balay   }
1050a30f8f8cSSatish Balay   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
1051a30f8f8cSSatish Balay   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
1052a30f8f8cSSatish Balay 
1053a30f8f8cSSatish Balay #if defined(PETSC_USE_BOPT_g)
1054a30f8f8cSSatish Balay   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
1055a30f8f8cSSatish Balay     PLogInfo(0,"MatAssemblyEnd_MPISBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((double)baij->ht_total_ct)/baij->ht_insert_ct);
1056a30f8f8cSSatish Balay     baij->ht_total_ct  = 0;
1057a30f8f8cSSatish Balay     baij->ht_insert_ct = 0;
1058a30f8f8cSSatish Balay   }
1059a30f8f8cSSatish Balay #endif
1060a30f8f8cSSatish Balay   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
1061c8407628SSatish Balay     ierr = MatCreateHashTable_MPISBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
1062a30f8f8cSSatish Balay     mat->ops->setvalues        = MatSetValues_MPISBAIJ_HT;
1063a30f8f8cSSatish Balay     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPISBAIJ_HT;
1064a30f8f8cSSatish Balay   }
1065a30f8f8cSSatish Balay 
1066a30f8f8cSSatish Balay   if (baij->rowvalues) {
1067a30f8f8cSSatish Balay     ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
1068a30f8f8cSSatish Balay     baij->rowvalues = 0;
1069a30f8f8cSSatish Balay   }
1070a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1071a30f8f8cSSatish Balay }
1072a30f8f8cSSatish Balay 
1073a30f8f8cSSatish Balay #undef __FUNC__
1074a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatView_MPISBAIJ_ASCIIorDraworSocket"
1075a30f8f8cSSatish Balay static int MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer)
1076a30f8f8cSSatish Balay {
1077a30f8f8cSSatish Balay   Mat_MPISBAIJ  *baij = (Mat_MPISBAIJ*)mat->data;
1078a30f8f8cSSatish Balay   int          ierr,format,bs = baij->bs,size = baij->size,rank = baij->rank;
1079a30f8f8cSSatish Balay   PetscTruth   isascii,isdraw;
108065d70643SHong Zhang   Viewer       sviewer;
1081a30f8f8cSSatish Balay 
1082a30f8f8cSSatish Balay   PetscFunctionBegin;
1083a30f8f8cSSatish Balay   ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
1084a30f8f8cSSatish Balay   ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
1085a30f8f8cSSatish Balay   if (isascii) {
1086a30f8f8cSSatish Balay     ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1087a30f8f8cSSatish Balay     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
1088a30f8f8cSSatish Balay       MatInfo info;
1089a30f8f8cSSatish Balay       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
1090a30f8f8cSSatish Balay       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1091a30f8f8cSSatish Balay       ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
1092a30f8f8cSSatish Balay               rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
1093a30f8f8cSSatish Balay               baij->bs,(int)info.memory);CHKERRQ(ierr);
1094a30f8f8cSSatish Balay       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1095a30f8f8cSSatish Balay       ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
1096a30f8f8cSSatish Balay       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1097a30f8f8cSSatish Balay       ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
1098a30f8f8cSSatish Balay       ierr = ViewerFlush(viewer);CHKERRQ(ierr);
1099a30f8f8cSSatish Balay       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
1100a30f8f8cSSatish Balay       PetscFunctionReturn(0);
1101a30f8f8cSSatish Balay     } else if (format == VIEWER_FORMAT_ASCII_INFO) {
1102a30f8f8cSSatish Balay       ierr = ViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
1103a30f8f8cSSatish Balay       PetscFunctionReturn(0);
1104a30f8f8cSSatish Balay     }
1105a30f8f8cSSatish Balay   }
1106a30f8f8cSSatish Balay 
1107a30f8f8cSSatish Balay   if (isdraw) {
1108a30f8f8cSSatish Balay     Draw       draw;
1109a30f8f8cSSatish Balay     PetscTruth isnull;
1110a30f8f8cSSatish Balay     ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1111a30f8f8cSSatish Balay     ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1112a30f8f8cSSatish Balay   }
1113a30f8f8cSSatish Balay 
1114a30f8f8cSSatish Balay   if (size == 1) {
1115a30f8f8cSSatish Balay     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
1116a30f8f8cSSatish Balay   } else {
1117a30f8f8cSSatish Balay     /* assemble the entire matrix onto first processor. */
1118a30f8f8cSSatish Balay     Mat         A;
111965d70643SHong Zhang     Mat_SeqSBAIJ *Aloc;
112065d70643SHong Zhang     Mat_SeqBAIJ *Bloc;
1121a30f8f8cSSatish Balay     int         M = baij->M,N = baij->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
1122a30f8f8cSSatish Balay     MatScalar   *a;
1123a30f8f8cSSatish Balay 
1124a30f8f8cSSatish Balay     if (!rank) {
1125bc0ba53dSHong Zhang       ierr = MatCreateMPISBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
1126a30f8f8cSSatish Balay     } else {
1127f65c83cfSHong Zhang       ierr = MatCreateMPISBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
1128a30f8f8cSSatish Balay     }
1129a30f8f8cSSatish Balay     PLogObjectParent(mat,A);
1130a30f8f8cSSatish Balay 
1131a30f8f8cSSatish Balay     /* copy over the A part */
113265d70643SHong Zhang     Aloc  = (Mat_SeqSBAIJ*)baij->A->data;
1133a30f8f8cSSatish Balay     ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
1134a30f8f8cSSatish Balay     rvals = (int*)PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals);
1135a30f8f8cSSatish Balay 
1136a30f8f8cSSatish Balay     for (i=0; i<mbs; i++) {
1137a30f8f8cSSatish Balay       rvals[0] = bs*(baij->rstart + i);
1138a30f8f8cSSatish Balay       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1139a30f8f8cSSatish Balay       for (j=ai[i]; j<ai[i+1]; j++) {
1140a30f8f8cSSatish Balay         col = (baij->cstart+aj[j])*bs;
1141a30f8f8cSSatish Balay         for (k=0; k<bs; k++) {
1142a30f8f8cSSatish Balay           ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1143a30f8f8cSSatish Balay           col++; a += bs;
1144a30f8f8cSSatish Balay         }
1145a30f8f8cSSatish Balay       }
1146a30f8f8cSSatish Balay     }
1147a30f8f8cSSatish Balay     /* copy over the B part */
114865d70643SHong Zhang     Bloc = (Mat_SeqBAIJ*)baij->B->data;
114965d70643SHong Zhang     ai = Bloc->i; aj = Bloc->j; a = Bloc->a;
1150a30f8f8cSSatish Balay     for (i=0; i<mbs; i++) {
1151a30f8f8cSSatish Balay       rvals[0] = bs*(baij->rstart + i);
1152a30f8f8cSSatish Balay       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1153a30f8f8cSSatish Balay       for (j=ai[i]; j<ai[i+1]; j++) {
1154a30f8f8cSSatish Balay         col = baij->garray[aj[j]]*bs;
1155a30f8f8cSSatish Balay         for (k=0; k<bs; k++) {
1156a30f8f8cSSatish Balay           ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1157a30f8f8cSSatish Balay           col++; a += bs;
1158a30f8f8cSSatish Balay         }
1159a30f8f8cSSatish Balay       }
1160a30f8f8cSSatish Balay     }
1161a30f8f8cSSatish Balay     ierr = PetscFree(rvals);CHKERRQ(ierr);
1162a30f8f8cSSatish Balay     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1163a30f8f8cSSatish Balay     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1164a30f8f8cSSatish Balay     /*
1165a30f8f8cSSatish Balay        Everyone has to call to draw the matrix since the graphics waits are
1166a30f8f8cSSatish Balay        synchronized across all processors that share the Draw object
1167a30f8f8cSSatish Balay     */
1168a30f8f8cSSatish Balay     ierr = ViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
116965d70643SHong Zhang     if (!rank) {
1170a30f8f8cSSatish Balay       ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1171a30f8f8cSSatish Balay     }
117265d70643SHong Zhang     ierr = ViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1173a30f8f8cSSatish Balay     ierr = MatDestroy(A);CHKERRQ(ierr);
1174a30f8f8cSSatish Balay   }
1175a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1176a30f8f8cSSatish Balay }
1177a30f8f8cSSatish Balay 
1178a30f8f8cSSatish Balay #undef __FUNC__
1179a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatView_MPISBAIJ"
1180a30f8f8cSSatish Balay int MatView_MPISBAIJ(Mat mat,Viewer viewer)
1181a30f8f8cSSatish Balay {
1182a30f8f8cSSatish Balay   int        ierr;
1183a30f8f8cSSatish Balay   PetscTruth isascii,isdraw,issocket,isbinary;
1184a30f8f8cSSatish Balay 
1185a30f8f8cSSatish Balay   PetscFunctionBegin;
1186a30f8f8cSSatish Balay   ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
1187a30f8f8cSSatish Balay   ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
1188a30f8f8cSSatish Balay   ierr = PetscTypeCompare((PetscObject)viewer,SOCKET_VIEWER,&issocket);CHKERRQ(ierr);
1189a30f8f8cSSatish Balay   ierr = PetscTypeCompare((PetscObject)viewer,BINARY_VIEWER,&isbinary);CHKERRQ(ierr);
1190a30f8f8cSSatish Balay   if (isascii || isdraw || issocket || isbinary) {
1191a30f8f8cSSatish Balay     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1192a30f8f8cSSatish Balay   } else {
1193a30f8f8cSSatish Balay     SETERRQ1(1,1,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name);
1194a30f8f8cSSatish Balay   }
1195a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1196a30f8f8cSSatish Balay }
1197a30f8f8cSSatish Balay 
1198a30f8f8cSSatish Balay #undef __FUNC__
1199a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatDestroy_MPISBAIJ"
1200a30f8f8cSSatish Balay int MatDestroy_MPISBAIJ(Mat mat)
1201a30f8f8cSSatish Balay {
1202a30f8f8cSSatish Balay   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1203a30f8f8cSSatish Balay   int         ierr;
1204a30f8f8cSSatish Balay 
1205a30f8f8cSSatish Balay   PetscFunctionBegin;
1206a30f8f8cSSatish Balay 
1207a30f8f8cSSatish Balay   if (mat->mapping) {
1208a30f8f8cSSatish Balay     ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr);
1209a30f8f8cSSatish Balay   }
1210a30f8f8cSSatish Balay   if (mat->bmapping) {
1211a30f8f8cSSatish Balay     ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr);
1212a30f8f8cSSatish Balay   }
1213a30f8f8cSSatish Balay   if (mat->rmap) {
1214a30f8f8cSSatish Balay     ierr = MapDestroy(mat->rmap);CHKERRQ(ierr);
1215a30f8f8cSSatish Balay   }
1216a30f8f8cSSatish Balay   if (mat->cmap) {
1217a30f8f8cSSatish Balay     ierr = MapDestroy(mat->cmap);CHKERRQ(ierr);
1218a30f8f8cSSatish Balay   }
1219a30f8f8cSSatish Balay #if defined(PETSC_USE_LOG)
1220a30f8f8cSSatish Balay   PLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",baij->M,baij->N);
1221a30f8f8cSSatish Balay #endif
1222a30f8f8cSSatish Balay 
1223a30f8f8cSSatish Balay   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1224a30f8f8cSSatish Balay   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
1225a30f8f8cSSatish Balay 
1226a30f8f8cSSatish Balay   ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
1227a30f8f8cSSatish Balay   ierr = MatDestroy(baij->A);CHKERRQ(ierr);
1228a30f8f8cSSatish Balay   ierr = MatDestroy(baij->B);CHKERRQ(ierr);
1229a30f8f8cSSatish Balay #if defined (PETSC_USE_CTABLE)
1230a30f8f8cSSatish Balay   if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
1231a30f8f8cSSatish Balay #else
1232a30f8f8cSSatish Balay   if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
1233a30f8f8cSSatish Balay #endif
1234a30f8f8cSSatish Balay   if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
1235a30f8f8cSSatish Balay   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
1236a30f8f8cSSatish Balay   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
1237a30f8f8cSSatish Balay   if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
1238a30f8f8cSSatish Balay   if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
1239a30f8f8cSSatish Balay   if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
1240a30f8f8cSSatish Balay #if defined(PETSC_USE_MAT_SINGLE)
1241a30f8f8cSSatish Balay   if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);}
1242a30f8f8cSSatish Balay #endif
1243a30f8f8cSSatish Balay   ierr = PetscFree(baij);CHKERRQ(ierr);
1244a30f8f8cSSatish Balay   PLogObjectDestroy(mat);
1245a30f8f8cSSatish Balay   PetscHeaderDestroy(mat);
1246a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1247a30f8f8cSSatish Balay }
1248a30f8f8cSSatish Balay 
1249a30f8f8cSSatish Balay #undef __FUNC__
1250a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatMult_MPISBAIJ"
1251a30f8f8cSSatish Balay int MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
1252a30f8f8cSSatish Balay {
1253a30f8f8cSSatish Balay   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1254a30f8f8cSSatish Balay   int         ierr,nt;
1255a30f8f8cSSatish Balay 
1256a30f8f8cSSatish Balay   PetscFunctionBegin;
1257a30f8f8cSSatish Balay   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1258a30f8f8cSSatish Balay   if (nt != a->n) {
1259a30f8f8cSSatish Balay     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx");
1260a30f8f8cSSatish Balay   }
1261a30f8f8cSSatish Balay   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1262a30f8f8cSSatish Balay   if (nt != a->m) {
1263a30f8f8cSSatish Balay     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible parition of A and yy");
1264a30f8f8cSSatish Balay   }
126565d70643SHong Zhang 
1266b941877fSHong Zhang   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1267b941877fSHong Zhang   /* do diagonal part */
1268b941877fSHong Zhang   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1269b941877fSHong Zhang   /* do supperdiagonal part */
1270b941877fSHong Zhang   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1271b941877fSHong Zhang   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1272b941877fSHong Zhang   /* do subdiagonal part */
1273b941877fSHong Zhang   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1274b941877fSHong Zhang   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1275b941877fSHong Zhang   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
127665d70643SHong Zhang 
1277a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1278a30f8f8cSSatish Balay }
1279a30f8f8cSSatish Balay 
1280a30f8f8cSSatish Balay #undef __FUNC__
1281a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatMultAdd_MPISBAIJ"
1282a30f8f8cSSatish Balay int MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1283a30f8f8cSSatish Balay {
1284*de8b6608SHong Zhang   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1285*de8b6608SHong Zhang   int        ierr,i,high;
1286*de8b6608SHong Zhang   Scalar     zero = 0.0;
1287a30f8f8cSSatish Balay 
1288a30f8f8cSSatish Balay   PetscFunctionBegin;
1289*de8b6608SHong Zhang   /*
1290*de8b6608SHong Zhang   PetscSynchronizedPrintf(PETSC_COMM_WORLD,"calledMatMultAdd_MPISBAIJ \n");
1291b941877fSHong Zhang   PetscSynchronizedFlush(PETSC_COMM_WORLD);
1292*de8b6608SHong Zhang   */
1293b941877fSHong Zhang 
1294b941877fSHong Zhang   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1295b941877fSHong Zhang   /* do diagonal part */
1296b941877fSHong Zhang   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1297b941877fSHong Zhang   /* do supperdiagonal part */
1298b941877fSHong Zhang   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1299*de8b6608SHong Zhang   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1300*de8b6608SHong Zhang 
1301b941877fSHong Zhang   /* do subdiagonal part */
1302*de8b6608SHong Zhang   ierr = VecGetOwnershipRange(yy,PETSC_NULL,&high); CHKERRA(ierr);
1303*de8b6608SHong Zhang   PetscSynchronizedPrintf(PETSC_COMM_WORLD,"high=%d \n",high);
1304*de8b6608SHong Zhang   PetscSynchronizedFlush(PETSC_COMM_WORLD);
1305*de8b6608SHong Zhang 
1306*de8b6608SHong Zhang   ierr = VecScatterBegin(yy,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1307*de8b6608SHong Zhang   ierr = VecScatterEnd(yy,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1308*de8b6608SHong Zhang   /* set the begining part of lvec zero ? */
1309*de8b6608SHong Zhang 
1310*de8b6608SHong Zhang   /*
1311*de8b6608SHong Zhang   for (i=0; i<high; i++){
1312*de8b6608SHong Zhang     ierr = VecSetValues(a->lvec,1,&i,&zero,INSERT_VALUES);CHKERRA(ierr);
1313*de8b6608SHong Zhang   }
1314*de8b6608SHong Zhang   */
1315*de8b6608SHong Zhang   /* VecView(a->lvec, VIEWER_STDOUT_WORLD);  */
1316*de8b6608SHong Zhang 
1317*de8b6608SHong Zhang   ierr = (*a->B->ops->multtransposeadd)(a->B,xx,a->lvec,a->lvec);CHKERRQ(ierr);
1318b941877fSHong Zhang   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1319b941877fSHong Zhang   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1320b941877fSHong Zhang #ifdef old
1321a30f8f8cSSatish Balay   /* do subdiagonal part */
1322a30f8f8cSSatish Balay   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1323a30f8f8cSSatish Balay   /* send it on its way */
1324a30f8f8cSSatish Balay   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1325a30f8f8cSSatish Balay   /* do local part */
1326a30f8f8cSSatish Balay   ierr = (*a->B->ops->multadd)(a->B,xx,yy,zz);CHKERRQ(ierr);
1327a30f8f8cSSatish Balay   ierr = (*a->A->ops->multadd)(a->A,xx,zz,zz);CHKERRQ(ierr);
1328a30f8f8cSSatish Balay   /* receive remote parts */
1329a30f8f8cSSatish Balay   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1330b941877fSHong Zhang #endif
1331a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1332a30f8f8cSSatish Balay }
1333a30f8f8cSSatish Balay 
1334a30f8f8cSSatish Balay #undef __FUNC__
1335a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatMultTranspose_MPISBAIJ"
1336a30f8f8cSSatish Balay int MatMultTranspose_MPISBAIJ(Mat A,Vec xx,Vec yy)
1337a30f8f8cSSatish Balay {
1338a30f8f8cSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1339a30f8f8cSSatish Balay   int         ierr;
1340a30f8f8cSSatish Balay 
1341a30f8f8cSSatish Balay   PetscFunctionBegin;
1342a30f8f8cSSatish Balay   /* do nondiagonal part */
1343a30f8f8cSSatish Balay   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1344a30f8f8cSSatish Balay   /* send it on its way */
1345a30f8f8cSSatish Balay   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1346a30f8f8cSSatish Balay   /* do local part */
1347a30f8f8cSSatish Balay   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1348a30f8f8cSSatish Balay   /* receive remote parts: note this assumes the values are not actually */
1349a30f8f8cSSatish Balay   /* inserted in yy until the next line, which is true for my implementation*/
1350a30f8f8cSSatish Balay   /* but is not perhaps always true. */
1351a30f8f8cSSatish Balay   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1352a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1353a30f8f8cSSatish Balay }
1354a30f8f8cSSatish Balay 
1355a30f8f8cSSatish Balay #undef __FUNC__
1356a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatMultTransposeAdd_MPISBAIJ"
1357a30f8f8cSSatish Balay int MatMultTransposeAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1358a30f8f8cSSatish Balay {
1359a30f8f8cSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1360a30f8f8cSSatish Balay   int         ierr;
1361a30f8f8cSSatish Balay 
1362a30f8f8cSSatish Balay   PetscFunctionBegin;
1363a30f8f8cSSatish Balay   /* do nondiagonal part */
1364a30f8f8cSSatish Balay   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1365a30f8f8cSSatish Balay   /* send it on its way */
1366a30f8f8cSSatish Balay   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1367a30f8f8cSSatish Balay   /* do local part */
1368a30f8f8cSSatish Balay   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1369a30f8f8cSSatish Balay   /* receive remote parts: note this assumes the values are not actually */
1370a30f8f8cSSatish Balay   /* inserted in yy until the next line, which is true for my implementation*/
1371a30f8f8cSSatish Balay   /* but is not perhaps always true. */
1372a30f8f8cSSatish Balay   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1373a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1374a30f8f8cSSatish Balay }
1375a30f8f8cSSatish Balay 
1376a30f8f8cSSatish Balay /*
1377a30f8f8cSSatish Balay   This only works correctly for square matrices where the subblock A->A is the
1378a30f8f8cSSatish Balay    diagonal block
1379a30f8f8cSSatish Balay */
1380a30f8f8cSSatish Balay #undef __FUNC__
1381a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatGetDiagonal_MPISBAIJ"
1382a30f8f8cSSatish Balay int MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
1383a30f8f8cSSatish Balay {
1384a30f8f8cSSatish Balay   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1385a30f8f8cSSatish Balay   int         ierr;
1386a30f8f8cSSatish Balay 
1387a30f8f8cSSatish Balay   PetscFunctionBegin;
1388a30f8f8cSSatish Balay   /* if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block"); */
1389a30f8f8cSSatish Balay   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1390a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1391a30f8f8cSSatish Balay }
1392a30f8f8cSSatish Balay 
1393a30f8f8cSSatish Balay #undef __FUNC__
1394a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatScale_MPISBAIJ"
1395a30f8f8cSSatish Balay int MatScale_MPISBAIJ(Scalar *aa,Mat A)
1396a30f8f8cSSatish Balay {
1397a30f8f8cSSatish Balay   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1398a30f8f8cSSatish Balay   int         ierr;
1399a30f8f8cSSatish Balay 
1400a30f8f8cSSatish Balay   PetscFunctionBegin;
1401a30f8f8cSSatish Balay   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
1402a30f8f8cSSatish Balay   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
1403a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1404a30f8f8cSSatish Balay }
1405a30f8f8cSSatish Balay 
1406a30f8f8cSSatish Balay #undef __FUNC__
1407a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatGetSize_MPISBAIJ"
1408a30f8f8cSSatish Balay int MatGetSize_MPISBAIJ(Mat matin,int *m,int *n)
1409a30f8f8cSSatish Balay {
1410a30f8f8cSSatish Balay   Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data;
1411a30f8f8cSSatish Balay 
1412a30f8f8cSSatish Balay   PetscFunctionBegin;
1413a30f8f8cSSatish Balay   if (m) *m = mat->M;
1414a30f8f8cSSatish Balay   if (n) *n = mat->N;
1415a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1416a30f8f8cSSatish Balay }
1417a30f8f8cSSatish Balay 
1418a30f8f8cSSatish Balay #undef __FUNC__
1419a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatGetLocalSize_MPISBAIJ"
1420a30f8f8cSSatish Balay int MatGetLocalSize_MPISBAIJ(Mat matin,int *m,int *n)
1421a30f8f8cSSatish Balay {
1422a30f8f8cSSatish Balay   Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data;
1423a30f8f8cSSatish Balay 
1424a30f8f8cSSatish Balay   PetscFunctionBegin;
1425a30f8f8cSSatish Balay   *m = mat->m; *n = mat->n;
1426a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1427a30f8f8cSSatish Balay }
1428a30f8f8cSSatish Balay 
1429a30f8f8cSSatish Balay #undef __FUNC__
1430a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatGetOwnershipRange_MPISBAIJ"
1431a30f8f8cSSatish Balay int MatGetOwnershipRange_MPISBAIJ(Mat matin,int *m,int *n)
1432a30f8f8cSSatish Balay {
1433a30f8f8cSSatish Balay   Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data;
1434a30f8f8cSSatish Balay 
1435a30f8f8cSSatish Balay   PetscFunctionBegin;
1436a30f8f8cSSatish Balay   if (m) *m = mat->rstart*mat->bs;
1437a30f8f8cSSatish Balay   if (n) *n = mat->rend*mat->bs;
1438a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1439a30f8f8cSSatish Balay }
1440a30f8f8cSSatish Balay 
1441a30f8f8cSSatish Balay #undef __FUNC__
1442a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatGetRow_MPISBAIJ"
1443a30f8f8cSSatish Balay int MatGetRow_MPISBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
1444a30f8f8cSSatish Balay {
1445a30f8f8cSSatish Balay   Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data;
1446a30f8f8cSSatish Balay   Scalar     *vworkA,*vworkB,**pvA,**pvB,*v_p;
1447a30f8f8cSSatish Balay   int        bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB;
1448a30f8f8cSSatish Balay   int        nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs;
1449a30f8f8cSSatish Balay   int        *cmap,*idx_p,cstart = mat->cstart;
1450a30f8f8cSSatish Balay 
1451a30f8f8cSSatish Balay   PetscFunctionBegin;
1452a30f8f8cSSatish Balay   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active");
1453a30f8f8cSSatish Balay   mat->getrowactive = PETSC_TRUE;
1454a30f8f8cSSatish Balay 
1455a30f8f8cSSatish Balay   if (!mat->rowvalues && (idx || v)) {
1456a30f8f8cSSatish Balay     /*
1457a30f8f8cSSatish Balay         allocate enough space to hold information from the longest row.
1458a30f8f8cSSatish Balay     */
1459a30f8f8cSSatish Balay     Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
1460a30f8f8cSSatish Balay     Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
1461a30f8f8cSSatish Balay     int     max = 1,mbs = mat->mbs,tmp;
1462a30f8f8cSSatish Balay     for (i=0; i<mbs; i++) {
1463a30f8f8cSSatish Balay       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
1464a30f8f8cSSatish Balay       if (max < tmp) { max = tmp; }
1465a30f8f8cSSatish Balay     }
1466a30f8f8cSSatish Balay     mat->rowvalues = (Scalar*)PetscMalloc(max*bs2*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues);
1467a30f8f8cSSatish Balay     mat->rowindices = (int*)(mat->rowvalues + max*bs2);
1468a30f8f8cSSatish Balay   }
1469a30f8f8cSSatish Balay 
1470a30f8f8cSSatish Balay   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,0,"Only local rows")
1471a30f8f8cSSatish Balay   lrow = row - brstart;  /* local row index */
1472a30f8f8cSSatish Balay 
1473a30f8f8cSSatish Balay   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1474a30f8f8cSSatish Balay   if (!v)   {pvA = 0; pvB = 0;}
1475a30f8f8cSSatish Balay   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1476a30f8f8cSSatish Balay   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1477a30f8f8cSSatish Balay   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1478a30f8f8cSSatish Balay   nztot = nzA + nzB;
1479a30f8f8cSSatish Balay 
1480a30f8f8cSSatish Balay   cmap  = mat->garray;
1481a30f8f8cSSatish Balay   if (v  || idx) {
1482a30f8f8cSSatish Balay     if (nztot) {
1483a30f8f8cSSatish Balay       /* Sort by increasing column numbers, assuming A and B already sorted */
1484a30f8f8cSSatish Balay       int imark = -1;
1485a30f8f8cSSatish Balay       if (v) {
1486a30f8f8cSSatish Balay         *v = v_p = mat->rowvalues;
1487a30f8f8cSSatish Balay         for (i=0; i<nzB; i++) {
1488a30f8f8cSSatish Balay           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1489a30f8f8cSSatish Balay           else break;
1490a30f8f8cSSatish Balay         }
1491a30f8f8cSSatish Balay         imark = i;
1492a30f8f8cSSatish Balay         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1493a30f8f8cSSatish Balay         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1494a30f8f8cSSatish Balay       }
1495a30f8f8cSSatish Balay       if (idx) {
1496a30f8f8cSSatish Balay         *idx = idx_p = mat->rowindices;
1497a30f8f8cSSatish Balay         if (imark > -1) {
1498a30f8f8cSSatish Balay           for (i=0; i<imark; i++) {
1499a30f8f8cSSatish Balay             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1500a30f8f8cSSatish Balay           }
1501a30f8f8cSSatish Balay         } else {
1502a30f8f8cSSatish Balay           for (i=0; i<nzB; i++) {
1503a30f8f8cSSatish Balay             if (cmap[cworkB[i]/bs] < cstart)
1504a30f8f8cSSatish Balay               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1505a30f8f8cSSatish Balay             else break;
1506a30f8f8cSSatish Balay           }
1507a30f8f8cSSatish Balay           imark = i;
1508a30f8f8cSSatish Balay         }
1509a30f8f8cSSatish Balay         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1510a30f8f8cSSatish Balay         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1511a30f8f8cSSatish Balay       }
1512a30f8f8cSSatish Balay     } else {
1513a30f8f8cSSatish Balay       if (idx) *idx = 0;
1514a30f8f8cSSatish Balay       if (v)   *v   = 0;
1515a30f8f8cSSatish Balay     }
1516a30f8f8cSSatish Balay   }
1517a30f8f8cSSatish Balay   *nz = nztot;
1518a30f8f8cSSatish Balay   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1519a30f8f8cSSatish Balay   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1520a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1521a30f8f8cSSatish Balay }
1522a30f8f8cSSatish Balay 
1523a30f8f8cSSatish Balay #undef __FUNC__
1524a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatRestoreRow_MPISBAIJ"
1525a30f8f8cSSatish Balay int MatRestoreRow_MPISBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1526a30f8f8cSSatish Balay {
1527a30f8f8cSSatish Balay   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1528a30f8f8cSSatish Balay 
1529a30f8f8cSSatish Balay   PetscFunctionBegin;
1530a30f8f8cSSatish Balay   if (baij->getrowactive == PETSC_FALSE) {
1531a30f8f8cSSatish Balay     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called");
1532a30f8f8cSSatish Balay   }
1533a30f8f8cSSatish Balay   baij->getrowactive = PETSC_FALSE;
1534a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1535a30f8f8cSSatish Balay }
1536a30f8f8cSSatish Balay 
1537a30f8f8cSSatish Balay #undef __FUNC__
1538a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatGetBlockSize_MPISBAIJ"
1539a30f8f8cSSatish Balay int MatGetBlockSize_MPISBAIJ(Mat mat,int *bs)
1540a30f8f8cSSatish Balay {
1541a30f8f8cSSatish Balay   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1542a30f8f8cSSatish Balay 
1543a30f8f8cSSatish Balay   PetscFunctionBegin;
1544a30f8f8cSSatish Balay   *bs = baij->bs;
1545a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1546a30f8f8cSSatish Balay }
1547a30f8f8cSSatish Balay 
1548a30f8f8cSSatish Balay #undef __FUNC__
1549a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatZeroEntries_MPISBAIJ"
1550a30f8f8cSSatish Balay int MatZeroEntries_MPISBAIJ(Mat A)
1551a30f8f8cSSatish Balay {
1552a30f8f8cSSatish Balay   Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data;
1553a30f8f8cSSatish Balay   int         ierr;
1554a30f8f8cSSatish Balay 
1555a30f8f8cSSatish Balay   PetscFunctionBegin;
1556a30f8f8cSSatish Balay   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1557a30f8f8cSSatish Balay   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1558a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1559a30f8f8cSSatish Balay }
1560a30f8f8cSSatish Balay 
1561a30f8f8cSSatish Balay #undef __FUNC__
1562a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatGetInfo_MPISBAIJ"
1563a30f8f8cSSatish Balay int MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1564a30f8f8cSSatish Balay {
1565a30f8f8cSSatish Balay   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data;
1566a30f8f8cSSatish Balay   Mat         A = a->A,B = a->B;
1567a30f8f8cSSatish Balay   int         ierr;
1568a30f8f8cSSatish Balay   PetscReal   isend[5],irecv[5];
1569a30f8f8cSSatish Balay 
1570a30f8f8cSSatish Balay   PetscFunctionBegin;
1571a30f8f8cSSatish Balay   info->block_size     = (double)a->bs;
1572a30f8f8cSSatish Balay   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1573a30f8f8cSSatish Balay   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1574a30f8f8cSSatish Balay   isend[3] = info->memory;  isend[4] = info->mallocs;
1575a30f8f8cSSatish Balay   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1576a30f8f8cSSatish Balay   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1577a30f8f8cSSatish Balay   isend[3] += info->memory;  isend[4] += info->mallocs;
1578a30f8f8cSSatish Balay   if (flag == MAT_LOCAL) {
1579a30f8f8cSSatish Balay     info->nz_used      = isend[0];
1580a30f8f8cSSatish Balay     info->nz_allocated = isend[1];
1581a30f8f8cSSatish Balay     info->nz_unneeded  = isend[2];
1582a30f8f8cSSatish Balay     info->memory       = isend[3];
1583a30f8f8cSSatish Balay     info->mallocs      = isend[4];
1584a30f8f8cSSatish Balay   } else if (flag == MAT_GLOBAL_MAX) {
1585a30f8f8cSSatish Balay     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr);
1586a30f8f8cSSatish Balay     info->nz_used      = irecv[0];
1587a30f8f8cSSatish Balay     info->nz_allocated = irecv[1];
1588a30f8f8cSSatish Balay     info->nz_unneeded  = irecv[2];
1589a30f8f8cSSatish Balay     info->memory       = irecv[3];
1590a30f8f8cSSatish Balay     info->mallocs      = irecv[4];
1591a30f8f8cSSatish Balay   } else if (flag == MAT_GLOBAL_SUM) {
1592a30f8f8cSSatish Balay     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr);
1593a30f8f8cSSatish Balay     info->nz_used      = irecv[0];
1594a30f8f8cSSatish Balay     info->nz_allocated = irecv[1];
1595a30f8f8cSSatish Balay     info->nz_unneeded  = irecv[2];
1596a30f8f8cSSatish Balay     info->memory       = irecv[3];
1597a30f8f8cSSatish Balay     info->mallocs      = irecv[4];
1598a30f8f8cSSatish Balay   } else {
1599a30f8f8cSSatish Balay     SETERRQ1(1,1,"Unknown MatInfoType argument %d",flag);
1600a30f8f8cSSatish Balay   }
1601a30f8f8cSSatish Balay   info->rows_global       = (double)a->M;
1602a30f8f8cSSatish Balay   info->columns_global    = (double)a->N;
1603a30f8f8cSSatish Balay   info->rows_local        = (double)a->m;
1604a30f8f8cSSatish Balay   info->columns_local     = (double)a->N;
1605a30f8f8cSSatish Balay   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1606a30f8f8cSSatish Balay   info->fill_ratio_needed = 0;
1607a30f8f8cSSatish Balay   info->factor_mallocs    = 0;
1608a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1609a30f8f8cSSatish Balay }
1610a30f8f8cSSatish Balay 
1611a30f8f8cSSatish Balay #undef __FUNC__
1612a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatSetOption_MPISBAIJ"
1613a30f8f8cSSatish Balay int MatSetOption_MPISBAIJ(Mat A,MatOption op)
1614a30f8f8cSSatish Balay {
1615a30f8f8cSSatish Balay   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1616a30f8f8cSSatish Balay   int         ierr;
1617a30f8f8cSSatish Balay 
1618a30f8f8cSSatish Balay   PetscFunctionBegin;
1619a30f8f8cSSatish Balay   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
1620a30f8f8cSSatish Balay       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
1621a30f8f8cSSatish Balay       op == MAT_COLUMNS_UNSORTED ||
1622a30f8f8cSSatish Balay       op == MAT_COLUMNS_SORTED ||
1623a30f8f8cSSatish Balay       op == MAT_NEW_NONZERO_ALLOCATION_ERR ||
1624a30f8f8cSSatish Balay       op == MAT_KEEP_ZEROED_ROWS ||
1625a30f8f8cSSatish Balay       op == MAT_NEW_NONZERO_LOCATION_ERR) {
1626a30f8f8cSSatish Balay         ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1627a30f8f8cSSatish Balay         ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1628a30f8f8cSSatish Balay   } else if (op == MAT_ROW_ORIENTED) {
1629a30f8f8cSSatish Balay         a->roworiented = PETSC_TRUE;
1630a30f8f8cSSatish Balay         ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1631a30f8f8cSSatish Balay         ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1632a30f8f8cSSatish Balay   } else if (op == MAT_ROWS_SORTED ||
1633a30f8f8cSSatish Balay              op == MAT_ROWS_UNSORTED ||
1634a30f8f8cSSatish Balay              op == MAT_SYMMETRIC ||
1635a30f8f8cSSatish Balay              op == MAT_STRUCTURALLY_SYMMETRIC ||
1636a30f8f8cSSatish Balay              op == MAT_YES_NEW_DIAGONALS ||
1637a30f8f8cSSatish Balay              op == MAT_USE_HASH_TABLE) {
1638a30f8f8cSSatish Balay     PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
1639a30f8f8cSSatish Balay   } else if (op == MAT_COLUMN_ORIENTED) {
1640a30f8f8cSSatish Balay     a->roworiented = PETSC_FALSE;
1641a30f8f8cSSatish Balay     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1642a30f8f8cSSatish Balay     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1643a30f8f8cSSatish Balay   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
1644a30f8f8cSSatish Balay     a->donotstash = PETSC_TRUE;
1645a30f8f8cSSatish Balay   } else if (op == MAT_NO_NEW_DIAGONALS) {
1646a30f8f8cSSatish Balay     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
1647a30f8f8cSSatish Balay   } else if (op == MAT_USE_HASH_TABLE) {
1648a30f8f8cSSatish Balay     a->ht_flag = PETSC_TRUE;
1649a30f8f8cSSatish Balay   } else {
1650a30f8f8cSSatish Balay     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
1651a30f8f8cSSatish Balay   }
1652a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1653a30f8f8cSSatish Balay }
1654a30f8f8cSSatish Balay 
1655a30f8f8cSSatish Balay #undef __FUNC__
1656a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatTranspose_MPISBAIJ("
1657a30f8f8cSSatish Balay int MatTranspose_MPISBAIJ(Mat A,Mat *matout)
1658a30f8f8cSSatish Balay {
1659a30f8f8cSSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data;
1660a30f8f8cSSatish Balay   Mat_SeqBAIJ *Aloc;
1661a30f8f8cSSatish Balay   Mat         B;
1662a30f8f8cSSatish Balay   int         ierr,M=baij->M,N=baij->N,*ai,*aj,i,*rvals,j,k,col;
1663a30f8f8cSSatish Balay   int         bs=baij->bs,mbs=baij->mbs;
1664a30f8f8cSSatish Balay   MatScalar   *a;
1665a30f8f8cSSatish Balay 
1666a30f8f8cSSatish Balay   PetscFunctionBegin;
1667a30f8f8cSSatish Balay   if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place");
1668a30f8f8cSSatish Balay   ierr = MatCreateMPIBAIJ(A->comm,baij->bs,baij->n,baij->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);
1669a30f8f8cSSatish Balay 
1670a30f8f8cSSatish Balay   /* copy over the A part */
1671a30f8f8cSSatish Balay   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1672a30f8f8cSSatish Balay   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1673a30f8f8cSSatish Balay   rvals = (int*)PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals);
1674a30f8f8cSSatish Balay 
1675a30f8f8cSSatish Balay   for (i=0; i<mbs; i++) {
1676a30f8f8cSSatish Balay     rvals[0] = bs*(baij->rstart + i);
1677a30f8f8cSSatish Balay     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1678a30f8f8cSSatish Balay     for (j=ai[i]; j<ai[i+1]; j++) {
1679a30f8f8cSSatish Balay       col = (baij->cstart+aj[j])*bs;
1680a30f8f8cSSatish Balay       for (k=0; k<bs; k++) {
1681a30f8f8cSSatish Balay         ierr = MatSetValues_MPISBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1682a30f8f8cSSatish Balay         col++; a += bs;
1683a30f8f8cSSatish Balay       }
1684a30f8f8cSSatish Balay     }
1685a30f8f8cSSatish Balay   }
1686a30f8f8cSSatish Balay   /* copy over the B part */
1687a30f8f8cSSatish Balay   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1688a30f8f8cSSatish Balay   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1689a30f8f8cSSatish Balay   for (i=0; i<mbs; i++) {
1690a30f8f8cSSatish Balay     rvals[0] = bs*(baij->rstart + i);
1691a30f8f8cSSatish Balay     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1692a30f8f8cSSatish Balay     for (j=ai[i]; j<ai[i+1]; j++) {
1693a30f8f8cSSatish Balay       col = baij->garray[aj[j]]*bs;
1694a30f8f8cSSatish Balay       for (k=0; k<bs; k++) {
1695a30f8f8cSSatish Balay         ierr = MatSetValues_MPISBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1696a30f8f8cSSatish Balay         col++; a += bs;
1697a30f8f8cSSatish Balay       }
1698a30f8f8cSSatish Balay     }
1699a30f8f8cSSatish Balay   }
1700a30f8f8cSSatish Balay   ierr = PetscFree(rvals);CHKERRQ(ierr);
1701a30f8f8cSSatish Balay   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1702a30f8f8cSSatish Balay   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1703a30f8f8cSSatish Balay 
1704a30f8f8cSSatish Balay   if (matout) {
1705a30f8f8cSSatish Balay     *matout = B;
1706a30f8f8cSSatish Balay   } else {
1707a30f8f8cSSatish Balay     PetscOps *Abops;
1708a30f8f8cSSatish Balay     MatOps   Aops;
1709a30f8f8cSSatish Balay 
1710a30f8f8cSSatish Balay     /* This isn't really an in-place transpose .... but free data structures from baij */
1711a30f8f8cSSatish Balay     ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
1712a30f8f8cSSatish Balay     ierr = MatDestroy(baij->A);CHKERRQ(ierr);
1713a30f8f8cSSatish Balay     ierr = MatDestroy(baij->B);CHKERRQ(ierr);
1714a30f8f8cSSatish Balay #if defined (PETSC_USE_CTABLE)
1715a30f8f8cSSatish Balay     if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
1716a30f8f8cSSatish Balay #else
1717a30f8f8cSSatish Balay     if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
1718a30f8f8cSSatish Balay #endif
1719a30f8f8cSSatish Balay     if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
1720a30f8f8cSSatish Balay     if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
1721a30f8f8cSSatish Balay     if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
1722a30f8f8cSSatish Balay     ierr = PetscFree(baij);CHKERRQ(ierr);
1723a30f8f8cSSatish Balay 
1724a30f8f8cSSatish Balay     /*
1725a30f8f8cSSatish Balay        This is horrible, horrible code. We need to keep the
1726a30f8f8cSSatish Balay       A pointers for the bops and ops but copy everything
1727a30f8f8cSSatish Balay       else from C.
1728a30f8f8cSSatish Balay     */
1729a30f8f8cSSatish Balay     Abops   = A->bops;
1730a30f8f8cSSatish Balay     Aops    = A->ops;
1731a30f8f8cSSatish Balay     ierr    = PetscMemcpy(A,B,sizeof(struct _p_Mat));CHKERRQ(ierr);
1732a30f8f8cSSatish Balay     A->bops = Abops;
1733a30f8f8cSSatish Balay     A->ops  = Aops;
1734a30f8f8cSSatish Balay 
1735a30f8f8cSSatish Balay     PetscHeaderDestroy(B);
1736a30f8f8cSSatish Balay   }
1737a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1738a30f8f8cSSatish Balay }
1739a30f8f8cSSatish Balay 
1740a30f8f8cSSatish Balay #undef __FUNC__
1741a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatDiagonalScale_MPISBAIJ"
1742a30f8f8cSSatish Balay int MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1743a30f8f8cSSatish Balay {
1744a30f8f8cSSatish Balay   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1745a30f8f8cSSatish Balay   Mat         a = baij->A,b = baij->B;
1746a30f8f8cSSatish Balay   int         ierr,s1,s2,s3;
1747a30f8f8cSSatish Balay 
1748a30f8f8cSSatish Balay   PetscFunctionBegin;
1749a30f8f8cSSatish Balay   if (ll != rr) {
1750a30f8f8cSSatish Balay     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"For symmetric format, left and right scaling vectors must be same\n");
1751a30f8f8cSSatish Balay   }
1752a30f8f8cSSatish Balay   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1753a30f8f8cSSatish Balay   if (rr) {
1754a30f8f8cSSatish Balay     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1755a30f8f8cSSatish Balay     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"right vector non-conforming local size");
1756a30f8f8cSSatish Balay     /* Overlap communication with computation. */
1757a30f8f8cSSatish Balay     ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1758a30f8f8cSSatish Balay     /*} if (ll) { */
1759a30f8f8cSSatish Balay     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1760a30f8f8cSSatish Balay     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"left vector non-conforming local size");
1761a30f8f8cSSatish Balay     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1762a30f8f8cSSatish Balay     /* } */
1763a30f8f8cSSatish Balay   /* scale  the diagonal block */
1764a30f8f8cSSatish Balay   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1765a30f8f8cSSatish Balay 
1766a30f8f8cSSatish Balay   /* if (rr) { */
1767a30f8f8cSSatish Balay     /* Do a scatter end and then right scale the off-diagonal block */
1768a30f8f8cSSatish Balay     ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1769a30f8f8cSSatish Balay     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1770a30f8f8cSSatish Balay   }
1771a30f8f8cSSatish Balay 
1772a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1773a30f8f8cSSatish Balay }
1774a30f8f8cSSatish Balay 
1775a30f8f8cSSatish Balay #undef __FUNC__
1776a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatZeroRows_MPISBAIJ"
1777a30f8f8cSSatish Balay int MatZeroRows_MPISBAIJ(Mat A,IS is,Scalar *diag)
1778a30f8f8cSSatish Balay {
1779a30f8f8cSSatish Balay   Mat_MPISBAIJ    *l = (Mat_MPISBAIJ*)A->data;
1780a30f8f8cSSatish Balay   int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
1781a30f8f8cSSatish Balay   int            *procs,*nprocs,j,found,idx,nsends,*work,row;
1782a30f8f8cSSatish Balay   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
1783a30f8f8cSSatish Balay   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
1784a30f8f8cSSatish Balay   int            *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs;
1785a30f8f8cSSatish Balay   MPI_Comm       comm = A->comm;
1786a30f8f8cSSatish Balay   MPI_Request    *send_waits,*recv_waits;
1787a30f8f8cSSatish Balay   MPI_Status     recv_status,*send_status;
1788a30f8f8cSSatish Balay   IS             istmp;
1789a30f8f8cSSatish Balay 
1790a30f8f8cSSatish Balay   PetscFunctionBegin;
1791a30f8f8cSSatish Balay   ierr = ISGetSize(is,&N);CHKERRQ(ierr);
1792a30f8f8cSSatish Balay   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
1793a30f8f8cSSatish Balay 
1794a30f8f8cSSatish Balay   /*  first count number of contributors to each processor */
1795a30f8f8cSSatish Balay   nprocs = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(nprocs);
1796a30f8f8cSSatish Balay   ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
1797a30f8f8cSSatish Balay   procs  = nprocs + size;
1798a30f8f8cSSatish Balay   owner  = (int*)PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note*/
1799a30f8f8cSSatish Balay   for (i=0; i<N; i++) {
1800a30f8f8cSSatish Balay     idx   = rows[i];
1801a30f8f8cSSatish Balay     found = 0;
1802a30f8f8cSSatish Balay     for (j=0; j<size; j++) {
1803a30f8f8cSSatish Balay       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
1804a30f8f8cSSatish Balay         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
1805a30f8f8cSSatish Balay       }
1806a30f8f8cSSatish Balay     }
1807a30f8f8cSSatish Balay     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
1808a30f8f8cSSatish Balay   }
1809a30f8f8cSSatish Balay   nsends = 0;  for (i=0; i<size; i++) { nsends += procs[i];}
1810a30f8f8cSSatish Balay 
1811a30f8f8cSSatish Balay   /* inform other processors of number of messages and max length*/
1812a30f8f8cSSatish Balay   work   = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(work);
1813a30f8f8cSSatish Balay   ierr   = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr);
1814a30f8f8cSSatish Balay   nmax   = work[rank];
1815a30f8f8cSSatish Balay   nrecvs = work[size+rank];
1816a30f8f8cSSatish Balay   ierr = PetscFree(work);CHKERRQ(ierr);
1817a30f8f8cSSatish Balay 
1818a30f8f8cSSatish Balay   /* post receives:   */
1819a30f8f8cSSatish Balay   rvalues = (int*)PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
1820a30f8f8cSSatish Balay   recv_waits = (MPI_Request*)PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
1821a30f8f8cSSatish Balay   for (i=0; i<nrecvs; i++) {
1822a30f8f8cSSatish Balay     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
1823a30f8f8cSSatish Balay   }
1824a30f8f8cSSatish Balay 
1825a30f8f8cSSatish Balay   /* do sends:
1826a30f8f8cSSatish Balay      1) starts[i] gives the starting index in svalues for stuff going to
1827a30f8f8cSSatish Balay      the ith processor
1828a30f8f8cSSatish Balay   */
1829a30f8f8cSSatish Balay   svalues    = (int*)PetscMalloc((N+1)*sizeof(int));CHKPTRQ(svalues);
1830a30f8f8cSSatish Balay   send_waits = (MPI_Request*)PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
1831a30f8f8cSSatish Balay   starts     = (int*)PetscMalloc((size+1)*sizeof(int));CHKPTRQ(starts);
1832a30f8f8cSSatish Balay   starts[0]  = 0;
1833a30f8f8cSSatish Balay   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
1834a30f8f8cSSatish Balay   for (i=0; i<N; i++) {
1835a30f8f8cSSatish Balay     svalues[starts[owner[i]]++] = rows[i];
1836a30f8f8cSSatish Balay   }
1837a30f8f8cSSatish Balay   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
1838a30f8f8cSSatish Balay 
1839a30f8f8cSSatish Balay   starts[0] = 0;
1840a30f8f8cSSatish Balay   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
1841a30f8f8cSSatish Balay   count = 0;
1842a30f8f8cSSatish Balay   for (i=0; i<size; i++) {
1843a30f8f8cSSatish Balay     if (procs[i]) {
1844a30f8f8cSSatish Balay       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
1845a30f8f8cSSatish Balay     }
1846a30f8f8cSSatish Balay   }
1847a30f8f8cSSatish Balay   ierr = PetscFree(starts);CHKERRQ(ierr);
1848a30f8f8cSSatish Balay 
1849a30f8f8cSSatish Balay   base = owners[rank]*bs;
1850a30f8f8cSSatish Balay 
1851a30f8f8cSSatish Balay   /*  wait on receives */
1852a30f8f8cSSatish Balay   lens   = (int*)PetscMalloc(2*(nrecvs+1)*sizeof(int));CHKPTRQ(lens);
1853a30f8f8cSSatish Balay   source = lens + nrecvs;
1854a30f8f8cSSatish Balay   count  = nrecvs; slen = 0;
1855a30f8f8cSSatish Balay   while (count) {
1856a30f8f8cSSatish Balay     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
1857a30f8f8cSSatish Balay     /* unpack receives into our local space */
1858a30f8f8cSSatish Balay     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
1859a30f8f8cSSatish Balay     source[imdex]  = recv_status.MPI_SOURCE;
1860a30f8f8cSSatish Balay     lens[imdex]    = n;
1861a30f8f8cSSatish Balay     slen          += n;
1862a30f8f8cSSatish Balay     count--;
1863a30f8f8cSSatish Balay   }
1864a30f8f8cSSatish Balay   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1865a30f8f8cSSatish Balay 
1866a30f8f8cSSatish Balay   /* move the data into the send scatter */
1867a30f8f8cSSatish Balay   lrows = (int*)PetscMalloc((slen+1)*sizeof(int));CHKPTRQ(lrows);
1868a30f8f8cSSatish Balay   count = 0;
1869a30f8f8cSSatish Balay   for (i=0; i<nrecvs; i++) {
1870a30f8f8cSSatish Balay     values = rvalues + i*nmax;
1871a30f8f8cSSatish Balay     for (j=0; j<lens[i]; j++) {
1872a30f8f8cSSatish Balay       lrows[count++] = values[j] - base;
1873a30f8f8cSSatish Balay     }
1874a30f8f8cSSatish Balay   }
1875a30f8f8cSSatish Balay   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1876a30f8f8cSSatish Balay   ierr = PetscFree(lens);CHKERRQ(ierr);
1877a30f8f8cSSatish Balay   ierr = PetscFree(owner);CHKERRQ(ierr);
1878a30f8f8cSSatish Balay   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1879a30f8f8cSSatish Balay 
1880a30f8f8cSSatish Balay   /* actually zap the local rows */
1881a30f8f8cSSatish Balay   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
1882a30f8f8cSSatish Balay   PLogObjectParent(A,istmp);
1883a30f8f8cSSatish Balay 
1884a30f8f8cSSatish Balay   /*
1885a30f8f8cSSatish Balay         Zero the required rows. If the "diagonal block" of the matrix
1886a30f8f8cSSatish Balay      is square and the user wishes to set the diagonal we use seperate
1887a30f8f8cSSatish Balay      code so that MatSetValues() is not called for each diagonal allocating
1888a30f8f8cSSatish Balay      new memory, thus calling lots of mallocs and slowing things down.
1889a30f8f8cSSatish Balay 
1890a30f8f8cSSatish Balay        Contributed by: Mathew Knepley
1891a30f8f8cSSatish Balay   */
1892a30f8f8cSSatish Balay   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1893c8407628SSatish Balay   ierr = MatZeroRows_SeqSBAIJ(l->B,istmp,0);CHKERRQ(ierr);
1894a30f8f8cSSatish Balay   if (diag && (l->A->M == l->A->N)) {
1895a30f8f8cSSatish Balay     ierr = MatZeroRows_SeqSBAIJ(l->A,istmp,diag);CHKERRQ(ierr);
1896a30f8f8cSSatish Balay   } else if (diag) {
1897a30f8f8cSSatish Balay     ierr = MatZeroRows_SeqSBAIJ(l->A,istmp,0);CHKERRQ(ierr);
1898a30f8f8cSSatish Balay     if (((Mat_SeqSBAIJ*)l->A->data)->nonew) {
1899a30f8f8cSSatish Balay       SETERRQ(PETSC_ERR_SUP,0,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1900a30f8f8cSSatish Balay MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1901a30f8f8cSSatish Balay     }
1902a30f8f8cSSatish Balay     for (i=0; i<slen; i++) {
1903a30f8f8cSSatish Balay       row  = lrows[i] + rstart_bs;
1904a30f8f8cSSatish Balay       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
1905a30f8f8cSSatish Balay     }
1906a30f8f8cSSatish Balay     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1907a30f8f8cSSatish Balay     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1908a30f8f8cSSatish Balay   } else {
1909a30f8f8cSSatish Balay     ierr = MatZeroRows_SeqSBAIJ(l->A,istmp,0);CHKERRQ(ierr);
1910a30f8f8cSSatish Balay   }
1911a30f8f8cSSatish Balay 
1912a30f8f8cSSatish Balay   ierr = ISDestroy(istmp);CHKERRQ(ierr);
1913a30f8f8cSSatish Balay   ierr = PetscFree(lrows);CHKERRQ(ierr);
1914a30f8f8cSSatish Balay 
1915a30f8f8cSSatish Balay   /* wait on sends */
1916a30f8f8cSSatish Balay   if (nsends) {
1917a30f8f8cSSatish Balay     send_status = (MPI_Status*)PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
1918a30f8f8cSSatish Balay     ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1919a30f8f8cSSatish Balay     ierr        = PetscFree(send_status);CHKERRQ(ierr);
1920a30f8f8cSSatish Balay   }
1921a30f8f8cSSatish Balay   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1922a30f8f8cSSatish Balay   ierr = PetscFree(svalues);CHKERRQ(ierr);
1923a30f8f8cSSatish Balay 
1924a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1925a30f8f8cSSatish Balay }
1926a30f8f8cSSatish Balay 
1927a30f8f8cSSatish Balay #undef __FUNC__
1928a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatPrintHelp_MPISBAIJ"
1929a30f8f8cSSatish Balay int MatPrintHelp_MPISBAIJ(Mat A)
1930a30f8f8cSSatish Balay {
1931a30f8f8cSSatish Balay   Mat_MPISBAIJ *a   = (Mat_MPISBAIJ*)A->data;
1932a30f8f8cSSatish Balay   MPI_Comm    comm = A->comm;
1933a30f8f8cSSatish Balay   static int  called = 0;
1934a30f8f8cSSatish Balay   int         ierr;
1935a30f8f8cSSatish Balay 
1936a30f8f8cSSatish Balay   PetscFunctionBegin;
1937a30f8f8cSSatish Balay   if (!a->rank) {
1938a30f8f8cSSatish Balay     ierr = MatPrintHelp_SeqSBAIJ(a->A);CHKERRQ(ierr);
1939a30f8f8cSSatish Balay   }
1940a30f8f8cSSatish Balay   if (called) {PetscFunctionReturn(0);} else called = 1;
1941a30f8f8cSSatish Balay   ierr = (*PetscHelpPrintf)(comm," Options for MATMPISBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
1942a30f8f8cSSatish Balay   ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
1943a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1944a30f8f8cSSatish Balay }
1945a30f8f8cSSatish Balay 
1946a30f8f8cSSatish Balay #undef __FUNC__
1947a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatSetUnfactored_MPISBAIJ"
1948a30f8f8cSSatish Balay int MatSetUnfactored_MPISBAIJ(Mat A)
1949a30f8f8cSSatish Balay {
1950a30f8f8cSSatish Balay   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*)A->data;
1951a30f8f8cSSatish Balay   int         ierr;
1952a30f8f8cSSatish Balay 
1953a30f8f8cSSatish Balay   PetscFunctionBegin;
1954a30f8f8cSSatish Balay   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1955a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1956a30f8f8cSSatish Balay }
1957a30f8f8cSSatish Balay 
1958a30f8f8cSSatish Balay static int MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *);
1959a30f8f8cSSatish Balay 
1960a30f8f8cSSatish Balay #undef __FUNC__
1961a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatEqual_MPISBAIJ"
1962a30f8f8cSSatish Balay int MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag)
1963a30f8f8cSSatish Balay {
1964a30f8f8cSSatish Balay   Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1965a30f8f8cSSatish Balay   Mat         a,b,c,d;
1966a30f8f8cSSatish Balay   PetscTruth  flg;
1967a30f8f8cSSatish Balay   int         ierr;
1968a30f8f8cSSatish Balay 
1969a30f8f8cSSatish Balay   PetscFunctionBegin;
1970a30f8f8cSSatish Balay   if (B->type != MATMPISBAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type");
1971a30f8f8cSSatish Balay   a = matA->A; b = matA->B;
1972a30f8f8cSSatish Balay   c = matB->A; d = matB->B;
1973a30f8f8cSSatish Balay 
1974a30f8f8cSSatish Balay   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1975a30f8f8cSSatish Balay   if (flg == PETSC_TRUE) {
1976a30f8f8cSSatish Balay     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1977a30f8f8cSSatish Balay   }
1978a30f8f8cSSatish Balay   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1979a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1980a30f8f8cSSatish Balay }
1981a30f8f8cSSatish Balay 
1982a30f8f8cSSatish Balay /* -------------------------------------------------------------------*/
1983a30f8f8cSSatish Balay static struct _MatOps MatOps_Values = {
1984a30f8f8cSSatish Balay   MatSetValues_MPISBAIJ,
1985a30f8f8cSSatish Balay   MatGetRow_MPISBAIJ,
1986a30f8f8cSSatish Balay   MatRestoreRow_MPISBAIJ,
1987a30f8f8cSSatish Balay   MatMult_MPISBAIJ,
1988a30f8f8cSSatish Balay   MatMultAdd_MPISBAIJ,
1989a30f8f8cSSatish Balay   MatMultTranspose_MPISBAIJ,
1990a30f8f8cSSatish Balay   MatMultTransposeAdd_MPISBAIJ,
1991a30f8f8cSSatish Balay   0,
1992a30f8f8cSSatish Balay   0,
1993a30f8f8cSSatish Balay   0,
1994a30f8f8cSSatish Balay   0,
1995a30f8f8cSSatish Balay   0,
1996a30f8f8cSSatish Balay   0,
1997a30f8f8cSSatish Balay   0,
1998a30f8f8cSSatish Balay   MatTranspose_MPISBAIJ,
1999a30f8f8cSSatish Balay   MatGetInfo_MPISBAIJ,
2000a30f8f8cSSatish Balay   MatEqual_MPISBAIJ,
2001a30f8f8cSSatish Balay   MatGetDiagonal_MPISBAIJ,
2002a30f8f8cSSatish Balay   MatDiagonalScale_MPISBAIJ,
2003a30f8f8cSSatish Balay   MatNorm_MPISBAIJ,
2004a30f8f8cSSatish Balay   MatAssemblyBegin_MPISBAIJ,
2005a30f8f8cSSatish Balay   MatAssemblyEnd_MPISBAIJ,
2006a30f8f8cSSatish Balay   0,
2007a30f8f8cSSatish Balay   MatSetOption_MPISBAIJ,
2008a30f8f8cSSatish Balay   MatZeroEntries_MPISBAIJ,
2009a30f8f8cSSatish Balay   MatZeroRows_MPISBAIJ,
2010a30f8f8cSSatish Balay   0,
2011a30f8f8cSSatish Balay   0,
2012a30f8f8cSSatish Balay   0,
2013a30f8f8cSSatish Balay   0,
2014a30f8f8cSSatish Balay   MatGetSize_MPISBAIJ,
2015a30f8f8cSSatish Balay   MatGetLocalSize_MPISBAIJ,
2016a30f8f8cSSatish Balay   MatGetOwnershipRange_MPISBAIJ,
2017a30f8f8cSSatish Balay   0,
2018a30f8f8cSSatish Balay   0,
2019a30f8f8cSSatish Balay   0,
2020a30f8f8cSSatish Balay   0,
2021a30f8f8cSSatish Balay   MatDuplicate_MPISBAIJ,
2022a30f8f8cSSatish Balay   0,
2023a30f8f8cSSatish Balay   0,
2024a30f8f8cSSatish Balay   0,
2025a30f8f8cSSatish Balay   0,
2026a30f8f8cSSatish Balay   0,
2027a30f8f8cSSatish Balay   MatGetSubMatrices_MPISBAIJ,
2028a30f8f8cSSatish Balay   MatIncreaseOverlap_MPISBAIJ,
2029a30f8f8cSSatish Balay   MatGetValues_MPISBAIJ,
2030a30f8f8cSSatish Balay   0,
2031a30f8f8cSSatish Balay   MatPrintHelp_MPISBAIJ,
2032a30f8f8cSSatish Balay   MatScale_MPISBAIJ,
2033a30f8f8cSSatish Balay   0,
2034a30f8f8cSSatish Balay   0,
2035a30f8f8cSSatish Balay   0,
2036a30f8f8cSSatish Balay   MatGetBlockSize_MPISBAIJ,
2037a30f8f8cSSatish Balay   0,
2038a30f8f8cSSatish Balay   0,
2039a30f8f8cSSatish Balay   0,
2040a30f8f8cSSatish Balay   0,
2041a30f8f8cSSatish Balay   0,
2042a30f8f8cSSatish Balay   0,
2043a30f8f8cSSatish Balay   MatSetUnfactored_MPISBAIJ,
2044a30f8f8cSSatish Balay   0,
2045a30f8f8cSSatish Balay   MatSetValuesBlocked_MPISBAIJ,
2046a30f8f8cSSatish Balay   0,
2047a30f8f8cSSatish Balay   0,
2048a30f8f8cSSatish Balay   0,
2049a30f8f8cSSatish Balay   MatGetMaps_Petsc};
2050a30f8f8cSSatish Balay 
2051a30f8f8cSSatish Balay 
2052a30f8f8cSSatish Balay EXTERN_C_BEGIN
2053a30f8f8cSSatish Balay #undef __FUNC__
2054a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatGetDiagonalBlock_MPISBAIJ"
2055a30f8f8cSSatish Balay int MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
2056a30f8f8cSSatish Balay {
2057a30f8f8cSSatish Balay   PetscFunctionBegin;
2058a30f8f8cSSatish Balay   *a      = ((Mat_MPISBAIJ *)A->data)->A;
2059a30f8f8cSSatish Balay   *iscopy = PETSC_FALSE;
2060a30f8f8cSSatish Balay   PetscFunctionReturn(0);
2061a30f8f8cSSatish Balay }
2062a30f8f8cSSatish Balay EXTERN_C_END
2063a30f8f8cSSatish Balay 
2064a30f8f8cSSatish Balay #undef __FUNC__
2065a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatCreateMPISBAIJ"
2066a30f8f8cSSatish Balay /*@C
2067a30f8f8cSSatish Balay    MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
2068a30f8f8cSSatish Balay    (block compressed row).  For good matrix assembly performance
2069a30f8f8cSSatish Balay    the user should preallocate the matrix storage by setting the parameters
2070a30f8f8cSSatish Balay    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2071a30f8f8cSSatish Balay    performance can be increased by more than a factor of 50.
2072a30f8f8cSSatish Balay 
2073a30f8f8cSSatish Balay    Collective on MPI_Comm
2074a30f8f8cSSatish Balay 
2075a30f8f8cSSatish Balay    Input Parameters:
2076a30f8f8cSSatish Balay +  comm - MPI communicator
2077a30f8f8cSSatish Balay .  bs   - size of blockk
2078a30f8f8cSSatish Balay .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2079a30f8f8cSSatish Balay            This value should be the same as the local size used in creating the
2080a30f8f8cSSatish Balay            y vector for the matrix-vector product y = Ax.
2081a30f8f8cSSatish Balay .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2082a30f8f8cSSatish Balay            This value should be the same as the local size used in creating the
2083a30f8f8cSSatish Balay            x vector for the matrix-vector product y = Ax.
2084a30f8f8cSSatish Balay .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2085a30f8f8cSSatish Balay .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2086a30f8f8cSSatish Balay .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2087a30f8f8cSSatish Balay            submatrix  (same for all local rows)
2088a30f8f8cSSatish Balay .  d_nnz - array containing the number of block nonzeros in the various block rows
2089a30f8f8cSSatish Balay            of the in diagonal portion of the local (possibly different for each block
2090a30f8f8cSSatish Balay            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
2091a30f8f8cSSatish Balay .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2092a30f8f8cSSatish Balay            submatrix (same for all local rows).
2093a30f8f8cSSatish Balay -  o_nnz - array containing the number of nonzeros in the various block rows of the
2094a30f8f8cSSatish Balay            off-diagonal portion of the local submatrix (possibly different for
2095a30f8f8cSSatish Balay            each block row) or PETSC_NULL.
2096a30f8f8cSSatish Balay 
2097a30f8f8cSSatish Balay    Output Parameter:
2098a30f8f8cSSatish Balay .  A - the matrix
2099a30f8f8cSSatish Balay 
2100a30f8f8cSSatish Balay    Options Database Keys:
2101a30f8f8cSSatish Balay .   -mat_no_unroll - uses code that does not unroll the loops in the
2102a30f8f8cSSatish Balay                      block calculations (much slower)
2103a30f8f8cSSatish Balay .   -mat_block_size - size of the blocks to use
2104a30f8f8cSSatish Balay .   -mat_mpi - use the parallel matrix data structures even on one processor
2105a30f8f8cSSatish Balay                (defaults to using SeqBAIJ format on one processor)
2106a30f8f8cSSatish Balay 
2107a30f8f8cSSatish Balay    Notes:
2108a30f8f8cSSatish Balay    The user MUST specify either the local or global matrix dimensions
2109a30f8f8cSSatish Balay    (possibly both).
2110a30f8f8cSSatish Balay 
2111a30f8f8cSSatish Balay    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
2112a30f8f8cSSatish Balay    than it must be used on all processors that share the object for that argument.
2113a30f8f8cSSatish Balay 
2114a30f8f8cSSatish Balay    Storage Information:
2115a30f8f8cSSatish Balay    For a square global matrix we define each processor's diagonal portion
2116a30f8f8cSSatish Balay    to be its local rows and the corresponding columns (a square submatrix);
2117a30f8f8cSSatish Balay    each processor's off-diagonal portion encompasses the remainder of the
2118a30f8f8cSSatish Balay    local matrix (a rectangular submatrix).
2119a30f8f8cSSatish Balay 
2120a30f8f8cSSatish Balay    The user can specify preallocated storage for the diagonal part of
2121a30f8f8cSSatish Balay    the local submatrix with either d_nz or d_nnz (not both).  Set
2122a30f8f8cSSatish Balay    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2123a30f8f8cSSatish Balay    memory allocation.  Likewise, specify preallocated storage for the
2124a30f8f8cSSatish Balay    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2125a30f8f8cSSatish Balay 
2126a30f8f8cSSatish Balay    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2127a30f8f8cSSatish Balay    the figure below we depict these three local rows and all columns (0-11).
2128a30f8f8cSSatish Balay 
2129a30f8f8cSSatish Balay .vb
2130a30f8f8cSSatish Balay            0 1 2 3 4 5 6 7 8 9 10 11
2131a30f8f8cSSatish Balay           -------------------
2132a30f8f8cSSatish Balay    row 3  |  o o o d d d o o o o o o
2133a30f8f8cSSatish Balay    row 4  |  o o o d d d o o o o o o
2134a30f8f8cSSatish Balay    row 5  |  o o o d d d o o o o o o
2135a30f8f8cSSatish Balay           -------------------
2136a30f8f8cSSatish Balay .ve
2137a30f8f8cSSatish Balay 
2138a30f8f8cSSatish Balay    Thus, any entries in the d locations are stored in the d (diagonal)
2139a30f8f8cSSatish Balay    submatrix, and any entries in the o locations are stored in the
2140a30f8f8cSSatish Balay    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
2141a30f8f8cSSatish Balay    stored simply in the MATSEQBAIJ format for compressed row storage.
2142a30f8f8cSSatish Balay 
2143a30f8f8cSSatish Balay    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2144a30f8f8cSSatish Balay    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2145a30f8f8cSSatish Balay    In general, for PDE problems in which most nonzeros are near the diagonal,
2146a30f8f8cSSatish Balay    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2147a30f8f8cSSatish Balay    or you will get TERRIBLE performance; see the users' manual chapter on
2148a30f8f8cSSatish Balay    matrices.
2149a30f8f8cSSatish Balay 
2150a30f8f8cSSatish Balay    Level: intermediate
2151a30f8f8cSSatish Balay 
2152a30f8f8cSSatish Balay .keywords: matrix, block, aij, compressed row, sparse, parallel
2153a30f8f8cSSatish Balay 
2154a30f8f8cSSatish Balay .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPISBAIJ()
2155a30f8f8cSSatish Balay @*/
2156a30f8f8cSSatish Balay 
2157a30f8f8cSSatish Balay int MatCreateMPISBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
2158a30f8f8cSSatish Balay {
2159a30f8f8cSSatish Balay   Mat          B;
2160a30f8f8cSSatish Balay   Mat_MPISBAIJ  *b;
2161f65c83cfSHong Zhang   int          ierr,i,sum[1],work[1],mbs,Mbs=PETSC_DECIDE,size;
2162a30f8f8cSSatish Balay   PetscTruth   flag1,flag2,flg;
2163a30f8f8cSSatish Balay 
2164a30f8f8cSSatish Balay   PetscFunctionBegin;
2165a30f8f8cSSatish Balay   if (M != N || m != n){ /* N and n are not used after this */
2166a30f8f8cSSatish Balay     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"For symmetric format, set M=N and m=n");
2167a30f8f8cSSatish Balay   }
2168a30f8f8cSSatish Balay   ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2169a30f8f8cSSatish Balay 
2170a30f8f8cSSatish Balay   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Invalid block size specified, must be positive");
2171a30f8f8cSSatish Balay   if (d_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nz cannot be less than -2: value %d",d_nz);
2172a30f8f8cSSatish Balay   if (o_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nz cannot be less than -2: value %d",o_nz);
2173a30f8f8cSSatish Balay   if (d_nnz) {
2174a30f8f8cSSatish Balay     for (i=0; i<m/bs; i++) {
2175a30f8f8cSSatish Balay       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nnz cannot be less than -1: local row %d value %d",i,d_nnz[i]);
2176a30f8f8cSSatish Balay     }
2177a30f8f8cSSatish Balay   }
2178a30f8f8cSSatish Balay   if (o_nnz) {
2179a30f8f8cSSatish Balay     for (i=0; i<m/bs; i++) {
2180a30f8f8cSSatish Balay       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nnz cannot be less than -1: local row %d value %d",i,o_nnz[i]);
2181a30f8f8cSSatish Balay     }
2182a30f8f8cSSatish Balay   }
2183a30f8f8cSSatish Balay 
2184a30f8f8cSSatish Balay   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2185a30f8f8cSSatish Balay   ierr = OptionsHasName(PETSC_NULL,"-mat_mpisbaij",&flag1);CHKERRQ(ierr);
2186a30f8f8cSSatish Balay   ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2);CHKERRQ(ierr);
2187a30f8f8cSSatish Balay   if (!flag1 && !flag2 && size == 1) {
2188a30f8f8cSSatish Balay     if (M == PETSC_DECIDE) M = m;
2189a30f8f8cSSatish Balay     ierr = MatCreateSeqSBAIJ(comm,bs,M,M,d_nz,d_nnz,A);CHKERRQ(ierr);
2190a30f8f8cSSatish Balay     PetscFunctionReturn(0);
2191a30f8f8cSSatish Balay   }
2192a30f8f8cSSatish Balay 
2193a30f8f8cSSatish Balay   *A = 0;
2194a30f8f8cSSatish Balay   PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPISBAIJ,"Mat",comm,MatDestroy,MatView);
2195a30f8f8cSSatish Balay   PLogObjectCreate(B);
2196a30f8f8cSSatish Balay   B->data = (void*)(b = PetscNew(Mat_MPISBAIJ));CHKPTRQ(b);
2197a30f8f8cSSatish Balay   ierr    = PetscMemzero(b,sizeof(Mat_MPISBAIJ));CHKERRQ(ierr);
2198a30f8f8cSSatish Balay   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2199a30f8f8cSSatish Balay 
2200a30f8f8cSSatish Balay   B->ops->destroy    = MatDestroy_MPISBAIJ;
2201a30f8f8cSSatish Balay   B->ops->view       = MatView_MPISBAIJ;
2202a30f8f8cSSatish Balay   B->mapping    = 0;
2203a30f8f8cSSatish Balay   B->factor     = 0;
2204a30f8f8cSSatish Balay   B->assembled  = PETSC_FALSE;
2205a30f8f8cSSatish Balay 
2206a30f8f8cSSatish Balay   B->insertmode = NOT_SET_VALUES;
2207a30f8f8cSSatish Balay   ierr = MPI_Comm_rank(comm,&b->rank);CHKERRQ(ierr);
2208a30f8f8cSSatish Balay   ierr = MPI_Comm_size(comm,&b->size);CHKERRQ(ierr);
2209a30f8f8cSSatish Balay 
2210a30f8f8cSSatish Balay   if (m == PETSC_DECIDE && (d_nnz || o_nnz)) {
2211a30f8f8cSSatish Balay     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
2212a30f8f8cSSatish Balay   }
2213a30f8f8cSSatish Balay   if (M == PETSC_DECIDE && m == PETSC_DECIDE) {
2214a30f8f8cSSatish Balay     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either M or m should be specified");
2215a30f8f8cSSatish Balay   }
2216a30f8f8cSSatish Balay   if (M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE;
2217a30f8f8cSSatish Balay 
2218a30f8f8cSSatish Balay   if (M == PETSC_DECIDE) {
2219a30f8f8cSSatish Balay     work[0] = m; mbs = m/bs;
2220a30f8f8cSSatish Balay     ierr = MPI_Allreduce(work,sum,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
2221a30f8f8cSSatish Balay     M = sum[0]; Mbs = M/bs;
2222a30f8f8cSSatish Balay   } else { /* M is specified */
2223a30f8f8cSSatish Balay     Mbs = M/bs;
2224a30f8f8cSSatish Balay     if (Mbs*bs != M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global rows must be divisible by blocksize");
2225a30f8f8cSSatish Balay     mbs = Mbs/b->size + ((Mbs % b->size) > b->rank);
2226a30f8f8cSSatish Balay     m   = mbs*bs;
2227a30f8f8cSSatish Balay   }
2228a30f8f8cSSatish Balay 
2229a30f8f8cSSatish Balay   if (mbs*bs != m) {
2230a30f8f8cSSatish Balay     SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of local rows/cols must be divisible by blocksize");
2231a30f8f8cSSatish Balay   }
2232a30f8f8cSSatish Balay 
2233a30f8f8cSSatish Balay   b->m = m; B->m = m;
2234a30f8f8cSSatish Balay   b->n = m; B->n = m;
2235a30f8f8cSSatish Balay   b->N = M; B->N = M;
2236a30f8f8cSSatish Balay   b->M = M;
2237a30f8f8cSSatish Balay   B->M = M;
2238a30f8f8cSSatish Balay   b->bs  = bs;
2239a30f8f8cSSatish Balay   b->bs2 = bs*bs;
2240a30f8f8cSSatish Balay   b->mbs = mbs;
2241a30f8f8cSSatish Balay   b->nbs = mbs;
2242a30f8f8cSSatish Balay   b->Mbs = Mbs;
2243a30f8f8cSSatish Balay   b->Nbs = Mbs;
2244a30f8f8cSSatish Balay 
2245a30f8f8cSSatish Balay   /* the information in the maps duplicates the information computed below, eventually
2246a30f8f8cSSatish Balay      we should remove the duplicate information that is not contained in the maps */
2247a30f8f8cSSatish Balay   ierr = MapCreateMPI(B->comm,m,M,&B->rmap);CHKERRQ(ierr);
2248a30f8f8cSSatish Balay   ierr = MapCreateMPI(B->comm,m,M,&B->cmap);CHKERRQ(ierr);
2249a30f8f8cSSatish Balay 
2250a30f8f8cSSatish Balay   /* build local table of row and column ownerships */
2251a30f8f8cSSatish Balay   b->rowners = (int*)PetscMalloc(3*(b->size+2)*sizeof(int));CHKPTRQ(b->rowners);
2252a30f8f8cSSatish Balay   PLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ));
2253a30f8f8cSSatish Balay   b->cowners    = b->rowners + b->size + 2;
2254a30f8f8cSSatish Balay   b->rowners_bs = b->cowners + b->size + 2;
2255a30f8f8cSSatish Balay   ierr = MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2256a30f8f8cSSatish Balay   b->rowners[0]    = 0;
2257a30f8f8cSSatish Balay   for (i=2; i<=b->size; i++) {
2258a30f8f8cSSatish Balay     b->rowners[i] += b->rowners[i-1];
2259a30f8f8cSSatish Balay   }
2260a30f8f8cSSatish Balay   for (i=0; i<=b->size; i++) {
2261a30f8f8cSSatish Balay     b->rowners_bs[i] = b->rowners[i]*bs;
2262a30f8f8cSSatish Balay   }
2263a30f8f8cSSatish Balay   b->rstart    = b->rowners[b->rank];
2264a30f8f8cSSatish Balay   b->rend      = b->rowners[b->rank+1];
2265a30f8f8cSSatish Balay   b->rstart_bs = b->rstart * bs;
2266a30f8f8cSSatish Balay   b->rend_bs   = b->rend * bs;
2267a30f8f8cSSatish Balay 
2268a30f8f8cSSatish Balay   b->cstart    = b->rstart;
2269a30f8f8cSSatish Balay   b->cend      = b->rend;
2270a30f8f8cSSatish Balay   b->cstart_bs = b->cstart * bs;
2271a30f8f8cSSatish Balay   b->cend_bs   = b->cend * bs;
2272a30f8f8cSSatish Balay 
2273a30f8f8cSSatish Balay 
2274a30f8f8cSSatish Balay   if (d_nz == PETSC_DEFAULT) d_nz = 5;
2275a30f8f8cSSatish Balay   ierr = MatCreateSeqSBAIJ(PETSC_COMM_SELF,bs,m,m,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
2276a30f8f8cSSatish Balay   PLogObjectParent(B,b->A);
2277a30f8f8cSSatish Balay   if (o_nz == PETSC_DEFAULT) o_nz = 0;
2278a30f8f8cSSatish Balay   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,M,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
2279a30f8f8cSSatish Balay   PLogObjectParent(B,b->B);
2280a30f8f8cSSatish Balay 
2281a30f8f8cSSatish Balay   /* build cache for off array entries formed */
2282a30f8f8cSSatish Balay   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
2283a30f8f8cSSatish Balay   ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);
2284a30f8f8cSSatish Balay   b->donotstash  = PETSC_FALSE;
2285a30f8f8cSSatish Balay   b->colmap      = PETSC_NULL;
2286a30f8f8cSSatish Balay   b->garray      = PETSC_NULL;
2287a30f8f8cSSatish Balay   b->roworiented = PETSC_TRUE;
2288a30f8f8cSSatish Balay 
2289a30f8f8cSSatish Balay #if defined(PEYSC_USE_MAT_SINGLE)
2290a30f8f8cSSatish Balay   /* stuff for MatSetValues_XXX in single precision */
2291a30f8f8cSSatish Balay   b->lensetvalues     = 0;
2292a30f8f8cSSatish Balay   b->setvaluescopy    = PETSC_NULL;
2293a30f8f8cSSatish Balay #endif
2294a30f8f8cSSatish Balay 
2295a30f8f8cSSatish Balay   /* stuff used in block assembly */
2296a30f8f8cSSatish Balay   b->barray       = 0;
2297a30f8f8cSSatish Balay 
2298a30f8f8cSSatish Balay   /* stuff used for matrix vector multiply */
2299a30f8f8cSSatish Balay   b->lvec         = 0;
2300a30f8f8cSSatish Balay   b->Mvctx        = 0;
2301a30f8f8cSSatish Balay 
2302a30f8f8cSSatish Balay   /* stuff for MatGetRow() */
2303a30f8f8cSSatish Balay   b->rowindices   = 0;
2304a30f8f8cSSatish Balay   b->rowvalues    = 0;
2305a30f8f8cSSatish Balay   b->getrowactive = PETSC_FALSE;
2306a30f8f8cSSatish Balay 
2307a30f8f8cSSatish Balay   /* hash table stuff */
2308a30f8f8cSSatish Balay   b->ht           = 0;
2309a30f8f8cSSatish Balay   b->hd           = 0;
2310a30f8f8cSSatish Balay   b->ht_size      = 0;
2311a30f8f8cSSatish Balay   b->ht_flag      = PETSC_FALSE;
2312a30f8f8cSSatish Balay   b->ht_fact      = 0;
2313a30f8f8cSSatish Balay   b->ht_total_ct  = 0;
2314a30f8f8cSSatish Balay   b->ht_insert_ct = 0;
2315a30f8f8cSSatish Balay 
2316a30f8f8cSSatish Balay   *A = B;
2317a30f8f8cSSatish Balay   ierr = OptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
2318a30f8f8cSSatish Balay   if (flg) {
2319a30f8f8cSSatish Balay     double fact = 1.39;
2320a30f8f8cSSatish Balay     ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
2321a30f8f8cSSatish Balay     ierr = OptionsGetDouble(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
2322a30f8f8cSSatish Balay     if (fact <= 1.0) fact = 1.39;
2323a30f8f8cSSatish Balay     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
2324a30f8f8cSSatish Balay     PLogInfo(0,"MatCreateMPISBAIJ:Hash table Factor used %5.2f\n",fact);
2325a30f8f8cSSatish Balay   }
2326a30f8f8cSSatish Balay   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2327a30f8f8cSSatish Balay                                      "MatStoreValues_MPISBAIJ",
2328a30f8f8cSSatish Balay                                      MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
2329a30f8f8cSSatish Balay   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2330a30f8f8cSSatish Balay                                      "MatRetrieveValues_MPISBAIJ",
2331a30f8f8cSSatish Balay                                      MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
2332a30f8f8cSSatish Balay   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
2333a30f8f8cSSatish Balay                                      "MatGetDiagonalBlock_MPISBAIJ",
2334a30f8f8cSSatish Balay                                      MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
2335a30f8f8cSSatish Balay   PetscFunctionReturn(0);
2336a30f8f8cSSatish Balay }
2337a30f8f8cSSatish Balay 
2338a30f8f8cSSatish Balay 
2339a30f8f8cSSatish Balay #undef __FUNC__
2340a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatDuplicate_MPISBAIJ"
2341a30f8f8cSSatish Balay static int MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2342a30f8f8cSSatish Balay {
2343a30f8f8cSSatish Balay   Mat         mat;
2344a30f8f8cSSatish Balay   Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2345a30f8f8cSSatish Balay   int         ierr,len=0;
2346a30f8f8cSSatish Balay   PetscTruth  flg;
2347a30f8f8cSSatish Balay 
2348a30f8f8cSSatish Balay   PetscFunctionBegin;
2349a30f8f8cSSatish Balay   *newmat       = 0;
2350a30f8f8cSSatish Balay   PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPISBAIJ,"Mat",matin->comm,MatDestroy,MatView);
2351a30f8f8cSSatish Balay   PLogObjectCreate(mat);
2352a30f8f8cSSatish Balay   mat->data         = (void*)(a = PetscNew(Mat_MPISBAIJ));CHKPTRQ(a);
2353a30f8f8cSSatish Balay   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2354a30f8f8cSSatish Balay   mat->ops->destroy = MatDestroy_MPISBAIJ;
2355a30f8f8cSSatish Balay   mat->ops->view    = MatView_MPISBAIJ;
2356a30f8f8cSSatish Balay   mat->factor       = matin->factor;
2357a30f8f8cSSatish Balay   mat->assembled    = PETSC_TRUE;
2358a30f8f8cSSatish Balay   mat->insertmode   = NOT_SET_VALUES;
2359a30f8f8cSSatish Balay 
2360a30f8f8cSSatish Balay   a->m = mat->m   = oldmat->m;
2361a30f8f8cSSatish Balay   a->n = mat->n   = oldmat->n;
2362a30f8f8cSSatish Balay   a->M = mat->M   = oldmat->M;
2363a30f8f8cSSatish Balay   a->N = mat->N   = oldmat->N;
2364a30f8f8cSSatish Balay 
2365a30f8f8cSSatish Balay   a->bs  = oldmat->bs;
2366a30f8f8cSSatish Balay   a->bs2 = oldmat->bs2;
2367a30f8f8cSSatish Balay   a->mbs = oldmat->mbs;
2368a30f8f8cSSatish Balay   a->nbs = oldmat->nbs;
2369a30f8f8cSSatish Balay   a->Mbs = oldmat->Mbs;
2370a30f8f8cSSatish Balay   a->Nbs = oldmat->Nbs;
2371a30f8f8cSSatish Balay 
2372a30f8f8cSSatish Balay   a->rstart       = oldmat->rstart;
2373a30f8f8cSSatish Balay   a->rend         = oldmat->rend;
2374a30f8f8cSSatish Balay   a->cstart       = oldmat->cstart;
2375a30f8f8cSSatish Balay   a->cend         = oldmat->cend;
2376a30f8f8cSSatish Balay   a->size         = oldmat->size;
2377a30f8f8cSSatish Balay   a->rank         = oldmat->rank;
2378a30f8f8cSSatish Balay   a->donotstash   = oldmat->donotstash;
2379a30f8f8cSSatish Balay   a->roworiented  = oldmat->roworiented;
2380a30f8f8cSSatish Balay   a->rowindices   = 0;
2381a30f8f8cSSatish Balay   a->rowvalues    = 0;
2382a30f8f8cSSatish Balay   a->getrowactive = PETSC_FALSE;
2383a30f8f8cSSatish Balay   a->barray       = 0;
2384a30f8f8cSSatish Balay   a->rstart_bs    = oldmat->rstart_bs;
2385a30f8f8cSSatish Balay   a->rend_bs      = oldmat->rend_bs;
2386a30f8f8cSSatish Balay   a->cstart_bs    = oldmat->cstart_bs;
2387a30f8f8cSSatish Balay   a->cend_bs      = oldmat->cend_bs;
2388a30f8f8cSSatish Balay 
2389a30f8f8cSSatish Balay   /* hash table stuff */
2390a30f8f8cSSatish Balay   a->ht           = 0;
2391a30f8f8cSSatish Balay   a->hd           = 0;
2392a30f8f8cSSatish Balay   a->ht_size      = 0;
2393a30f8f8cSSatish Balay   a->ht_flag      = oldmat->ht_flag;
2394a30f8f8cSSatish Balay   a->ht_fact      = oldmat->ht_fact;
2395a30f8f8cSSatish Balay   a->ht_total_ct  = 0;
2396a30f8f8cSSatish Balay   a->ht_insert_ct = 0;
2397a30f8f8cSSatish Balay 
2398a30f8f8cSSatish Balay 
2399a30f8f8cSSatish Balay   a->rowners = (int*)PetscMalloc(3*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners);
2400a30f8f8cSSatish Balay   PLogObjectMemory(mat,3*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ));
2401a30f8f8cSSatish Balay   a->cowners    = a->rowners + a->size + 2;
2402a30f8f8cSSatish Balay   a->rowners_bs = a->cowners + a->size + 2;
2403a30f8f8cSSatish Balay   ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr);
2404a30f8f8cSSatish Balay   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
2405a30f8f8cSSatish Balay   ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr);
2406a30f8f8cSSatish Balay   if (oldmat->colmap) {
2407a30f8f8cSSatish Balay #if defined (PETSC_USE_CTABLE)
2408a30f8f8cSSatish Balay   ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2409a30f8f8cSSatish Balay #else
2410a30f8f8cSSatish Balay     a->colmap = (int*)PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap);
2411a30f8f8cSSatish Balay     PLogObjectMemory(mat,(a->Nbs)*sizeof(int));
2412a30f8f8cSSatish Balay     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr);
2413a30f8f8cSSatish Balay #endif
2414a30f8f8cSSatish Balay   } else a->colmap = 0;
2415a30f8f8cSSatish Balay   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2416a30f8f8cSSatish Balay     a->garray = (int*)PetscMalloc(len*sizeof(int));CHKPTRQ(a->garray);
2417a30f8f8cSSatish Balay     PLogObjectMemory(mat,len*sizeof(int));
2418a30f8f8cSSatish Balay     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr);
2419a30f8f8cSSatish Balay   } else a->garray = 0;
2420a30f8f8cSSatish Balay 
2421a30f8f8cSSatish Balay   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2422a30f8f8cSSatish Balay   PLogObjectParent(mat,a->lvec);
2423a30f8f8cSSatish Balay   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2424a30f8f8cSSatish Balay 
2425a30f8f8cSSatish Balay   PLogObjectParent(mat,a->Mvctx);
2426a30f8f8cSSatish Balay   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2427a30f8f8cSSatish Balay   PLogObjectParent(mat,a->A);
2428a30f8f8cSSatish Balay   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2429a30f8f8cSSatish Balay   PLogObjectParent(mat,a->B);
2430a30f8f8cSSatish Balay   ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr);
2431a30f8f8cSSatish Balay   ierr = FListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr);
2432a30f8f8cSSatish Balay   if (flg) {
2433a30f8f8cSSatish Balay     ierr = MatPrintHelp(mat);CHKERRQ(ierr);
2434a30f8f8cSSatish Balay   }
2435a30f8f8cSSatish Balay   *newmat = mat;
2436a30f8f8cSSatish Balay   PetscFunctionReturn(0);
2437a30f8f8cSSatish Balay }
2438a30f8f8cSSatish Balay 
2439a30f8f8cSSatish Balay #include "petscsys.h"
2440a30f8f8cSSatish Balay 
2441a30f8f8cSSatish Balay #undef __FUNC__
2442a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatLoad_MPISBAIJ"
2443a30f8f8cSSatish Balay int MatLoad_MPISBAIJ(Viewer viewer,MatType type,Mat *newmat)
2444a30f8f8cSSatish Balay {
2445a30f8f8cSSatish Balay   Mat          A;
2446a30f8f8cSSatish Balay   int          i,nz,ierr,j,rstart,rend,fd;
2447a30f8f8cSSatish Balay   Scalar       *vals,*buf;
2448a30f8f8cSSatish Balay   MPI_Comm     comm = ((PetscObject)viewer)->comm;
2449a30f8f8cSSatish Balay   MPI_Status   status;
2450a30f8f8cSSatish Balay   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
2451a30f8f8cSSatish Balay   int          *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf;
2452a30f8f8cSSatish Balay   int          tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows;
2453a30f8f8cSSatish Balay   int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2454a30f8f8cSSatish Balay   int          dcount,kmax,k,nzcount,tmp;
2455a30f8f8cSSatish Balay 
2456a30f8f8cSSatish Balay   PetscFunctionBegin;
2457a30f8f8cSSatish Balay   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2458a30f8f8cSSatish Balay 
2459a30f8f8cSSatish Balay   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2460a30f8f8cSSatish Balay   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2461a30f8f8cSSatish Balay   if (!rank) {
2462a30f8f8cSSatish Balay     ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2463a30f8f8cSSatish Balay     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2464a30f8f8cSSatish Balay     if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
2465a30f8f8cSSatish Balay     if (header[3] < 0) {
2466a30f8f8cSSatish Balay       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as MPISBAIJ");
2467a30f8f8cSSatish Balay     }
2468a30f8f8cSSatish Balay   }
2469a30f8f8cSSatish Balay 
2470a30f8f8cSSatish Balay   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
2471a30f8f8cSSatish Balay   M = header[1]; N = header[2];
2472a30f8f8cSSatish Balay 
2473a30f8f8cSSatish Balay   if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices");
2474a30f8f8cSSatish Balay 
2475a30f8f8cSSatish Balay   /*
2476a30f8f8cSSatish Balay      This code adds extra rows to make sure the number of rows is
2477a30f8f8cSSatish Balay      divisible by the blocksize
2478a30f8f8cSSatish Balay   */
2479a30f8f8cSSatish Balay   Mbs        = M/bs;
2480a30f8f8cSSatish Balay   extra_rows = bs - M + bs*(Mbs);
2481a30f8f8cSSatish Balay   if (extra_rows == bs) extra_rows = 0;
2482a30f8f8cSSatish Balay   else                  Mbs++;
2483a30f8f8cSSatish Balay   if (extra_rows &&!rank) {
2484a30f8f8cSSatish Balay     PLogInfo(0,"MatLoad_MPISBAIJ:Padding loaded matrix to match blocksize\n");
2485a30f8f8cSSatish Balay   }
2486a30f8f8cSSatish Balay 
2487a30f8f8cSSatish Balay   /* determine ownership of all rows */
2488a30f8f8cSSatish Balay   mbs = Mbs/size + ((Mbs % size) > rank);
2489a30f8f8cSSatish Balay   m   = mbs * bs;
2490a30f8f8cSSatish Balay   rowners = (int*)PetscMalloc(2*(size+2)*sizeof(int));CHKPTRQ(rowners);
2491a30f8f8cSSatish Balay   browners = rowners + size + 1;
2492a30f8f8cSSatish Balay   ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2493a30f8f8cSSatish Balay   rowners[0] = 0;
2494a30f8f8cSSatish Balay   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2495a30f8f8cSSatish Balay   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2496a30f8f8cSSatish Balay   rstart = rowners[rank];
2497a30f8f8cSSatish Balay   rend   = rowners[rank+1];
2498a30f8f8cSSatish Balay 
2499a30f8f8cSSatish Balay   /* distribute row lengths to all processors */
2500a30f8f8cSSatish Balay   locrowlens = (int*)PetscMalloc((rend-rstart)*bs*sizeof(int));CHKPTRQ(locrowlens);
2501a30f8f8cSSatish Balay   if (!rank) {
2502a30f8f8cSSatish Balay     rowlengths = (int*)PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths);
2503a30f8f8cSSatish Balay     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2504a30f8f8cSSatish Balay     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2505a30f8f8cSSatish Balay     sndcounts = (int*)PetscMalloc(size*sizeof(int));CHKPTRQ(sndcounts);
2506a30f8f8cSSatish Balay     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2507a30f8f8cSSatish Balay     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2508a30f8f8cSSatish Balay     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2509a30f8f8cSSatish Balay   } else {
2510a30f8f8cSSatish Balay     ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2511a30f8f8cSSatish Balay   }
2512a30f8f8cSSatish Balay 
2513a30f8f8cSSatish Balay   if (!rank) {
2514a30f8f8cSSatish Balay     /* calculate the number of nonzeros on each processor */
2515a30f8f8cSSatish Balay     procsnz = (int*)PetscMalloc(size*sizeof(int));CHKPTRQ(procsnz);
2516a30f8f8cSSatish Balay     ierr    = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
2517a30f8f8cSSatish Balay     for (i=0; i<size; i++) {
2518a30f8f8cSSatish Balay       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2519a30f8f8cSSatish Balay         procsnz[i] += rowlengths[j];
2520a30f8f8cSSatish Balay       }
2521a30f8f8cSSatish Balay     }
2522a30f8f8cSSatish Balay     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2523a30f8f8cSSatish Balay 
2524a30f8f8cSSatish Balay     /* determine max buffer needed and allocate it */
2525a30f8f8cSSatish Balay     maxnz = 0;
2526a30f8f8cSSatish Balay     for (i=0; i<size; i++) {
2527a30f8f8cSSatish Balay       maxnz = PetscMax(maxnz,procsnz[i]);
2528a30f8f8cSSatish Balay     }
2529a30f8f8cSSatish Balay     cols = (int*)PetscMalloc(maxnz*sizeof(int));CHKPTRQ(cols);
2530a30f8f8cSSatish Balay 
2531a30f8f8cSSatish Balay     /* read in my part of the matrix column indices  */
2532a30f8f8cSSatish Balay     nz = procsnz[0];
2533a30f8f8cSSatish Balay     ibuf = (int*)PetscMalloc(nz*sizeof(int));CHKPTRQ(ibuf);
2534a30f8f8cSSatish Balay     mycols = ibuf;
2535a30f8f8cSSatish Balay     if (size == 1)  nz -= extra_rows;
2536a30f8f8cSSatish Balay     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2537a30f8f8cSSatish Balay     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2538a30f8f8cSSatish Balay 
2539a30f8f8cSSatish Balay     /* read in every ones (except the last) and ship off */
2540a30f8f8cSSatish Balay     for (i=1; i<size-1; i++) {
2541a30f8f8cSSatish Balay       nz   = procsnz[i];
2542a30f8f8cSSatish Balay       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2543a30f8f8cSSatish Balay       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
2544a30f8f8cSSatish Balay     }
2545a30f8f8cSSatish Balay     /* read in the stuff for the last proc */
2546a30f8f8cSSatish Balay     if (size != 1) {
2547a30f8f8cSSatish Balay       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2548a30f8f8cSSatish Balay       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2549a30f8f8cSSatish Balay       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2550a30f8f8cSSatish Balay       ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
2551a30f8f8cSSatish Balay     }
2552a30f8f8cSSatish Balay     ierr = PetscFree(cols);CHKERRQ(ierr);
2553a30f8f8cSSatish Balay   } else {
2554a30f8f8cSSatish Balay     /* determine buffer space needed for message */
2555a30f8f8cSSatish Balay     nz = 0;
2556a30f8f8cSSatish Balay     for (i=0; i<m; i++) {
2557a30f8f8cSSatish Balay       nz += locrowlens[i];
2558a30f8f8cSSatish Balay     }
2559a30f8f8cSSatish Balay     ibuf   = (int*)PetscMalloc(nz*sizeof(int));CHKPTRQ(ibuf);
2560a30f8f8cSSatish Balay     mycols = ibuf;
2561a30f8f8cSSatish Balay     /* receive message of column indices*/
2562a30f8f8cSSatish Balay     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2563a30f8f8cSSatish Balay     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2564a30f8f8cSSatish Balay     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
2565a30f8f8cSSatish Balay   }
2566a30f8f8cSSatish Balay 
2567a30f8f8cSSatish Balay   /* loop over local rows, determining number of off diagonal entries */
2568a30f8f8cSSatish Balay   dlens  = (int*)PetscMalloc(2*(rend-rstart+1)*sizeof(int));CHKPTRQ(dlens);
2569a30f8f8cSSatish Balay   odlens = dlens + (rend-rstart);
2570a30f8f8cSSatish Balay   mask   = (int*)PetscMalloc(3*Mbs*sizeof(int));CHKPTRQ(mask);
2571a30f8f8cSSatish Balay   ierr   = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr);
2572a30f8f8cSSatish Balay   masked1 = mask    + Mbs;
2573a30f8f8cSSatish Balay   masked2 = masked1 + Mbs;
2574a30f8f8cSSatish Balay   rowcount = 0; nzcount = 0;
2575a30f8f8cSSatish Balay   for (i=0; i<mbs; i++) {
2576a30f8f8cSSatish Balay     dcount  = 0;
2577a30f8f8cSSatish Balay     odcount = 0;
2578a30f8f8cSSatish Balay     for (j=0; j<bs; j++) {
2579a30f8f8cSSatish Balay       kmax = locrowlens[rowcount];
2580a30f8f8cSSatish Balay       for (k=0; k<kmax; k++) {
2581a30f8f8cSSatish Balay         tmp = mycols[nzcount++]/bs;
2582a30f8f8cSSatish Balay         if (!mask[tmp]) {
2583a30f8f8cSSatish Balay           mask[tmp] = 1;
2584a30f8f8cSSatish Balay           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
2585a30f8f8cSSatish Balay           else masked1[dcount++] = tmp;
2586a30f8f8cSSatish Balay         }
2587a30f8f8cSSatish Balay       }
2588a30f8f8cSSatish Balay       rowcount++;
2589a30f8f8cSSatish Balay     }
2590a30f8f8cSSatish Balay 
2591a30f8f8cSSatish Balay     dlens[i]  = dcount;
2592a30f8f8cSSatish Balay     odlens[i] = odcount;
2593a30f8f8cSSatish Balay 
2594a30f8f8cSSatish Balay     /* zero out the mask elements we set */
2595a30f8f8cSSatish Balay     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2596a30f8f8cSSatish Balay     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2597a30f8f8cSSatish Balay   }
2598a30f8f8cSSatish Balay 
2599a30f8f8cSSatish Balay   /* create our matrix */
2600a30f8f8cSSatish Balay   ierr = MatCreateMPISBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);CHKERRQ(ierr);
2601a30f8f8cSSatish Balay   A = *newmat;
2602a30f8f8cSSatish Balay   MatSetOption(A,MAT_COLUMNS_SORTED);
2603a30f8f8cSSatish Balay 
2604a30f8f8cSSatish Balay   if (!rank) {
2605a30f8f8cSSatish Balay     buf = (Scalar*)PetscMalloc(maxnz*sizeof(Scalar));CHKPTRQ(buf);
2606a30f8f8cSSatish Balay     /* read in my part of the matrix numerical values  */
2607a30f8f8cSSatish Balay     nz = procsnz[0];
2608a30f8f8cSSatish Balay     vals = buf;
2609a30f8f8cSSatish Balay     mycols = ibuf;
2610a30f8f8cSSatish Balay     if (size == 1)  nz -= extra_rows;
2611a30f8f8cSSatish Balay     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2612a30f8f8cSSatish Balay     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2613a30f8f8cSSatish Balay 
2614a30f8f8cSSatish Balay     /* insert into matrix */
2615a30f8f8cSSatish Balay     jj      = rstart*bs;
2616a30f8f8cSSatish Balay     for (i=0; i<m; i++) {
2617a30f8f8cSSatish Balay       ierr = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2618a30f8f8cSSatish Balay       mycols += locrowlens[i];
2619a30f8f8cSSatish Balay       vals   += locrowlens[i];
2620a30f8f8cSSatish Balay       jj++;
2621a30f8f8cSSatish Balay     }
2622a30f8f8cSSatish Balay     /* read in other processors (except the last one) and ship out */
2623a30f8f8cSSatish Balay     for (i=1; i<size-1; i++) {
2624a30f8f8cSSatish Balay       nz   = procsnz[i];
2625a30f8f8cSSatish Balay       vals = buf;
2626a30f8f8cSSatish Balay       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2627a30f8f8cSSatish Balay       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2628a30f8f8cSSatish Balay     }
2629a30f8f8cSSatish Balay     /* the last proc */
2630a30f8f8cSSatish Balay     if (size != 1){
2631a30f8f8cSSatish Balay       nz   = procsnz[i] - extra_rows;
2632a30f8f8cSSatish Balay       vals = buf;
2633a30f8f8cSSatish Balay       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2634a30f8f8cSSatish Balay       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2635a30f8f8cSSatish Balay       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
2636a30f8f8cSSatish Balay     }
2637a30f8f8cSSatish Balay     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2638a30f8f8cSSatish Balay   } else {
2639a30f8f8cSSatish Balay     /* receive numeric values */
2640a30f8f8cSSatish Balay     buf = (Scalar*)PetscMalloc(nz*sizeof(Scalar));CHKPTRQ(buf);
2641a30f8f8cSSatish Balay 
2642a30f8f8cSSatish Balay     /* receive message of values*/
2643a30f8f8cSSatish Balay     vals   = buf;
2644a30f8f8cSSatish Balay     mycols = ibuf;
2645a30f8f8cSSatish Balay     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2646a30f8f8cSSatish Balay     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2647a30f8f8cSSatish Balay     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
2648a30f8f8cSSatish Balay 
2649a30f8f8cSSatish Balay     /* insert into matrix */
2650a30f8f8cSSatish Balay     jj      = rstart*bs;
2651a30f8f8cSSatish Balay     for (i=0; i<m; i++) {
2652a30f8f8cSSatish Balay       ierr    = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2653a30f8f8cSSatish Balay       mycols += locrowlens[i];
2654a30f8f8cSSatish Balay       vals   += locrowlens[i];
2655a30f8f8cSSatish Balay       jj++;
2656a30f8f8cSSatish Balay     }
2657a30f8f8cSSatish Balay   }
2658a30f8f8cSSatish Balay   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2659a30f8f8cSSatish Balay   ierr = PetscFree(buf);CHKERRQ(ierr);
2660a30f8f8cSSatish Balay   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2661a30f8f8cSSatish Balay   ierr = PetscFree(rowners);CHKERRQ(ierr);
2662a30f8f8cSSatish Balay   ierr = PetscFree(dlens);CHKERRQ(ierr);
2663a30f8f8cSSatish Balay   ierr = PetscFree(mask);CHKERRQ(ierr);
2664a30f8f8cSSatish Balay   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2665a30f8f8cSSatish Balay   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2666a30f8f8cSSatish Balay   PetscFunctionReturn(0);
2667a30f8f8cSSatish Balay }
2668a30f8f8cSSatish Balay 
2669a30f8f8cSSatish Balay #undef __FUNC__
2670a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatMPISBAIJSetHashTableFactor"
2671a30f8f8cSSatish Balay /*@
2672a30f8f8cSSatish Balay    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2673a30f8f8cSSatish Balay 
2674a30f8f8cSSatish Balay    Input Parameters:
2675a30f8f8cSSatish Balay .  mat  - the matrix
2676a30f8f8cSSatish Balay .  fact - factor
2677a30f8f8cSSatish Balay 
2678a30f8f8cSSatish Balay    Collective on Mat
2679a30f8f8cSSatish Balay 
2680a30f8f8cSSatish Balay    Level: advanced
2681a30f8f8cSSatish Balay 
2682a30f8f8cSSatish Balay   Notes:
2683a30f8f8cSSatish Balay    This can also be set by the command line option: -mat_use_hash_table fact
2684a30f8f8cSSatish Balay 
2685a30f8f8cSSatish Balay .keywords: matrix, hashtable, factor, HT
2686a30f8f8cSSatish Balay 
2687a30f8f8cSSatish Balay .seealso: MatSetOption()
2688a30f8f8cSSatish Balay @*/
2689a30f8f8cSSatish Balay int MatMPISBAIJSetHashTableFactor(Mat mat,PetscReal fact)
2690a30f8f8cSSatish Balay {
2691a30f8f8cSSatish Balay   Mat_MPIBAIJ *baij;
2692a30f8f8cSSatish Balay 
2693a30f8f8cSSatish Balay   PetscFunctionBegin;
2694a30f8f8cSSatish Balay   PetscValidHeaderSpecific(mat,MAT_COOKIE);
2695a30f8f8cSSatish Balay   if (mat->type != MATMPIBAIJ) {
2696a30f8f8cSSatish Balay     SETERRQ(PETSC_ERR_ARG_WRONG,1,"Incorrect matrix type. Use MPIBAIJ only.");
2697a30f8f8cSSatish Balay   }
2698a30f8f8cSSatish Balay   baij = (Mat_MPIBAIJ*)mat->data;
2699a30f8f8cSSatish Balay   baij->ht_fact = fact;
2700a30f8f8cSSatish Balay   PetscFunctionReturn(0);
2701a30f8f8cSSatish Balay }
2702