xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision c8117e2204c1a2cc3d94ebcf38067c193840db20)
1*c8117e22SSatish Balay /*$Id: mpisbaij.c,v 1.11 2000/08/03 15:03:00 balay Exp balay $*/
2a30f8f8cSSatish Balay 
3*c8117e22SSatish Balay #include "src/mat/impls/baij/mpi/mpibaij.h"    /*I "petscmat.h" I*/
4a30f8f8cSSatish Balay #include "src/vec/vecimpl.h"
5a30f8f8cSSatish Balay #include "mpisbaij.h"
6a30f8f8cSSatish Balay #include "src/mat/impls/sbaij/seq/sbaij.h"
7a30f8f8cSSatish Balay 
8a30f8f8cSSatish Balay extern int MatSetUpMultiply_MPISBAIJ(Mat);
9a30f8f8cSSatish Balay extern int DisAssemble_MPISBAIJ(Mat);
10a30f8f8cSSatish Balay extern int MatIncreaseOverlap_MPISBAIJ(Mat,int,IS *,int);
11a30f8f8cSSatish Balay extern int MatGetSubMatrices_MPISBAIJ(Mat,int,IS *,IS *,MatReuse,Mat **);
12a30f8f8cSSatish Balay extern int MatGetValues_SeqSBAIJ(Mat,int,int *,int,int *,Scalar *);
13a30f8f8cSSatish Balay extern int MatSetValues_SeqSBAIJ(Mat,int,int *,int,int *,Scalar *,InsertMode);
14a30f8f8cSSatish Balay extern int MatSetValuesBlocked_SeqSBAIJ(Mat,int,int*,int,int*,Scalar*,InsertMode);
15a30f8f8cSSatish Balay extern int MatGetRow_SeqSBAIJ(Mat,int,int*,int**,Scalar**);
16a30f8f8cSSatish Balay extern int MatRestoreRow_SeqSBAIJ(Mat,int,int*,int**,Scalar**);
17a30f8f8cSSatish Balay extern int MatPrintHelp_SeqSBAIJ(Mat);
18a30f8f8cSSatish Balay extern int MatZeroRows_SeqSBAIJ(Mat,IS,Scalar*);
19a30f8f8cSSatish Balay 
20a30f8f8cSSatish Balay /*  UGLY, ugly, ugly
21a30f8f8cSSatish Balay    When MatScalar == Scalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does
22a30f8f8cSSatish Balay    not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and
23a30f8f8cSSatish Balay    inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ()
24a30f8f8cSSatish Balay    converts the entries into single precision and then calls ..._MatScalar() to put them
25a30f8f8cSSatish Balay    into the single precision data structures.
26a30f8f8cSSatish Balay */
27a30f8f8cSSatish Balay #if defined(PETSC_USE_MAT_SINGLE)
28a30f8f8cSSatish Balay extern int MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
29a30f8f8cSSatish Balay extern int MatSetValues_MPISBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
30a30f8f8cSSatish Balay extern int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
31a30f8f8cSSatish Balay extern int MatSetValues_MPISBAIJ_HT_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
32a30f8f8cSSatish Balay extern int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
33a30f8f8cSSatish Balay #else
34a30f8f8cSSatish Balay #define MatSetValuesBlocked_SeqSBAIJ_MatScalar      MatSetValuesBlocked_SeqSBAIJ
35a30f8f8cSSatish Balay #define MatSetValues_MPISBAIJ_MatScalar             MatSetValues_MPISBAIJ
36a30f8f8cSSatish Balay #define MatSetValuesBlocked_MPISBAIJ_MatScalar      MatSetValuesBlocked_MPISBAIJ
37a30f8f8cSSatish Balay #define MatSetValues_MPISBAIJ_HT_MatScalar          MatSetValues_MPISBAIJ_HT
38a30f8f8cSSatish Balay #define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar   MatSetValuesBlocked_MPISBAIJ_HT
39a30f8f8cSSatish Balay #endif
40a30f8f8cSSatish Balay 
41a30f8f8cSSatish Balay EXTERN_C_BEGIN
42a30f8f8cSSatish Balay #undef __FUNC__
43a30f8f8cSSatish Balay #define __FUNC__ /*<a name="MatStoreValues_MPIBAIJ"></a>*/"MatStoreValues_MPIBAIJ"
44a30f8f8cSSatish Balay int MatStoreValues_MPISBAIJ(Mat mat)
45a30f8f8cSSatish Balay {
46a30f8f8cSSatish Balay   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
47a30f8f8cSSatish Balay   int         ierr;
48a30f8f8cSSatish Balay 
49a30f8f8cSSatish Balay   PetscFunctionBegin;
50a30f8f8cSSatish Balay   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
51a30f8f8cSSatish Balay   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
52a30f8f8cSSatish Balay   PetscFunctionReturn(0);
53a30f8f8cSSatish Balay }
54a30f8f8cSSatish Balay EXTERN_C_END
55a30f8f8cSSatish Balay 
56a30f8f8cSSatish Balay EXTERN_C_BEGIN
57a30f8f8cSSatish Balay #undef __FUNC__
58a30f8f8cSSatish Balay #define __FUNC__ /*<a name="MatRetrieveValues_MPIBAIJ"></a>*/"MatRetrieveValues_MPIBAIJ"
59a30f8f8cSSatish Balay int MatRetrieveValues_MPISBAIJ(Mat mat)
60a30f8f8cSSatish Balay {
61a30f8f8cSSatish Balay   Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;
62a30f8f8cSSatish Balay   int         ierr;
63a30f8f8cSSatish Balay 
64a30f8f8cSSatish Balay   PetscFunctionBegin;
65a30f8f8cSSatish Balay   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
66a30f8f8cSSatish Balay   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
67a30f8f8cSSatish Balay   PetscFunctionReturn(0);
68a30f8f8cSSatish Balay }
69a30f8f8cSSatish Balay EXTERN_C_END
70a30f8f8cSSatish Balay 
71a30f8f8cSSatish Balay /*
72a30f8f8cSSatish Balay      Local utility routine that creates a mapping from the global column
73a30f8f8cSSatish Balay    number to the local number in the off-diagonal part of the local
74a30f8f8cSSatish Balay    storage of the matrix.  This is done in a non scable way since the
75a30f8f8cSSatish Balay    length of colmap equals the global matrix length.
76a30f8f8cSSatish Balay */
77a30f8f8cSSatish Balay #undef __FUNC__
78a30f8f8cSSatish Balay #define __FUNC__ /*<a name="CreateColmap_MPISBAIJ_Private"></a>*/"CreateColmap_MPISBAIJ_Private"
79a30f8f8cSSatish Balay static int CreateColmap_MPISBAIJ_Private(Mat mat)
80a30f8f8cSSatish Balay {
81a30f8f8cSSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
82a30f8f8cSSatish Balay   Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)baij->B->data;
83a30f8f8cSSatish Balay   int         nbs = B->nbs,i,bs=B->bs,ierr;
84a30f8f8cSSatish Balay 
85a30f8f8cSSatish Balay   PetscFunctionBegin;
86a30f8f8cSSatish Balay #if defined (PETSC_USE_CTABLE)
87a30f8f8cSSatish Balay   ierr = PetscTableCreate(baij->nbs/5,&baij->colmap);CHKERRQ(ierr);
88a30f8f8cSSatish Balay   for (i=0; i<nbs; i++){
89a30f8f8cSSatish Balay     ierr = PetscTableAdd(baij->colmap,baij->garray[i]+1,i*bs+1);CHKERRQ(ierr);
90a30f8f8cSSatish Balay   }
91a30f8f8cSSatish Balay #else
92a30f8f8cSSatish Balay   baij->colmap = (int*)PetscMalloc((baij->Nbs+1)*sizeof(int));CHKPTRQ(baij->colmap);
93a30f8f8cSSatish Balay   PLogObjectMemory(mat,baij->Nbs*sizeof(int));
94a30f8f8cSSatish Balay   ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));CHKERRQ(ierr);
95a30f8f8cSSatish Balay   for (i=0; i<nbs; i++) baij->colmap[baij->garray[i]] = i*bs+1;
96a30f8f8cSSatish Balay #endif
97a30f8f8cSSatish Balay   PetscFunctionReturn(0);
98a30f8f8cSSatish Balay }
99a30f8f8cSSatish Balay 
100a30f8f8cSSatish Balay #define CHUNKSIZE  10
101a30f8f8cSSatish Balay 
102a30f8f8cSSatish Balay #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
103a30f8f8cSSatish Balay { \
104a30f8f8cSSatish Balay  \
105a30f8f8cSSatish Balay     brow = row/bs;  \
106a30f8f8cSSatish Balay     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
107a30f8f8cSSatish Balay     rmax = aimax[brow]; nrow = ailen[brow]; \
108a30f8f8cSSatish Balay       bcol = col/bs; \
109a30f8f8cSSatish Balay       ridx = row % bs; cidx = col % bs; \
110a30f8f8cSSatish Balay       low = 0; high = nrow; \
111a30f8f8cSSatish Balay       while (high-low > 3) { \
112a30f8f8cSSatish Balay         t = (low+high)/2; \
113a30f8f8cSSatish Balay         if (rp[t] > bcol) high = t; \
114a30f8f8cSSatish Balay         else              low  = t; \
115a30f8f8cSSatish Balay       } \
116a30f8f8cSSatish Balay       for (_i=low; _i<high; _i++) { \
117a30f8f8cSSatish Balay         if (rp[_i] > bcol) break; \
118a30f8f8cSSatish Balay         if (rp[_i] == bcol) { \
119a30f8f8cSSatish Balay           bap  = ap +  bs2*_i + bs*cidx + ridx; \
120a30f8f8cSSatish Balay           if (addv == ADD_VALUES) *bap += value;  \
121a30f8f8cSSatish Balay           else                    *bap  = value;  \
122a30f8f8cSSatish Balay           goto a_noinsert; \
123a30f8f8cSSatish Balay         } \
124a30f8f8cSSatish Balay       } \
125a30f8f8cSSatish Balay       if (a->nonew == 1) goto a_noinsert; \
126a30f8f8cSSatish Balay       else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
127a30f8f8cSSatish Balay       if (nrow >= rmax) { \
128a30f8f8cSSatish Balay         /* there is no extra room in row, therefore enlarge */ \
129a30f8f8cSSatish Balay         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
130a30f8f8cSSatish Balay         MatScalar *new_a; \
131a30f8f8cSSatish Balay  \
132a30f8f8cSSatish Balay         if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
133a30f8f8cSSatish Balay  \
134a30f8f8cSSatish Balay         /* malloc new storage space */ \
135a30f8f8cSSatish Balay         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \
136a30f8f8cSSatish Balay         new_a   = (MatScalar*)PetscMalloc(len);CHKPTRQ(new_a); \
137a30f8f8cSSatish Balay         new_j   = (int*)(new_a + bs2*new_nz); \
138a30f8f8cSSatish Balay         new_i   = new_j + new_nz; \
139a30f8f8cSSatish Balay  \
140a30f8f8cSSatish Balay         /* copy over old data into new slots */ \
141a30f8f8cSSatish Balay         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \
142a30f8f8cSSatish Balay         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
143a30f8f8cSSatish Balay         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
144a30f8f8cSSatish Balay         len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
145a30f8f8cSSatish Balay         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
146a30f8f8cSSatish Balay         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
147a30f8f8cSSatish Balay         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(Scalar));CHKERRQ(ierr); \
148a30f8f8cSSatish Balay         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
149a30f8f8cSSatish Balay                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
150a30f8f8cSSatish Balay         /* free up old matrix storage */ \
151a30f8f8cSSatish Balay         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
152a30f8f8cSSatish Balay         if (!a->singlemalloc) { \
153a30f8f8cSSatish Balay           ierr = PetscFree(a->i);CHKERRQ(ierr); \
154a30f8f8cSSatish Balay           ierr = PetscFree(a->j);CHKERRQ(ierr);\
155a30f8f8cSSatish Balay         } \
156a30f8f8cSSatish Balay         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
157a30f8f8cSSatish Balay         a->singlemalloc = PETSC_TRUE; \
158a30f8f8cSSatish Balay  \
159a30f8f8cSSatish Balay         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
160a30f8f8cSSatish Balay         rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
161a30f8f8cSSatish Balay         PLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
162a30f8f8cSSatish Balay         a->s_maxnz += bs2*CHUNKSIZE; \
163a30f8f8cSSatish Balay         a->reallocs++; \
164a30f8f8cSSatish Balay         a->s_nz++; \
165a30f8f8cSSatish Balay       } \
166a30f8f8cSSatish Balay       N = nrow++ - 1;  \
167a30f8f8cSSatish Balay       /* shift up all the later entries in this row */ \
168a30f8f8cSSatish Balay       for (ii=N; ii>=_i; ii--) { \
169a30f8f8cSSatish Balay         rp[ii+1] = rp[ii]; \
170a30f8f8cSSatish Balay         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
171a30f8f8cSSatish Balay       } \
172a30f8f8cSSatish Balay       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
173a30f8f8cSSatish Balay       rp[_i]                      = bcol;  \
174a30f8f8cSSatish Balay       ap[bs2*_i + bs*cidx + ridx] = value;  \
175a30f8f8cSSatish Balay       a_noinsert:; \
176a30f8f8cSSatish Balay     ailen[brow] = nrow; \
177a30f8f8cSSatish Balay }
178a30f8f8cSSatish Balay #ifndef MatSetValues_SeqBAIJ_B_Private
179a30f8f8cSSatish Balay #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
180a30f8f8cSSatish Balay { \
181a30f8f8cSSatish Balay     brow = row/bs;  \
182a30f8f8cSSatish Balay     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
183a30f8f8cSSatish Balay     rmax = bimax[brow]; nrow = bilen[brow]; \
184a30f8f8cSSatish Balay       bcol = col/bs; \
185a30f8f8cSSatish Balay       ridx = row % bs; cidx = col % bs; \
186a30f8f8cSSatish Balay       low = 0; high = nrow; \
187a30f8f8cSSatish Balay       while (high-low > 3) { \
188a30f8f8cSSatish Balay         t = (low+high)/2; \
189a30f8f8cSSatish Balay         if (rp[t] > bcol) high = t; \
190a30f8f8cSSatish Balay         else              low  = t; \
191a30f8f8cSSatish Balay       } \
192a30f8f8cSSatish Balay       for (_i=low; _i<high; _i++) { \
193a30f8f8cSSatish Balay         if (rp[_i] > bcol) break; \
194a30f8f8cSSatish Balay         if (rp[_i] == bcol) { \
195a30f8f8cSSatish Balay           bap  = ap +  bs2*_i + bs*cidx + ridx; \
196a30f8f8cSSatish Balay           if (addv == ADD_VALUES) *bap += value;  \
197a30f8f8cSSatish Balay           else                    *bap  = value;  \
198a30f8f8cSSatish Balay           goto b_noinsert; \
199a30f8f8cSSatish Balay         } \
200a30f8f8cSSatish Balay       } \
201a30f8f8cSSatish Balay       if (b->nonew == 1) goto b_noinsert; \
202a30f8f8cSSatish Balay       else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero into matrix"); \
203a30f8f8cSSatish Balay       if (nrow >= rmax) { \
204a30f8f8cSSatish Balay         /* there is no extra room in row, therefore enlarge */ \
205a30f8f8cSSatish Balay         int       new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
206a30f8f8cSSatish Balay         MatScalar *new_a; \
207a30f8f8cSSatish Balay  \
208a30f8f8cSSatish Balay         if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Inserting a new nonzero in the matrix"); \
209a30f8f8cSSatish Balay  \
210a30f8f8cSSatish Balay         /* malloc new storage space */ \
211a30f8f8cSSatish Balay         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \
212a30f8f8cSSatish Balay         new_a   = (MatScalar*)PetscMalloc(len);CHKPTRQ(new_a); \
213a30f8f8cSSatish Balay         new_j   = (int*)(new_a + bs2*new_nz); \
214a30f8f8cSSatish Balay         new_i   = new_j + new_nz; \
215a30f8f8cSSatish Balay  \
216a30f8f8cSSatish Balay         /* copy over old data into new slots */ \
217a30f8f8cSSatish Balay         for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \
218a30f8f8cSSatish Balay         for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
219a30f8f8cSSatish Balay         ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
220a30f8f8cSSatish Balay         len  = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
221a30f8f8cSSatish Balay         ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
222a30f8f8cSSatish Balay         ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
223a30f8f8cSSatish Balay         ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
224a30f8f8cSSatish Balay         ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
225a30f8f8cSSatish Balay                     ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
226a30f8f8cSSatish Balay         /* free up old matrix storage */ \
227a30f8f8cSSatish Balay         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
228a30f8f8cSSatish Balay         if (!b->singlemalloc) { \
229a30f8f8cSSatish Balay           ierr = PetscFree(b->i);CHKERRQ(ierr); \
230a30f8f8cSSatish Balay           ierr = PetscFree(b->j);CHKERRQ(ierr); \
231a30f8f8cSSatish Balay         } \
232a30f8f8cSSatish Balay         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
233a30f8f8cSSatish Balay         b->singlemalloc = PETSC_TRUE; \
234a30f8f8cSSatish Balay  \
235a30f8f8cSSatish Balay         rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
236a30f8f8cSSatish Balay         rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
237a30f8f8cSSatish Balay         PLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
238a30f8f8cSSatish Balay         b->maxnz += bs2*CHUNKSIZE; \
239a30f8f8cSSatish Balay         b->reallocs++; \
240a30f8f8cSSatish Balay         b->nz++; \
241a30f8f8cSSatish Balay       } \
242a30f8f8cSSatish Balay       N = nrow++ - 1;  \
243a30f8f8cSSatish Balay       /* shift up all the later entries in this row */ \
244a30f8f8cSSatish Balay       for (ii=N; ii>=_i; ii--) { \
245a30f8f8cSSatish Balay         rp[ii+1] = rp[ii]; \
246a30f8f8cSSatish Balay         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
247a30f8f8cSSatish Balay       } \
248a30f8f8cSSatish Balay       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
249a30f8f8cSSatish Balay       rp[_i]                      = bcol;  \
250a30f8f8cSSatish Balay       ap[bs2*_i + bs*cidx + ridx] = value;  \
251a30f8f8cSSatish Balay       b_noinsert:; \
252a30f8f8cSSatish Balay     bilen[brow] = nrow; \
253a30f8f8cSSatish Balay }
254a30f8f8cSSatish Balay #endif
255a30f8f8cSSatish Balay 
256a30f8f8cSSatish Balay #if defined(PETSC_USE_MAT_SINGLE)
257a30f8f8cSSatish Balay #undef __FUNC__
258a30f8f8cSSatish Balay #define __FUNC__ /*<a name="MatSetValues_MPISBAIJ"></a>*/"MatSetValues_MPISBAIJ"
259a30f8f8cSSatish Balay int MatSetValues_MPISBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
260a30f8f8cSSatish Balay {
261a30f8f8cSSatish Balay   Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)mat->data;
262a30f8f8cSSatish Balay   int         ierr,i,N = m*n;
263a30f8f8cSSatish Balay   MatScalar   *vsingle;
264a30f8f8cSSatish Balay 
265a30f8f8cSSatish Balay   PetscFunctionBegin;
266a30f8f8cSSatish Balay   if (N > b->setvalueslen) {
267a30f8f8cSSatish Balay     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
268a30f8f8cSSatish Balay     b->setvaluescopy = (MatScalar*)PetscMalloc(N*sizeof(MatScalar));CHKPTRQ(b->setvaluescopy);
269a30f8f8cSSatish Balay     b->setvalueslen  = N;
270a30f8f8cSSatish Balay   }
271a30f8f8cSSatish Balay   vsingle = b->setvaluescopy;
272a30f8f8cSSatish Balay 
273a30f8f8cSSatish Balay   for (i=0; i<N; i++) {
274a30f8f8cSSatish Balay     vsingle[i] = v[i];
275a30f8f8cSSatish Balay   }
276a30f8f8cSSatish Balay   ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
277a30f8f8cSSatish Balay   PetscFunctionReturn(0);
278a30f8f8cSSatish Balay }
279a30f8f8cSSatish Balay 
280a30f8f8cSSatish Balay #undef __FUNC__
281a30f8f8cSSatish Balay #define __FUNC__ /*<a name="MatSetValuesBlocked_MPISBAIJ"></a>*/"MatSetValuesBlocked_MPISBAIJ"
282a30f8f8cSSatish Balay int MatSetValuesBlocked_MPISBAIJ(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
283a30f8f8cSSatish Balay {
284a30f8f8cSSatish Balay   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
285a30f8f8cSSatish Balay   int         ierr,i,N = m*n*b->bs2;
286a30f8f8cSSatish Balay   MatScalar   *vsingle;
287a30f8f8cSSatish Balay 
288a30f8f8cSSatish Balay   PetscFunctionBegin;
289a30f8f8cSSatish Balay   if (N > b->setvalueslen) {
290a30f8f8cSSatish Balay     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
291a30f8f8cSSatish Balay     b->setvaluescopy = (MatScalar*)PetscMalloc(N*sizeof(MatScalar));CHKPTRQ(b->setvaluescopy);
292a30f8f8cSSatish Balay     b->setvalueslen  = N;
293a30f8f8cSSatish Balay   }
294a30f8f8cSSatish Balay   vsingle = b->setvaluescopy;
295a30f8f8cSSatish Balay   for (i=0; i<N; i++) {
296a30f8f8cSSatish Balay     vsingle[i] = v[i];
297a30f8f8cSSatish Balay   }
298a30f8f8cSSatish Balay   ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
299a30f8f8cSSatish Balay   PetscFunctionReturn(0);
300a30f8f8cSSatish Balay }
301a30f8f8cSSatish Balay 
302a30f8f8cSSatish Balay #undef __FUNC__
303a30f8f8cSSatish Balay #define __FUNC__ /*<a name="MatSetValues_MPISBAIJ_HT"></a>*/"MatSetValues_MPISBAIJ_HT"
304a30f8f8cSSatish Balay int MatSetValues_MPISBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
305a30f8f8cSSatish Balay {
306a30f8f8cSSatish Balay   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
307a30f8f8cSSatish Balay   int         ierr,i,N = m*n;
308a30f8f8cSSatish Balay   MatScalar   *vsingle;
309a30f8f8cSSatish Balay 
310a30f8f8cSSatish Balay   PetscFunctionBegin;
311a30f8f8cSSatish Balay   if (N > b->setvalueslen) {
312a30f8f8cSSatish Balay     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
313a30f8f8cSSatish Balay     b->setvaluescopy = (MatScalar*)PetscMalloc(N*sizeof(MatScalar));CHKPTRQ(b->setvaluescopy);
314a30f8f8cSSatish Balay     b->setvalueslen  = N;
315a30f8f8cSSatish Balay   }
316a30f8f8cSSatish Balay   vsingle = b->setvaluescopy;
317a30f8f8cSSatish Balay   for (i=0; i<N; i++) {
318a30f8f8cSSatish Balay     vsingle[i] = v[i];
319a30f8f8cSSatish Balay   }
320a30f8f8cSSatish Balay   ierr = MatSetValues_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
321a30f8f8cSSatish Balay   PetscFunctionReturn(0);
322a30f8f8cSSatish Balay }
323a30f8f8cSSatish Balay 
324a30f8f8cSSatish Balay #undef __FUNC__
325a30f8f8cSSatish Balay #define __FUNC__ /*<a name="MatSetValuesBlocked_MPISBAIJ_HT"></a>*/"MatSetValuesBlocked_MPISBAIJ_HT"
326a30f8f8cSSatish Balay int MatSetValuesBlocked_MPISBAIJ_HT(Mat mat,int m,int *im,int n,int *in,Scalar *v,InsertMode addv)
327a30f8f8cSSatish Balay {
328a30f8f8cSSatish Balay   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
329a30f8f8cSSatish Balay   int         ierr,i,N = m*n*b->bs2;
330a30f8f8cSSatish Balay   MatScalar   *vsingle;
331a30f8f8cSSatish Balay 
332a30f8f8cSSatish Balay   PetscFunctionBegin;
333a30f8f8cSSatish Balay   if (N > b->setvalueslen) {
334a30f8f8cSSatish Balay     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
335a30f8f8cSSatish Balay     b->setvaluescopy = (MatScalar*)PetscMalloc(N*sizeof(MatScalar));CHKPTRQ(b->setvaluescopy);
336a30f8f8cSSatish Balay     b->setvalueslen  = N;
337a30f8f8cSSatish Balay   }
338a30f8f8cSSatish Balay   vsingle = b->setvaluescopy;
339a30f8f8cSSatish Balay   for (i=0; i<N; i++) {
340a30f8f8cSSatish Balay     vsingle[i] = v[i];
341a30f8f8cSSatish Balay   }
342a30f8f8cSSatish Balay   ierr = MatSetValuesBlocked_MPIBAIJ_HT_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
343a30f8f8cSSatish Balay   PetscFunctionReturn(0);
344a30f8f8cSSatish Balay }
345a30f8f8cSSatish Balay #endif
346a30f8f8cSSatish Balay 
347a30f8f8cSSatish Balay /* Only add/insert a(i,j) with i<=j (blocks).
348a30f8f8cSSatish Balay    Any a(i,j) with i>j input by user is ingored.
349a30f8f8cSSatish Balay */
350a30f8f8cSSatish Balay #undef __FUNC__
351a30f8f8cSSatish Balay #define __FUNC__ /*<a name="MatSetValues_MPIBAIJ"></a>*/"MatSetValues_MPIBAIJ"
352a30f8f8cSSatish Balay int MatSetValues_MPISBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
353a30f8f8cSSatish Balay {
354a30f8f8cSSatish Balay   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
355a30f8f8cSSatish Balay   MatScalar   value;
356a30f8f8cSSatish Balay   int         ierr,i,j,row,col;
357a30f8f8cSSatish Balay   int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
358a30f8f8cSSatish Balay   int         rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
359a30f8f8cSSatish Balay   int         cend_orig=baij->cend_bs,bs=baij->bs;
360a30f8f8cSSatish Balay 
361a30f8f8cSSatish Balay   /* Some Variables required in the macro */
362a30f8f8cSSatish Balay   Mat         A = baij->A;
363a30f8f8cSSatish Balay   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data;
364a30f8f8cSSatish Balay   int         *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
365a30f8f8cSSatish Balay   MatScalar   *aa=a->a;
366a30f8f8cSSatish Balay 
367a30f8f8cSSatish Balay   Mat         B = baij->B;
368a30f8f8cSSatish Balay   Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)(B)->data;
369a30f8f8cSSatish Balay   int         *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
370a30f8f8cSSatish Balay   MatScalar   *ba=b->a;
371a30f8f8cSSatish Balay 
372a30f8f8cSSatish Balay   int         *rp,ii,nrow,_i,rmax,N,brow,bcol;
373a30f8f8cSSatish Balay   int         low,high,t,ridx,cidx,bs2=a->bs2;
374a30f8f8cSSatish Balay   MatScalar   *ap,*bap;
375a30f8f8cSSatish Balay 
376a30f8f8cSSatish Balay   /* for stash */
377f65c83cfSHong Zhang   int         n_loc, *in_loc=0;
378f65c83cfSHong Zhang   MatScalar   *v_loc=0;
379a30f8f8cSSatish Balay 
380a30f8f8cSSatish Balay   PetscFunctionBegin;
381a30f8f8cSSatish Balay 
382a30f8f8cSSatish Balay   if(!baij->donotstash){
383a30f8f8cSSatish Balay     in_loc = (int*)PetscMalloc(n*sizeof(int)); CHKPTRQ(in_loc);
384a30f8f8cSSatish Balay     v_loc  = (MatScalar*)PetscMalloc(n*sizeof(MatScalar)); CHKPTRQ(v_loc);
385a30f8f8cSSatish Balay   }
386a30f8f8cSSatish Balay 
387a30f8f8cSSatish Balay   for (i=0; i<m; i++) {
388a30f8f8cSSatish Balay     if (im[i] < 0) continue;
389a30f8f8cSSatish Balay #if defined(PETSC_USE_BOPT_g)
390a30f8f8cSSatish Balay     if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
391a30f8f8cSSatish Balay #endif
392a30f8f8cSSatish Balay     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
393a30f8f8cSSatish Balay       row = im[i] - rstart_orig;              /* local row index */
394a30f8f8cSSatish Balay       for (j=0; j<n; j++) {
395f65c83cfSHong Zhang         if (im[i]/bs > in[j]/bs) continue;    /* ignore lower triangular blocks */
396a30f8f8cSSatish Balay         if (in[j] >= cstart_orig && in[j] < cend_orig){  /* diag entry (A) */
397a30f8f8cSSatish Balay           col = in[j] - cstart_orig;          /* local col index */
398a30f8f8cSSatish Balay           brow = row/bs; bcol = col/bs;
399a30f8f8cSSatish Balay           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
400a30f8f8cSSatish Balay           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
401a30f8f8cSSatish Balay           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
402a30f8f8cSSatish Balay           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
403a30f8f8cSSatish Balay         } else if (in[j] < 0) continue;
404a30f8f8cSSatish Balay #if defined(PETSC_USE_BOPT_g)
405a30f8f8cSSatish Balay         else if (in[j] >= baij->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Col too large");}
406a30f8f8cSSatish Balay #endif
407a30f8f8cSSatish Balay         else {  /* off-diag entry (B) */
408a30f8f8cSSatish Balay           if (mat->was_assembled) {
409a30f8f8cSSatish Balay             if (!baij->colmap) {
410a30f8f8cSSatish Balay               ierr = CreateColmap_MPISBAIJ_Private(mat);CHKERRQ(ierr);
411a30f8f8cSSatish Balay             }
412a30f8f8cSSatish Balay #if defined (PETSC_USE_CTABLE)
413a30f8f8cSSatish Balay             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
414a30f8f8cSSatish Balay             col  = col - 1 + in[j]%bs;
415a30f8f8cSSatish Balay #else
416a30f8f8cSSatish Balay             col = baij->colmap[in[j]/bs] - 1 + in[j]%bs;
417a30f8f8cSSatish Balay #endif
418a30f8f8cSSatish Balay             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
419a30f8f8cSSatish Balay               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
420a30f8f8cSSatish Balay               col =  in[j];
421a30f8f8cSSatish Balay               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
422a30f8f8cSSatish Balay               B = baij->B;
423a30f8f8cSSatish Balay               b = (Mat_SeqBAIJ*)(B)->data;
424a30f8f8cSSatish Balay               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
425a30f8f8cSSatish Balay               ba=b->a;
426a30f8f8cSSatish Balay             }
427a30f8f8cSSatish Balay           } else col = in[j];
428a30f8f8cSSatish Balay           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
429a30f8f8cSSatish Balay           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
430a30f8f8cSSatish Balay           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
431a30f8f8cSSatish Balay         }
432a30f8f8cSSatish Balay       }
433a30f8f8cSSatish Balay     } else {  /* off processor entry */
434a30f8f8cSSatish Balay       if (!baij->donotstash) {
435a30f8f8cSSatish Balay         n_loc = 0;
436a30f8f8cSSatish Balay         for (j=0; j<n; j++){
437f65c83cfSHong Zhang           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
438a30f8f8cSSatish Balay           in_loc[n_loc] = in[j];
439a30f8f8cSSatish Balay           if (roworiented) {
440a30f8f8cSSatish Balay             v_loc[n_loc] = v[i*n+j];
441a30f8f8cSSatish Balay           } else {
442a30f8f8cSSatish Balay             v_loc[n_loc] = v[j*m+i];
443a30f8f8cSSatish Balay           }
444a30f8f8cSSatish Balay           n_loc++;
445a30f8f8cSSatish Balay         }
446a30f8f8cSSatish Balay         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr);
447a30f8f8cSSatish Balay       }
448a30f8f8cSSatish Balay     }
449a30f8f8cSSatish Balay   }
450a30f8f8cSSatish Balay 
451a30f8f8cSSatish Balay   if(!baij->donotstash){
452a30f8f8cSSatish Balay     ierr = PetscFree(in_loc);CHKERRQ(ierr);
453a30f8f8cSSatish Balay     ierr = PetscFree(v_loc);CHKERRQ(ierr);
454a30f8f8cSSatish Balay   }
455a30f8f8cSSatish Balay   PetscFunctionReturn(0);
456a30f8f8cSSatish Balay }
457a30f8f8cSSatish Balay 
458a30f8f8cSSatish Balay #undef __FUNC__
459a30f8f8cSSatish Balay #define __FUNC__ /*<a name="MatSetValuesBlocked_MPISBAIJ"></a>*/"MatSetValuesBlocked_MPISBAIJ"
460a30f8f8cSSatish Balay int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
461a30f8f8cSSatish Balay {
462a30f8f8cSSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
463a30f8f8cSSatish Balay   MatScalar   *value,*barray=baij->barray;
464a30f8f8cSSatish Balay   int         ierr,i,j,ii,jj,row,col;
465a30f8f8cSSatish Balay   int         roworiented = baij->roworiented,rstart=baij->rstart ;
466a30f8f8cSSatish Balay   int         rend=baij->rend,cstart=baij->cstart,stepval;
467a30f8f8cSSatish Balay   int         cend=baij->cend,bs=baij->bs,bs2=baij->bs2;
468a30f8f8cSSatish Balay 
469a30f8f8cSSatish Balay   PetscFunctionBegin;
470a30f8f8cSSatish Balay   if(!barray) {
471a30f8f8cSSatish Balay     baij->barray = barray = (MatScalar*)PetscMalloc(bs2*sizeof(MatScalar));CHKPTRQ(barray);
472a30f8f8cSSatish Balay   }
473a30f8f8cSSatish Balay 
474a30f8f8cSSatish Balay   if (roworiented) {
475a30f8f8cSSatish Balay     stepval = (n-1)*bs;
476a30f8f8cSSatish Balay   } else {
477a30f8f8cSSatish Balay     stepval = (m-1)*bs;
478a30f8f8cSSatish Balay   }
479a30f8f8cSSatish Balay   for (i=0; i<m; i++) {
480a30f8f8cSSatish Balay     if (im[i] < 0) continue;
481a30f8f8cSSatish Balay #if defined(PETSC_USE_BOPT_g)
482a30f8f8cSSatish Balay     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large, row %d max %d",im[i],baij->Mbs);
483a30f8f8cSSatish Balay #endif
484a30f8f8cSSatish Balay     if (im[i] >= rstart && im[i] < rend) {
485a30f8f8cSSatish Balay       row = im[i] - rstart;
486a30f8f8cSSatish Balay       for (j=0; j<n; j++) {
487a30f8f8cSSatish Balay         /* If NumCol = 1 then a copy is not required */
488a30f8f8cSSatish Balay         if ((roworiented) && (n == 1)) {
489a30f8f8cSSatish Balay           barray = v + i*bs2;
490a30f8f8cSSatish Balay         } else if((!roworiented) && (m == 1)) {
491a30f8f8cSSatish Balay           barray = v + j*bs2;
492a30f8f8cSSatish Balay         } else { /* Here a copy is required */
493a30f8f8cSSatish Balay           if (roworiented) {
494a30f8f8cSSatish Balay             value = v + i*(stepval+bs)*bs + j*bs;
495a30f8f8cSSatish Balay           } else {
496a30f8f8cSSatish Balay             value = v + j*(stepval+bs)*bs + i*bs;
497a30f8f8cSSatish Balay           }
498a30f8f8cSSatish Balay           for (ii=0; ii<bs; ii++,value+=stepval) {
499a30f8f8cSSatish Balay             for (jj=0; jj<bs; jj++) {
500a30f8f8cSSatish Balay               *barray++  = *value++;
501a30f8f8cSSatish Balay             }
502a30f8f8cSSatish Balay           }
503a30f8f8cSSatish Balay           barray -=bs2;
504a30f8f8cSSatish Balay         }
505a30f8f8cSSatish Balay 
506a30f8f8cSSatish Balay         if (in[j] >= cstart && in[j] < cend){
507a30f8f8cSSatish Balay           col  = in[j] - cstart;
508a30f8f8cSSatish Balay           ierr = MatSetValuesBlocked_SeqSBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
509a30f8f8cSSatish Balay         }
510a30f8f8cSSatish Balay         else if (in[j] < 0) continue;
511a30f8f8cSSatish Balay #if defined(PETSC_USE_BOPT_g)
512a30f8f8cSSatish Balay         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large, col %d max %d",in[j],baij->Nbs);}
513a30f8f8cSSatish Balay #endif
514a30f8f8cSSatish Balay         else {
515a30f8f8cSSatish Balay           if (mat->was_assembled) {
516a30f8f8cSSatish Balay             if (!baij->colmap) {
517a30f8f8cSSatish Balay               ierr = CreateColmap_MPISBAIJ_Private(mat);CHKERRQ(ierr);
518a30f8f8cSSatish Balay             }
519a30f8f8cSSatish Balay 
520a30f8f8cSSatish Balay #if defined(PETSC_USE_BOPT_g)
521a30f8f8cSSatish Balay #if defined (PETSC_USE_CTABLE)
522a30f8f8cSSatish Balay             { int data;
523a30f8f8cSSatish Balay               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
524a30f8f8cSSatish Balay               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");
525a30f8f8cSSatish Balay             }
526a30f8f8cSSatish Balay #else
527a30f8f8cSSatish Balay             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,0,"Incorrect colmap");
528a30f8f8cSSatish Balay #endif
529a30f8f8cSSatish Balay #endif
530a30f8f8cSSatish Balay #if defined (PETSC_USE_CTABLE)
531a30f8f8cSSatish Balay 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
532a30f8f8cSSatish Balay             col  = (col - 1)/bs;
533a30f8f8cSSatish Balay #else
534a30f8f8cSSatish Balay             col = (baij->colmap[in[j]] - 1)/bs;
535a30f8f8cSSatish Balay #endif
536a30f8f8cSSatish Balay             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
537c8407628SSatish Balay               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
538a30f8f8cSSatish Balay               col =  in[j];
539a30f8f8cSSatish Balay             }
540a30f8f8cSSatish Balay           }
541a30f8f8cSSatish Balay           else col = in[j];
542a30f8f8cSSatish Balay           ierr = MatSetValuesBlocked_SeqSBAIJ_MatScalar(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
543a30f8f8cSSatish Balay         }
544a30f8f8cSSatish Balay       }
545a30f8f8cSSatish Balay     } else {
546a30f8f8cSSatish Balay       if (!baij->donotstash) {
547a30f8f8cSSatish Balay         if (roworiented) {
548a30f8f8cSSatish Balay           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
549a30f8f8cSSatish Balay         } else {
550a30f8f8cSSatish Balay           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
551a30f8f8cSSatish Balay         }
552a30f8f8cSSatish Balay       }
553a30f8f8cSSatish Balay     }
554a30f8f8cSSatish Balay   }
555a30f8f8cSSatish Balay   PetscFunctionReturn(0);
556a30f8f8cSSatish Balay }
557a30f8f8cSSatish Balay 
558a30f8f8cSSatish Balay #define HASH_KEY 0.6180339887
559a30f8f8cSSatish Balay #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
560a30f8f8cSSatish Balay /* #define HASH(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
561a30f8f8cSSatish Balay /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
562a30f8f8cSSatish Balay #undef __FUNC__
563a30f8f8cSSatish Balay #define __FUNC__ /*<a name="MatSetValues_MPISBAIJ_HT_MatScalar"></a>*/"MatSetValues_MPISBAIJ_HT_MatScalar"
564a30f8f8cSSatish Balay int MatSetValues_MPISBAIJ_HT_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
565a30f8f8cSSatish Balay {
566a30f8f8cSSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
567a30f8f8cSSatish Balay   int         ierr,i,j,row,col;
568a30f8f8cSSatish Balay   int         roworiented = baij->roworiented,rstart_orig=baij->rstart_bs ;
569a30f8f8cSSatish Balay   int         rend_orig=baij->rend_bs,Nbs=baij->Nbs;
570a30f8f8cSSatish Balay   int         h1,key,size=baij->ht_size,bs=baij->bs,*HT=baij->ht,idx;
571a30f8f8cSSatish Balay   PetscReal   tmp;
572a30f8f8cSSatish Balay   MatScalar   **HD = baij->hd,value;
573a30f8f8cSSatish Balay #if defined(PETSC_USE_BOPT_g)
574a30f8f8cSSatish Balay   int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
575a30f8f8cSSatish Balay #endif
576a30f8f8cSSatish Balay 
577a30f8f8cSSatish Balay   PetscFunctionBegin;
578a30f8f8cSSatish Balay 
579a30f8f8cSSatish Balay   for (i=0; i<m; i++) {
580a30f8f8cSSatish Balay #if defined(PETSC_USE_BOPT_g)
581a30f8f8cSSatish Balay     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
582a30f8f8cSSatish Balay     if (im[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
583a30f8f8cSSatish Balay #endif
584a30f8f8cSSatish Balay       row = im[i];
585a30f8f8cSSatish Balay     if (row >= rstart_orig && row < rend_orig) {
586a30f8f8cSSatish Balay       for (j=0; j<n; j++) {
587a30f8f8cSSatish Balay         col = in[j];
588a30f8f8cSSatish Balay         if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
589a30f8f8cSSatish Balay         /* Look up into the Hash Table */
590a30f8f8cSSatish Balay         key = (row/bs)*Nbs+(col/bs)+1;
591a30f8f8cSSatish Balay         h1  = HASH(size,key,tmp);
592a30f8f8cSSatish Balay 
593a30f8f8cSSatish Balay 
594a30f8f8cSSatish Balay         idx = h1;
595a30f8f8cSSatish Balay #if defined(PETSC_USE_BOPT_g)
596a30f8f8cSSatish Balay         insert_ct++;
597a30f8f8cSSatish Balay         total_ct++;
598a30f8f8cSSatish Balay         if (HT[idx] != key) {
599a30f8f8cSSatish Balay           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
600a30f8f8cSSatish Balay           if (idx == size) {
601a30f8f8cSSatish Balay             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
602a30f8f8cSSatish Balay             if (idx == h1) {
603a30f8f8cSSatish Balay               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
604a30f8f8cSSatish Balay             }
605a30f8f8cSSatish Balay           }
606a30f8f8cSSatish Balay         }
607a30f8f8cSSatish Balay #else
608a30f8f8cSSatish Balay         if (HT[idx] != key) {
609a30f8f8cSSatish Balay           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
610a30f8f8cSSatish Balay           if (idx == size) {
611a30f8f8cSSatish Balay             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
612a30f8f8cSSatish Balay             if (idx == h1) {
613a30f8f8cSSatish Balay               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
614a30f8f8cSSatish Balay             }
615a30f8f8cSSatish Balay           }
616a30f8f8cSSatish Balay         }
617a30f8f8cSSatish Balay #endif
618a30f8f8cSSatish Balay         /* A HASH table entry is found, so insert the values at the correct address */
619a30f8f8cSSatish Balay         if (addv == ADD_VALUES) *(HD[idx]+ (col % bs)*bs + (row % bs)) += value;
620a30f8f8cSSatish Balay         else                    *(HD[idx]+ (col % bs)*bs + (row % bs))  = value;
621a30f8f8cSSatish Balay       }
622a30f8f8cSSatish Balay     } else {
623a30f8f8cSSatish Balay       if (!baij->donotstash) {
624a30f8f8cSSatish Balay         if (roworiented) {
625a30f8f8cSSatish Balay           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n);CHKERRQ(ierr);
626a30f8f8cSSatish Balay         } else {
627a30f8f8cSSatish Balay           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m);CHKERRQ(ierr);
628a30f8f8cSSatish Balay         }
629a30f8f8cSSatish Balay       }
630a30f8f8cSSatish Balay     }
631a30f8f8cSSatish Balay   }
632a30f8f8cSSatish Balay #if defined(PETSC_USE_BOPT_g)
633a30f8f8cSSatish Balay   baij->ht_total_ct = total_ct;
634a30f8f8cSSatish Balay   baij->ht_insert_ct = insert_ct;
635a30f8f8cSSatish Balay #endif
636a30f8f8cSSatish Balay   PetscFunctionReturn(0);
637a30f8f8cSSatish Balay }
638a30f8f8cSSatish Balay 
639a30f8f8cSSatish Balay #undef __FUNC__
640a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatSetValuesBlocked_MPISBAIJ_HT_MatScalar"
641a30f8f8cSSatish Balay int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
642a30f8f8cSSatish Balay {
643a30f8f8cSSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
644a30f8f8cSSatish Balay   int         ierr,i,j,ii,jj,row,col;
645a30f8f8cSSatish Balay   int         roworiented = baij->roworiented,rstart=baij->rstart ;
646a30f8f8cSSatish Balay   int         rend=baij->rend,stepval,bs=baij->bs,bs2=baij->bs2;
647a30f8f8cSSatish Balay   int         h1,key,size=baij->ht_size,idx,*HT=baij->ht,Nbs=baij->Nbs;
648a30f8f8cSSatish Balay   PetscReal   tmp;
649a30f8f8cSSatish Balay   MatScalar   **HD = baij->hd,*baij_a;
650a30f8f8cSSatish Balay   MatScalar   *v_t,*value;
651a30f8f8cSSatish Balay #if defined(PETSC_USE_BOPT_g)
652a30f8f8cSSatish Balay   int         total_ct=baij->ht_total_ct,insert_ct=baij->ht_insert_ct;
653a30f8f8cSSatish Balay #endif
654a30f8f8cSSatish Balay 
655a30f8f8cSSatish Balay   PetscFunctionBegin;
656a30f8f8cSSatish Balay 
657a30f8f8cSSatish Balay   if (roworiented) {
658a30f8f8cSSatish Balay     stepval = (n-1)*bs;
659a30f8f8cSSatish Balay   } else {
660a30f8f8cSSatish Balay     stepval = (m-1)*bs;
661a30f8f8cSSatish Balay   }
662a30f8f8cSSatish Balay   for (i=0; i<m; i++) {
663a30f8f8cSSatish Balay #if defined(PETSC_USE_BOPT_g)
664a30f8f8cSSatish Balay     if (im[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
665a30f8f8cSSatish Balay     if (im[i] >= baij->Mbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
666a30f8f8cSSatish Balay #endif
667a30f8f8cSSatish Balay     row   = im[i];
668a30f8f8cSSatish Balay     v_t   = v + i*bs2;
669a30f8f8cSSatish Balay     if (row >= rstart && row < rend) {
670a30f8f8cSSatish Balay       for (j=0; j<n; j++) {
671a30f8f8cSSatish Balay         col = in[j];
672a30f8f8cSSatish Balay 
673a30f8f8cSSatish Balay         /* Look up into the Hash Table */
674a30f8f8cSSatish Balay         key = row*Nbs+col+1;
675a30f8f8cSSatish Balay         h1  = HASH(size,key,tmp);
676a30f8f8cSSatish Balay 
677a30f8f8cSSatish Balay         idx = h1;
678a30f8f8cSSatish Balay #if defined(PETSC_USE_BOPT_g)
679a30f8f8cSSatish Balay         total_ct++;
680a30f8f8cSSatish Balay         insert_ct++;
681a30f8f8cSSatish Balay        if (HT[idx] != key) {
682a30f8f8cSSatish Balay           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++,total_ct++);
683a30f8f8cSSatish Balay           if (idx == size) {
684a30f8f8cSSatish Balay             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++,total_ct++);
685a30f8f8cSSatish Balay             if (idx == h1) {
686a30f8f8cSSatish Balay               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
687a30f8f8cSSatish Balay             }
688a30f8f8cSSatish Balay           }
689a30f8f8cSSatish Balay         }
690a30f8f8cSSatish Balay #else
691a30f8f8cSSatish Balay         if (HT[idx] != key) {
692a30f8f8cSSatish Balay           for (idx=h1; (idx<size) && (HT[idx]!=key); idx++);
693a30f8f8cSSatish Balay           if (idx == size) {
694a30f8f8cSSatish Balay             for (idx=0; (idx<h1) && (HT[idx]!=key); idx++);
695a30f8f8cSSatish Balay             if (idx == h1) {
696a30f8f8cSSatish Balay               SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"(row,col) has no entry in the hash table");
697a30f8f8cSSatish Balay             }
698a30f8f8cSSatish Balay           }
699a30f8f8cSSatish Balay         }
700a30f8f8cSSatish Balay #endif
701a30f8f8cSSatish Balay         baij_a = HD[idx];
702a30f8f8cSSatish Balay         if (roworiented) {
703a30f8f8cSSatish Balay           /*value = v + i*(stepval+bs)*bs + j*bs;*/
704a30f8f8cSSatish Balay           /* value = v + (i*(stepval+bs)+j)*bs; */
705a30f8f8cSSatish Balay           value = v_t;
706a30f8f8cSSatish Balay           v_t  += bs;
707a30f8f8cSSatish Balay           if (addv == ADD_VALUES) {
708a30f8f8cSSatish Balay             for (ii=0; ii<bs; ii++,value+=stepval) {
709a30f8f8cSSatish Balay               for (jj=ii; jj<bs2; jj+=bs) {
710a30f8f8cSSatish Balay                 baij_a[jj]  += *value++;
711a30f8f8cSSatish Balay               }
712a30f8f8cSSatish Balay             }
713a30f8f8cSSatish Balay           } else {
714a30f8f8cSSatish Balay             for (ii=0; ii<bs; ii++,value+=stepval) {
715a30f8f8cSSatish Balay               for (jj=ii; jj<bs2; jj+=bs) {
716a30f8f8cSSatish Balay                 baij_a[jj]  = *value++;
717a30f8f8cSSatish Balay               }
718a30f8f8cSSatish Balay             }
719a30f8f8cSSatish Balay           }
720a30f8f8cSSatish Balay         } else {
721a30f8f8cSSatish Balay           value = v + j*(stepval+bs)*bs + i*bs;
722a30f8f8cSSatish Balay           if (addv == ADD_VALUES) {
723a30f8f8cSSatish Balay             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
724a30f8f8cSSatish Balay               for (jj=0; jj<bs; jj++) {
725a30f8f8cSSatish Balay                 baij_a[jj]  += *value++;
726a30f8f8cSSatish Balay               }
727a30f8f8cSSatish Balay             }
728a30f8f8cSSatish Balay           } else {
729a30f8f8cSSatish Balay             for (ii=0; ii<bs; ii++,value+=stepval,baij_a+=bs) {
730a30f8f8cSSatish Balay               for (jj=0; jj<bs; jj++) {
731a30f8f8cSSatish Balay                 baij_a[jj]  = *value++;
732a30f8f8cSSatish Balay               }
733a30f8f8cSSatish Balay             }
734a30f8f8cSSatish Balay           }
735a30f8f8cSSatish Balay         }
736a30f8f8cSSatish Balay       }
737a30f8f8cSSatish Balay     } else {
738a30f8f8cSSatish Balay       if (!baij->donotstash) {
739a30f8f8cSSatish Balay         if (roworiented) {
740a30f8f8cSSatish Balay           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
741a30f8f8cSSatish Balay         } else {
742a30f8f8cSSatish Balay           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
743a30f8f8cSSatish Balay         }
744a30f8f8cSSatish Balay       }
745a30f8f8cSSatish Balay     }
746a30f8f8cSSatish Balay   }
747a30f8f8cSSatish Balay #if defined(PETSC_USE_BOPT_g)
748a30f8f8cSSatish Balay   baij->ht_total_ct = total_ct;
749a30f8f8cSSatish Balay   baij->ht_insert_ct = insert_ct;
750a30f8f8cSSatish Balay #endif
751a30f8f8cSSatish Balay   PetscFunctionReturn(0);
752a30f8f8cSSatish Balay }
753a30f8f8cSSatish Balay 
754a30f8f8cSSatish Balay #undef __FUNC__
755a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatGetValues_MPISBAIJ"
756a30f8f8cSSatish Balay int MatGetValues_MPISBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,Scalar *v)
757a30f8f8cSSatish Balay {
758a30f8f8cSSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
759a30f8f8cSSatish Balay   int        bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
760a30f8f8cSSatish Balay   int        bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;
761a30f8f8cSSatish Balay 
762a30f8f8cSSatish Balay   PetscFunctionBegin;
763a30f8f8cSSatish Balay   for (i=0; i<m; i++) {
764a30f8f8cSSatish Balay     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative row");
765a30f8f8cSSatish Balay     if (idxm[i] >= baij->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Row too large");
766a30f8f8cSSatish Balay     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
767a30f8f8cSSatish Balay       row = idxm[i] - bsrstart;
768a30f8f8cSSatish Balay       for (j=0; j<n; j++) {
769a30f8f8cSSatish Balay         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Negative column");
770a30f8f8cSSatish Balay         if (idxn[j] >= baij->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Column too large");
771a30f8f8cSSatish Balay         if (idxn[j] >= bscstart && idxn[j] < bscend){
772a30f8f8cSSatish Balay           col = idxn[j] - bscstart;
773c8407628SSatish Balay           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
774a30f8f8cSSatish Balay         } else {
775a30f8f8cSSatish Balay           if (!baij->colmap) {
776a30f8f8cSSatish Balay             ierr = CreateColmap_MPISBAIJ_Private(mat);CHKERRQ(ierr);
777a30f8f8cSSatish Balay           }
778a30f8f8cSSatish Balay #if defined (PETSC_USE_CTABLE)
779a30f8f8cSSatish Balay           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
780a30f8f8cSSatish Balay           data --;
781a30f8f8cSSatish Balay #else
782a30f8f8cSSatish Balay           data = baij->colmap[idxn[j]/bs]-1;
783a30f8f8cSSatish Balay #endif
784a30f8f8cSSatish Balay           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
785a30f8f8cSSatish Balay           else {
786a30f8f8cSSatish Balay             col  = data + idxn[j]%bs;
787c8407628SSatish Balay             ierr = MatGetValues_SeqSBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
788a30f8f8cSSatish Balay           }
789a30f8f8cSSatish Balay         }
790a30f8f8cSSatish Balay       }
791a30f8f8cSSatish Balay     } else {
792a30f8f8cSSatish Balay       SETERRQ(PETSC_ERR_SUP,0,"Only local values currently supported");
793a30f8f8cSSatish Balay     }
794a30f8f8cSSatish Balay   }
795a30f8f8cSSatish Balay  PetscFunctionReturn(0);
796a30f8f8cSSatish Balay }
797a30f8f8cSSatish Balay 
798a30f8f8cSSatish Balay #undef __FUNC__
799a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatNorm_MPISBAIJ"
800a30f8f8cSSatish Balay int MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
801a30f8f8cSSatish Balay {
802a30f8f8cSSatish Balay   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
803bc0ba53dSHong Zhang   /* Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ*)baij->A->data; */
804bc0ba53dSHong Zhang   /* Mat_SeqBAIJ  *bmat = (Mat_SeqBAIJ*)baij->B->data; */
805a30f8f8cSSatish Balay   int        ierr;
806a30f8f8cSSatish Balay   PetscReal  sum[2],*lnorm2;
807a30f8f8cSSatish Balay 
808a30f8f8cSSatish Balay   PetscFunctionBegin;
809a30f8f8cSSatish Balay   if (baij->size == 1) {
810a30f8f8cSSatish Balay     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
811a30f8f8cSSatish Balay   } else {
812a30f8f8cSSatish Balay     if (type == NORM_FROBENIUS) {
813ee237329SHong Zhang       lnorm2 = (double*)PetscMalloc(2*sizeof(double));CHKPTRQ(lnorm2);
814a30f8f8cSSatish Balay       ierr =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
815a30f8f8cSSatish Balay       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* squar power of norm(A) */
816a30f8f8cSSatish Balay       ierr =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
817a30f8f8cSSatish Balay       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* squar power of norm(B) */
818a30f8f8cSSatish Balay       /*
819a30f8f8cSSatish Balay       ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
820a30f8f8cSSatish Balay       PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d], lnorm2=%g, %g\n",rank,lnorm2[0],lnorm2[1]);
821a30f8f8cSSatish Balay       */
822a30f8f8cSSatish Balay       ierr = MPI_Allreduce(lnorm2,&sum,2,MPI_DOUBLE,MPI_SUM,mat->comm);CHKERRQ(ierr);
823a30f8f8cSSatish Balay       /*
824a30f8f8cSSatish Balay       PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d], sum=%g, %g\n",rank,sum[0],sum[1]);
825a30f8f8cSSatish Balay       PetscSynchronizedFlush(PETSC_COMM_WORLD); */
826a30f8f8cSSatish Balay 
827a30f8f8cSSatish Balay       *norm = sqrt(sum[0] + 2*sum[1]);
828a30f8f8cSSatish Balay       ierr = PetscFree(lnorm2);CHKERRQ(ierr);
829a30f8f8cSSatish Balay     } else {
830a30f8f8cSSatish Balay       SETERRQ(PETSC_ERR_SUP,0,"No support for this norm yet");
831a30f8f8cSSatish Balay     }
832a30f8f8cSSatish Balay   }
833a30f8f8cSSatish Balay   PetscFunctionReturn(0);
834a30f8f8cSSatish Balay }
835a30f8f8cSSatish Balay 
836a30f8f8cSSatish Balay /*
837a30f8f8cSSatish Balay   Creates the hash table, and sets the table
838a30f8f8cSSatish Balay   This table is created only once.
839a30f8f8cSSatish Balay   If new entried need to be added to the matrix
840a30f8f8cSSatish Balay   then the hash table has to be destroyed and
841a30f8f8cSSatish Balay   recreated.
842a30f8f8cSSatish Balay */
843a30f8f8cSSatish Balay #undef __FUNC__
844a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatCreateHashTable_MPISBAIJ_Private"
845a30f8f8cSSatish Balay int MatCreateHashTable_MPISBAIJ_Private(Mat mat,PetscReal factor)
846a30f8f8cSSatish Balay {
847a30f8f8cSSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data;
848a30f8f8cSSatish Balay   Mat         A = baij->A,B=baij->B;
849a30f8f8cSSatish Balay   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data,*b=(Mat_SeqBAIJ *)B->data;
850a30f8f8cSSatish Balay   int         i,j,k,nz=a->nz+b->nz,h1,*ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
851a30f8f8cSSatish Balay   int         size,bs2=baij->bs2,rstart=baij->rstart,ierr;
852a30f8f8cSSatish Balay   int         cstart=baij->cstart,*garray=baij->garray,row,col,Nbs=baij->Nbs;
853a30f8f8cSSatish Balay   int         *HT,key;
854a30f8f8cSSatish Balay   MatScalar   **HD;
855a30f8f8cSSatish Balay   PetscReal   tmp;
856a30f8f8cSSatish Balay #if defined(PETSC_USE_BOPT_g)
857a30f8f8cSSatish Balay   int         ct=0,max=0;
858a30f8f8cSSatish Balay #endif
859a30f8f8cSSatish Balay 
860a30f8f8cSSatish Balay   PetscFunctionBegin;
861a30f8f8cSSatish Balay   baij->ht_size=(int)(factor*nz);
862a30f8f8cSSatish Balay   size = baij->ht_size;
863a30f8f8cSSatish Balay 
864a30f8f8cSSatish Balay   if (baij->ht) {
865a30f8f8cSSatish Balay     PetscFunctionReturn(0);
866a30f8f8cSSatish Balay   }
867a30f8f8cSSatish Balay 
868a30f8f8cSSatish Balay   /* Allocate Memory for Hash Table */
869a30f8f8cSSatish Balay   baij->hd = (MatScalar**)PetscMalloc((size)*(sizeof(int)+sizeof(MatScalar*))+1);CHKPTRQ(baij->hd);
870a30f8f8cSSatish Balay   baij->ht = (int*)(baij->hd + size);
871a30f8f8cSSatish Balay   HD = baij->hd;
872a30f8f8cSSatish Balay   HT = baij->ht;
873a30f8f8cSSatish Balay 
874a30f8f8cSSatish Balay 
875a30f8f8cSSatish Balay   ierr = PetscMemzero(HD,size*(sizeof(int)+sizeof(Scalar*)));CHKERRQ(ierr);
876a30f8f8cSSatish Balay 
877a30f8f8cSSatish Balay 
878a30f8f8cSSatish Balay   /* Loop Over A */
879a30f8f8cSSatish Balay   for (i=0; i<a->mbs; i++) {
880a30f8f8cSSatish Balay     for (j=ai[i]; j<ai[i+1]; j++) {
881a30f8f8cSSatish Balay       row = i+rstart;
882a30f8f8cSSatish Balay       col = aj[j]+cstart;
883a30f8f8cSSatish Balay 
884a30f8f8cSSatish Balay       key = row*Nbs + col + 1;
885a30f8f8cSSatish Balay       h1  = HASH(size,key,tmp);
886a30f8f8cSSatish Balay       for (k=0; k<size; k++){
887a30f8f8cSSatish Balay         if (HT[(h1+k)%size] == 0.0) {
888a30f8f8cSSatish Balay           HT[(h1+k)%size] = key;
889a30f8f8cSSatish Balay           HD[(h1+k)%size] = a->a + j*bs2;
890a30f8f8cSSatish Balay           break;
891a30f8f8cSSatish Balay #if defined(PETSC_USE_BOPT_g)
892a30f8f8cSSatish Balay         } else {
893a30f8f8cSSatish Balay           ct++;
894a30f8f8cSSatish Balay #endif
895a30f8f8cSSatish Balay         }
896a30f8f8cSSatish Balay       }
897a30f8f8cSSatish Balay #if defined(PETSC_USE_BOPT_g)
898a30f8f8cSSatish Balay       if (k> max) max = k;
899a30f8f8cSSatish Balay #endif
900a30f8f8cSSatish Balay     }
901a30f8f8cSSatish Balay   }
902a30f8f8cSSatish Balay   /* Loop Over B */
903a30f8f8cSSatish Balay   for (i=0; i<b->mbs; i++) {
904a30f8f8cSSatish Balay     for (j=bi[i]; j<bi[i+1]; j++) {
905a30f8f8cSSatish Balay       row = i+rstart;
906a30f8f8cSSatish Balay       col = garray[bj[j]];
907a30f8f8cSSatish Balay       key = row*Nbs + col + 1;
908a30f8f8cSSatish Balay       h1  = HASH(size,key,tmp);
909a30f8f8cSSatish Balay       for (k=0; k<size; k++){
910a30f8f8cSSatish Balay         if (HT[(h1+k)%size] == 0.0) {
911a30f8f8cSSatish Balay           HT[(h1+k)%size] = key;
912a30f8f8cSSatish Balay           HD[(h1+k)%size] = b->a + j*bs2;
913a30f8f8cSSatish Balay           break;
914a30f8f8cSSatish Balay #if defined(PETSC_USE_BOPT_g)
915a30f8f8cSSatish Balay         } else {
916a30f8f8cSSatish Balay           ct++;
917a30f8f8cSSatish Balay #endif
918a30f8f8cSSatish Balay         }
919a30f8f8cSSatish Balay       }
920a30f8f8cSSatish Balay #if defined(PETSC_USE_BOPT_g)
921a30f8f8cSSatish Balay       if (k> max) max = k;
922a30f8f8cSSatish Balay #endif
923a30f8f8cSSatish Balay     }
924a30f8f8cSSatish Balay   }
925a30f8f8cSSatish Balay 
926a30f8f8cSSatish Balay   /* Print Summary */
927a30f8f8cSSatish Balay #if defined(PETSC_USE_BOPT_g)
928a30f8f8cSSatish Balay   for (i=0,j=0; i<size; i++) {
929a30f8f8cSSatish Balay     if (HT[i]) {j++;}
930a30f8f8cSSatish Balay   }
931a30f8f8cSSatish Balay   PLogInfo(0,"MatCreateHashTable_MPIBAIJ_Private: Average Search = %5.2f,max search = %d\n",(j== 0)? 0.0:((PetscReal)(ct+j))/j,max);
932a30f8f8cSSatish Balay #endif
933a30f8f8cSSatish Balay   PetscFunctionReturn(0);
934a30f8f8cSSatish Balay }
935a30f8f8cSSatish Balay 
936a30f8f8cSSatish Balay #undef __FUNC__
937a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatAssemblyBegin_MPISBAIJ"
938a30f8f8cSSatish Balay int MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
939a30f8f8cSSatish Balay {
940a30f8f8cSSatish Balay   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
941a30f8f8cSSatish Balay   int         ierr,nstash,reallocs;
942a30f8f8cSSatish Balay   InsertMode  addv;
943a30f8f8cSSatish Balay 
944a30f8f8cSSatish Balay   PetscFunctionBegin;
945a30f8f8cSSatish Balay   if (baij->donotstash) {
946a30f8f8cSSatish Balay     PetscFunctionReturn(0);
947a30f8f8cSSatish Balay   }
948a30f8f8cSSatish Balay 
949a30f8f8cSSatish Balay   /* make sure all processors are either in INSERTMODE or ADDMODE */
950a30f8f8cSSatish Balay   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
951a30f8f8cSSatish Balay   if (addv == (ADD_VALUES|INSERT_VALUES)) {
952a30f8f8cSSatish Balay     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Some processors inserted others added");
953a30f8f8cSSatish Balay   }
954a30f8f8cSSatish Balay   mat->insertmode = addv; /* in case this processor had no cache */
955a30f8f8cSSatish Balay 
956a30f8f8cSSatish Balay   ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
957a30f8f8cSSatish Balay   ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
958a30f8f8cSSatish Balay   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
959a30f8f8cSSatish Balay   PLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Stash has %d entries,uses %d mallocs.\n",nstash,reallocs);
960a30f8f8cSSatish Balay   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
961a30f8f8cSSatish Balay   PLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
962a30f8f8cSSatish Balay   PetscFunctionReturn(0);
963a30f8f8cSSatish Balay }
964a30f8f8cSSatish Balay 
965a30f8f8cSSatish Balay #undef __FUNC__
966a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatAssemblyEnd_MPISBAIJ"
967a30f8f8cSSatish Balay int MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
968a30f8f8cSSatish Balay {
969a30f8f8cSSatish Balay   Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data;
970a30f8f8cSSatish Balay   Mat_SeqSBAIJ  *a=(Mat_SeqSBAIJ*)baij->A->data;
971a30f8f8cSSatish Balay   Mat_SeqBAIJ  *b=(Mat_SeqBAIJ*)baij->B->data;
972a30f8f8cSSatish Balay   int         i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2;
973a30f8f8cSSatish Balay   int         *row,*col,other_disassembled;
974a30f8f8cSSatish Balay   PetscTruth  r1,r2,r3;
975a30f8f8cSSatish Balay   MatScalar   *val;
976a30f8f8cSSatish Balay   InsertMode  addv = mat->insertmode;
977a30f8f8cSSatish Balay   int         rank;
978a30f8f8cSSatish Balay 
979a30f8f8cSSatish Balay   PetscFunctionBegin;
980a30f8f8cSSatish Balay   /* remove 2 line below later */
981a30f8f8cSSatish Balay   ierr = MPI_Comm_rank(PETSC_COMM_WORLD, &rank);CHKERRQ(ierr);
982a30f8f8cSSatish Balay 
983a30f8f8cSSatish Balay   if (!baij->donotstash) {
984a30f8f8cSSatish Balay     while (1) {
985a30f8f8cSSatish Balay       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
986a30f8f8cSSatish Balay       /*
987a30f8f8cSSatish Balay       PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d]: in AssemblyEnd, stash, flg=%d\n",rank,flg);
988a30f8f8cSSatish Balay       PetscSynchronizedFlush(PETSC_COMM_WORLD);
989a30f8f8cSSatish Balay       */
990a30f8f8cSSatish Balay       if (!flg) break;
991a30f8f8cSSatish Balay 
992a30f8f8cSSatish Balay       for (i=0; i<n;) {
993a30f8f8cSSatish Balay         /* Now identify the consecutive vals belonging to the same row */
994a30f8f8cSSatish Balay         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
995a30f8f8cSSatish Balay         if (j < n) ncols = j-i;
996a30f8f8cSSatish Balay         else       ncols = n-i;
997a30f8f8cSSatish Balay         /* Now assemble all these values with a single function call */
998a30f8f8cSSatish Balay         ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
999a30f8f8cSSatish Balay         i = j;
1000a30f8f8cSSatish Balay       }
1001a30f8f8cSSatish Balay     }
1002a30f8f8cSSatish Balay     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
1003a30f8f8cSSatish Balay     /* Now process the block-stash. Since the values are stashed column-oriented,
1004a30f8f8cSSatish Balay        set the roworiented flag to column oriented, and after MatSetValues()
1005a30f8f8cSSatish Balay        restore the original flags */
1006a30f8f8cSSatish Balay     r1 = baij->roworiented;
1007a30f8f8cSSatish Balay     r2 = a->roworiented;
1008a30f8f8cSSatish Balay     r3 = b->roworiented;
1009a30f8f8cSSatish Balay     baij->roworiented = PETSC_FALSE;
1010a30f8f8cSSatish Balay     a->roworiented    = PETSC_FALSE;
1011a30f8f8cSSatish Balay     b->roworiented    = PETSC_FALSE;
1012a30f8f8cSSatish Balay     while (1) {
1013a30f8f8cSSatish Balay       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
1014a30f8f8cSSatish Balay       if (!flg) break;
1015a30f8f8cSSatish Balay 
1016a30f8f8cSSatish Balay       for (i=0; i<n;) {
1017a30f8f8cSSatish Balay         /* Now identify the consecutive vals belonging to the same row */
1018a30f8f8cSSatish Balay         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
1019a30f8f8cSSatish Balay         if (j < n) ncols = j-i;
1020a30f8f8cSSatish Balay         else       ncols = n-i;
1021a30f8f8cSSatish Balay         ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
1022a30f8f8cSSatish Balay         i = j;
1023a30f8f8cSSatish Balay       }
1024a30f8f8cSSatish Balay     }
1025a30f8f8cSSatish Balay     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
1026a30f8f8cSSatish Balay     baij->roworiented = r1;
1027a30f8f8cSSatish Balay     a->roworiented    = r2;
1028a30f8f8cSSatish Balay     b->roworiented    = r3;
1029a30f8f8cSSatish Balay   }
1030a30f8f8cSSatish Balay 
1031a30f8f8cSSatish Balay   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
1032a30f8f8cSSatish Balay   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
1033a30f8f8cSSatish Balay 
1034a30f8f8cSSatish Balay   /* determine if any processor has disassembled, if so we must
1035a30f8f8cSSatish Balay      also disassemble ourselfs, in order that we may reassemble. */
1036a30f8f8cSSatish Balay   /*
1037a30f8f8cSSatish Balay      if nonzero structure of submatrix B cannot change then we know that
1038a30f8f8cSSatish Balay      no processor disassembled thus we can skip this stuff
1039a30f8f8cSSatish Balay   */
1040a30f8f8cSSatish Balay   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
1041a30f8f8cSSatish Balay     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
1042a30f8f8cSSatish Balay     if (mat->was_assembled && !other_disassembled) {
1043c8407628SSatish Balay       ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
1044a30f8f8cSSatish Balay     }
1045a30f8f8cSSatish Balay   }
1046a30f8f8cSSatish Balay 
1047a30f8f8cSSatish Balay   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
1048a30f8f8cSSatish Balay     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr);
1049a30f8f8cSSatish Balay   }
1050a30f8f8cSSatish Balay   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
1051a30f8f8cSSatish Balay   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
1052a30f8f8cSSatish Balay 
1053a30f8f8cSSatish Balay #if defined(PETSC_USE_BOPT_g)
1054a30f8f8cSSatish Balay   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
1055a30f8f8cSSatish Balay     PLogInfo(0,"MatAssemblyEnd_MPISBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((double)baij->ht_total_ct)/baij->ht_insert_ct);
1056a30f8f8cSSatish Balay     baij->ht_total_ct  = 0;
1057a30f8f8cSSatish Balay     baij->ht_insert_ct = 0;
1058a30f8f8cSSatish Balay   }
1059a30f8f8cSSatish Balay #endif
1060a30f8f8cSSatish Balay   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
1061c8407628SSatish Balay     ierr = MatCreateHashTable_MPISBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
1062a30f8f8cSSatish Balay     mat->ops->setvalues        = MatSetValues_MPISBAIJ_HT;
1063a30f8f8cSSatish Balay     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPISBAIJ_HT;
1064a30f8f8cSSatish Balay   }
1065a30f8f8cSSatish Balay 
1066a30f8f8cSSatish Balay   if (baij->rowvalues) {
1067a30f8f8cSSatish Balay     ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
1068a30f8f8cSSatish Balay     baij->rowvalues = 0;
1069a30f8f8cSSatish Balay   }
1070a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1071a30f8f8cSSatish Balay }
1072a30f8f8cSSatish Balay 
1073a30f8f8cSSatish Balay #undef __FUNC__
1074a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatView_MPISBAIJ_ASCIIorDraworSocket"
1075a30f8f8cSSatish Balay static int MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,Viewer viewer)
1076a30f8f8cSSatish Balay {
1077a30f8f8cSSatish Balay   Mat_MPISBAIJ  *baij = (Mat_MPISBAIJ*)mat->data;
1078a30f8f8cSSatish Balay   int          ierr,format,bs = baij->bs,size = baij->size,rank = baij->rank;
1079a30f8f8cSSatish Balay   PetscTruth   isascii,isdraw;
108065d70643SHong Zhang   Viewer       sviewer;
1081a30f8f8cSSatish Balay 
1082a30f8f8cSSatish Balay   PetscFunctionBegin;
1083a30f8f8cSSatish Balay   ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
1084a30f8f8cSSatish Balay   ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
1085a30f8f8cSSatish Balay   if (isascii) {
1086a30f8f8cSSatish Balay     ierr = ViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1087a30f8f8cSSatish Balay     if (format == VIEWER_FORMAT_ASCII_INFO_LONG) {
1088a30f8f8cSSatish Balay       MatInfo info;
1089a30f8f8cSSatish Balay       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
1090a30f8f8cSSatish Balay       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1091a30f8f8cSSatish Balay       ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
1092a30f8f8cSSatish Balay               rank,baij->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
1093a30f8f8cSSatish Balay               baij->bs,(int)info.memory);CHKERRQ(ierr);
1094a30f8f8cSSatish Balay       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1095a30f8f8cSSatish Balay       ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
1096a30f8f8cSSatish Balay       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1097a30f8f8cSSatish Balay       ierr = ViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
1098a30f8f8cSSatish Balay       ierr = ViewerFlush(viewer);CHKERRQ(ierr);
1099a30f8f8cSSatish Balay       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
1100a30f8f8cSSatish Balay       PetscFunctionReturn(0);
1101a30f8f8cSSatish Balay     } else if (format == VIEWER_FORMAT_ASCII_INFO) {
1102a30f8f8cSSatish Balay       ierr = ViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
1103a30f8f8cSSatish Balay       PetscFunctionReturn(0);
1104a30f8f8cSSatish Balay     }
1105a30f8f8cSSatish Balay   }
1106a30f8f8cSSatish Balay 
1107a30f8f8cSSatish Balay   if (isdraw) {
1108a30f8f8cSSatish Balay     Draw       draw;
1109a30f8f8cSSatish Balay     PetscTruth isnull;
1110a30f8f8cSSatish Balay     ierr = ViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1111a30f8f8cSSatish Balay     ierr = DrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1112a30f8f8cSSatish Balay   }
1113a30f8f8cSSatish Balay 
1114a30f8f8cSSatish Balay   if (size == 1) {
1115a30f8f8cSSatish Balay     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
1116a30f8f8cSSatish Balay   } else {
1117a30f8f8cSSatish Balay     /* assemble the entire matrix onto first processor. */
1118a30f8f8cSSatish Balay     Mat         A;
111965d70643SHong Zhang     Mat_SeqSBAIJ *Aloc;
112065d70643SHong Zhang     Mat_SeqBAIJ *Bloc;
1121a30f8f8cSSatish Balay     int         M = baij->M,N = baij->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
1122a30f8f8cSSatish Balay     MatScalar   *a;
1123a30f8f8cSSatish Balay 
1124a30f8f8cSSatish Balay     if (!rank) {
1125bc0ba53dSHong Zhang       ierr = MatCreateMPISBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
1126a30f8f8cSSatish Balay     } else {
1127f65c83cfSHong Zhang       ierr = MatCreateMPISBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
1128a30f8f8cSSatish Balay     }
1129a30f8f8cSSatish Balay     PLogObjectParent(mat,A);
1130a30f8f8cSSatish Balay 
1131a30f8f8cSSatish Balay     /* copy over the A part */
113265d70643SHong Zhang     Aloc  = (Mat_SeqSBAIJ*)baij->A->data;
1133a30f8f8cSSatish Balay     ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
1134a30f8f8cSSatish Balay     rvals = (int*)PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals);
1135a30f8f8cSSatish Balay 
1136a30f8f8cSSatish Balay     for (i=0; i<mbs; i++) {
1137a30f8f8cSSatish Balay       rvals[0] = bs*(baij->rstart + i);
1138a30f8f8cSSatish Balay       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1139a30f8f8cSSatish Balay       for (j=ai[i]; j<ai[i+1]; j++) {
1140a30f8f8cSSatish Balay         col = (baij->cstart+aj[j])*bs;
1141a30f8f8cSSatish Balay         for (k=0; k<bs; k++) {
1142a30f8f8cSSatish Balay           ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1143a30f8f8cSSatish Balay           col++; a += bs;
1144a30f8f8cSSatish Balay         }
1145a30f8f8cSSatish Balay       }
1146a30f8f8cSSatish Balay     }
1147a30f8f8cSSatish Balay     /* copy over the B part */
114865d70643SHong Zhang     Bloc = (Mat_SeqBAIJ*)baij->B->data;
114965d70643SHong Zhang     ai = Bloc->i; aj = Bloc->j; a = Bloc->a;
1150a30f8f8cSSatish Balay     for (i=0; i<mbs; i++) {
1151a30f8f8cSSatish Balay       rvals[0] = bs*(baij->rstart + i);
1152a30f8f8cSSatish Balay       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1153a30f8f8cSSatish Balay       for (j=ai[i]; j<ai[i+1]; j++) {
1154a30f8f8cSSatish Balay         col = baij->garray[aj[j]]*bs;
1155a30f8f8cSSatish Balay         for (k=0; k<bs; k++) {
1156a30f8f8cSSatish Balay           ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
1157a30f8f8cSSatish Balay           col++; a += bs;
1158a30f8f8cSSatish Balay         }
1159a30f8f8cSSatish Balay       }
1160a30f8f8cSSatish Balay     }
1161a30f8f8cSSatish Balay     ierr = PetscFree(rvals);CHKERRQ(ierr);
1162a30f8f8cSSatish Balay     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1163a30f8f8cSSatish Balay     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1164a30f8f8cSSatish Balay     /*
1165a30f8f8cSSatish Balay        Everyone has to call to draw the matrix since the graphics waits are
1166a30f8f8cSSatish Balay        synchronized across all processors that share the Draw object
1167a30f8f8cSSatish Balay     */
1168a30f8f8cSSatish Balay     ierr = ViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
116965d70643SHong Zhang     if (!rank) {
1170a30f8f8cSSatish Balay       ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
1171a30f8f8cSSatish Balay     }
117265d70643SHong Zhang     ierr = ViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
1173a30f8f8cSSatish Balay     ierr = MatDestroy(A);CHKERRQ(ierr);
1174a30f8f8cSSatish Balay   }
1175a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1176a30f8f8cSSatish Balay }
1177a30f8f8cSSatish Balay 
1178a30f8f8cSSatish Balay #undef __FUNC__
1179a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatView_MPISBAIJ"
1180a30f8f8cSSatish Balay int MatView_MPISBAIJ(Mat mat,Viewer viewer)
1181a30f8f8cSSatish Balay {
1182a30f8f8cSSatish Balay   int        ierr;
1183a30f8f8cSSatish Balay   PetscTruth isascii,isdraw,issocket,isbinary;
1184a30f8f8cSSatish Balay 
1185a30f8f8cSSatish Balay   PetscFunctionBegin;
1186a30f8f8cSSatish Balay   ierr = PetscTypeCompare((PetscObject)viewer,ASCII_VIEWER,&isascii);CHKERRQ(ierr);
1187a30f8f8cSSatish Balay   ierr = PetscTypeCompare((PetscObject)viewer,DRAW_VIEWER,&isdraw);CHKERRQ(ierr);
1188a30f8f8cSSatish Balay   ierr = PetscTypeCompare((PetscObject)viewer,SOCKET_VIEWER,&issocket);CHKERRQ(ierr);
1189a30f8f8cSSatish Balay   ierr = PetscTypeCompare((PetscObject)viewer,BINARY_VIEWER,&isbinary);CHKERRQ(ierr);
1190a30f8f8cSSatish Balay   if (isascii || isdraw || issocket || isbinary) {
1191a30f8f8cSSatish Balay     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1192a30f8f8cSSatish Balay   } else {
1193a30f8f8cSSatish Balay     SETERRQ1(1,1,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name);
1194a30f8f8cSSatish Balay   }
1195a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1196a30f8f8cSSatish Balay }
1197a30f8f8cSSatish Balay 
1198a30f8f8cSSatish Balay #undef __FUNC__
1199a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatDestroy_MPISBAIJ"
1200a30f8f8cSSatish Balay int MatDestroy_MPISBAIJ(Mat mat)
1201a30f8f8cSSatish Balay {
1202a30f8f8cSSatish Balay   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1203a30f8f8cSSatish Balay   int         ierr;
1204a30f8f8cSSatish Balay 
1205a30f8f8cSSatish Balay   PetscFunctionBegin;
1206a30f8f8cSSatish Balay 
1207a30f8f8cSSatish Balay   if (mat->mapping) {
1208a30f8f8cSSatish Balay     ierr = ISLocalToGlobalMappingDestroy(mat->mapping);CHKERRQ(ierr);
1209a30f8f8cSSatish Balay   }
1210a30f8f8cSSatish Balay   if (mat->bmapping) {
1211a30f8f8cSSatish Balay     ierr = ISLocalToGlobalMappingDestroy(mat->bmapping);CHKERRQ(ierr);
1212a30f8f8cSSatish Balay   }
1213a30f8f8cSSatish Balay   if (mat->rmap) {
1214a30f8f8cSSatish Balay     ierr = MapDestroy(mat->rmap);CHKERRQ(ierr);
1215a30f8f8cSSatish Balay   }
1216a30f8f8cSSatish Balay   if (mat->cmap) {
1217a30f8f8cSSatish Balay     ierr = MapDestroy(mat->cmap);CHKERRQ(ierr);
1218a30f8f8cSSatish Balay   }
1219a30f8f8cSSatish Balay #if defined(PETSC_USE_LOG)
1220a30f8f8cSSatish Balay   PLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",baij->M,baij->N);
1221a30f8f8cSSatish Balay #endif
1222a30f8f8cSSatish Balay 
1223a30f8f8cSSatish Balay   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1224a30f8f8cSSatish Balay   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
1225a30f8f8cSSatish Balay 
1226a30f8f8cSSatish Balay   ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
1227a30f8f8cSSatish Balay   ierr = MatDestroy(baij->A);CHKERRQ(ierr);
1228a30f8f8cSSatish Balay   ierr = MatDestroy(baij->B);CHKERRQ(ierr);
1229a30f8f8cSSatish Balay #if defined (PETSC_USE_CTABLE)
1230a30f8f8cSSatish Balay   if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
1231a30f8f8cSSatish Balay #else
1232a30f8f8cSSatish Balay   if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
1233a30f8f8cSSatish Balay #endif
1234a30f8f8cSSatish Balay   if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
1235a30f8f8cSSatish Balay   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
1236a30f8f8cSSatish Balay   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
1237a30f8f8cSSatish Balay   if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
1238a30f8f8cSSatish Balay   if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
1239a30f8f8cSSatish Balay   if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
1240a30f8f8cSSatish Balay #if defined(PETSC_USE_MAT_SINGLE)
1241a30f8f8cSSatish Balay   if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);}
1242a30f8f8cSSatish Balay #endif
1243a30f8f8cSSatish Balay   ierr = PetscFree(baij);CHKERRQ(ierr);
1244a30f8f8cSSatish Balay   PLogObjectDestroy(mat);
1245a30f8f8cSSatish Balay   PetscHeaderDestroy(mat);
1246a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1247a30f8f8cSSatish Balay }
1248a30f8f8cSSatish Balay 
1249a30f8f8cSSatish Balay #undef __FUNC__
1250a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatMult_MPISBAIJ"
1251a30f8f8cSSatish Balay int MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
1252a30f8f8cSSatish Balay {
1253a30f8f8cSSatish Balay   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1254a30f8f8cSSatish Balay   int         ierr,nt;
1255a30f8f8cSSatish Balay 
1256a30f8f8cSSatish Balay   PetscFunctionBegin;
1257a30f8f8cSSatish Balay   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1258a30f8f8cSSatish Balay   if (nt != a->n) {
1259a30f8f8cSSatish Balay     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible partition of A and xx");
1260a30f8f8cSSatish Balay   }
1261a30f8f8cSSatish Balay   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1262a30f8f8cSSatish Balay   if (nt != a->m) {
1263a30f8f8cSSatish Balay     SETERRQ(PETSC_ERR_ARG_SIZ,0,"Incompatible parition of A and yy");
1264a30f8f8cSSatish Balay   }
126565d70643SHong Zhang 
1266b941877fSHong Zhang   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1267b941877fSHong Zhang   /* do diagonal part */
1268b941877fSHong Zhang   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1269b941877fSHong Zhang   /* do supperdiagonal part */
1270b941877fSHong Zhang   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1271b941877fSHong Zhang   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1272b941877fSHong Zhang   /* do subdiagonal part */
1273b941877fSHong Zhang   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1274b941877fSHong Zhang   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1275b941877fSHong Zhang   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
127665d70643SHong Zhang 
1277a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1278a30f8f8cSSatish Balay }
1279a30f8f8cSSatish Balay 
1280a30f8f8cSSatish Balay #undef __FUNC__
1281a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatMultAdd_MPISBAIJ"
1282a30f8f8cSSatish Balay int MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1283a30f8f8cSSatish Balay {
1284de8b6608SHong Zhang   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1285aa482183SHong Zhang   int        ierr;
1286a30f8f8cSSatish Balay 
1287a30f8f8cSSatish Balay   PetscFunctionBegin;
1288b941877fSHong Zhang   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1289b941877fSHong Zhang   /* do diagonal part */
1290b941877fSHong Zhang   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1291b941877fSHong Zhang   /* do supperdiagonal part */
1292b941877fSHong Zhang   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1293de8b6608SHong Zhang   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1294de8b6608SHong Zhang 
1295b941877fSHong Zhang   /* do subdiagonal part */
1296a30f8f8cSSatish Balay   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1297a30f8f8cSSatish Balay   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1298a30f8f8cSSatish Balay   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1299aa482183SHong Zhang 
1300a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1301a30f8f8cSSatish Balay }
1302a30f8f8cSSatish Balay 
1303a30f8f8cSSatish Balay #undef __FUNC__
1304a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatMultTranspose_MPISBAIJ"
1305a30f8f8cSSatish Balay int MatMultTranspose_MPISBAIJ(Mat A,Vec xx,Vec yy)
1306a30f8f8cSSatish Balay {
1307a30f8f8cSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1308a30f8f8cSSatish Balay   int         ierr;
1309a30f8f8cSSatish Balay 
1310a30f8f8cSSatish Balay   PetscFunctionBegin;
1311a30f8f8cSSatish Balay   /* do nondiagonal part */
1312a30f8f8cSSatish Balay   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1313a30f8f8cSSatish Balay   /* send it on its way */
1314a30f8f8cSSatish Balay   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1315a30f8f8cSSatish Balay   /* do local part */
1316a30f8f8cSSatish Balay   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1317a30f8f8cSSatish Balay   /* receive remote parts: note this assumes the values are not actually */
1318a30f8f8cSSatish Balay   /* inserted in yy until the next line, which is true for my implementation*/
1319a30f8f8cSSatish Balay   /* but is not perhaps always true. */
1320a30f8f8cSSatish Balay   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1321a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1322a30f8f8cSSatish Balay }
1323a30f8f8cSSatish Balay 
1324a30f8f8cSSatish Balay #undef __FUNC__
1325a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatMultTransposeAdd_MPISBAIJ"
1326a30f8f8cSSatish Balay int MatMultTransposeAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1327a30f8f8cSSatish Balay {
1328a30f8f8cSSatish Balay   Mat_MPIBAIJ *a = (Mat_MPIBAIJ*)A->data;
1329a30f8f8cSSatish Balay   int         ierr;
1330a30f8f8cSSatish Balay 
1331a30f8f8cSSatish Balay   PetscFunctionBegin;
1332a30f8f8cSSatish Balay   /* do nondiagonal part */
1333a30f8f8cSSatish Balay   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1334a30f8f8cSSatish Balay   /* send it on its way */
1335a30f8f8cSSatish Balay   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1336a30f8f8cSSatish Balay   /* do local part */
1337a30f8f8cSSatish Balay   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1338a30f8f8cSSatish Balay   /* receive remote parts: note this assumes the values are not actually */
1339a30f8f8cSSatish Balay   /* inserted in yy until the next line, which is true for my implementation*/
1340a30f8f8cSSatish Balay   /* but is not perhaps always true. */
1341a30f8f8cSSatish Balay   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1342a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1343a30f8f8cSSatish Balay }
1344a30f8f8cSSatish Balay 
1345a30f8f8cSSatish Balay /*
1346a30f8f8cSSatish Balay   This only works correctly for square matrices where the subblock A->A is the
1347a30f8f8cSSatish Balay    diagonal block
1348a30f8f8cSSatish Balay */
1349a30f8f8cSSatish Balay #undef __FUNC__
1350a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatGetDiagonal_MPISBAIJ"
1351a30f8f8cSSatish Balay int MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
1352a30f8f8cSSatish Balay {
1353a30f8f8cSSatish Balay   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1354a30f8f8cSSatish Balay   int         ierr;
1355a30f8f8cSSatish Balay 
1356a30f8f8cSSatish Balay   PetscFunctionBegin;
1357a30f8f8cSSatish Balay   /* if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,0,"Supports only square matrix where A->A is diag block"); */
1358a30f8f8cSSatish Balay   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1359a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1360a30f8f8cSSatish Balay }
1361a30f8f8cSSatish Balay 
1362a30f8f8cSSatish Balay #undef __FUNC__
1363a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatScale_MPISBAIJ"
1364a30f8f8cSSatish Balay int MatScale_MPISBAIJ(Scalar *aa,Mat A)
1365a30f8f8cSSatish Balay {
1366a30f8f8cSSatish Balay   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1367a30f8f8cSSatish Balay   int         ierr;
1368a30f8f8cSSatish Balay 
1369a30f8f8cSSatish Balay   PetscFunctionBegin;
1370a30f8f8cSSatish Balay   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
1371a30f8f8cSSatish Balay   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
1372a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1373a30f8f8cSSatish Balay }
1374a30f8f8cSSatish Balay 
1375a30f8f8cSSatish Balay #undef __FUNC__
1376a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatGetSize_MPISBAIJ"
1377a30f8f8cSSatish Balay int MatGetSize_MPISBAIJ(Mat matin,int *m,int *n)
1378a30f8f8cSSatish Balay {
1379a30f8f8cSSatish Balay   Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data;
1380a30f8f8cSSatish Balay 
1381a30f8f8cSSatish Balay   PetscFunctionBegin;
1382a30f8f8cSSatish Balay   if (m) *m = mat->M;
1383a30f8f8cSSatish Balay   if (n) *n = mat->N;
1384a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1385a30f8f8cSSatish Balay }
1386a30f8f8cSSatish Balay 
1387a30f8f8cSSatish Balay #undef __FUNC__
1388a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatGetLocalSize_MPISBAIJ"
1389a30f8f8cSSatish Balay int MatGetLocalSize_MPISBAIJ(Mat matin,int *m,int *n)
1390a30f8f8cSSatish Balay {
1391a30f8f8cSSatish Balay   Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data;
1392a30f8f8cSSatish Balay 
1393a30f8f8cSSatish Balay   PetscFunctionBegin;
1394a30f8f8cSSatish Balay   *m = mat->m; *n = mat->n;
1395a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1396a30f8f8cSSatish Balay }
1397a30f8f8cSSatish Balay 
1398a30f8f8cSSatish Balay #undef __FUNC__
1399a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatGetOwnershipRange_MPISBAIJ"
1400a30f8f8cSSatish Balay int MatGetOwnershipRange_MPISBAIJ(Mat matin,int *m,int *n)
1401a30f8f8cSSatish Balay {
1402a30f8f8cSSatish Balay   Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data;
1403a30f8f8cSSatish Balay 
1404a30f8f8cSSatish Balay   PetscFunctionBegin;
1405a30f8f8cSSatish Balay   if (m) *m = mat->rstart*mat->bs;
1406a30f8f8cSSatish Balay   if (n) *n = mat->rend*mat->bs;
1407a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1408a30f8f8cSSatish Balay }
1409a30f8f8cSSatish Balay 
1410a30f8f8cSSatish Balay #undef __FUNC__
1411a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatGetRow_MPISBAIJ"
1412a30f8f8cSSatish Balay int MatGetRow_MPISBAIJ(Mat matin,int row,int *nz,int **idx,Scalar **v)
1413a30f8f8cSSatish Balay {
1414a30f8f8cSSatish Balay   Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)matin->data;
1415a30f8f8cSSatish Balay   Scalar     *vworkA,*vworkB,**pvA,**pvB,*v_p;
1416a30f8f8cSSatish Balay   int        bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB;
1417a30f8f8cSSatish Balay   int        nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs;
1418a30f8f8cSSatish Balay   int        *cmap,*idx_p,cstart = mat->cstart;
1419a30f8f8cSSatish Balay 
1420a30f8f8cSSatish Balay   PetscFunctionBegin;
1421a30f8f8cSSatish Balay   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"Already active");
1422a30f8f8cSSatish Balay   mat->getrowactive = PETSC_TRUE;
1423a30f8f8cSSatish Balay 
1424a30f8f8cSSatish Balay   if (!mat->rowvalues && (idx || v)) {
1425a30f8f8cSSatish Balay     /*
1426a30f8f8cSSatish Balay         allocate enough space to hold information from the longest row.
1427a30f8f8cSSatish Balay     */
1428a30f8f8cSSatish Balay     Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
1429a30f8f8cSSatish Balay     Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
1430a30f8f8cSSatish Balay     int     max = 1,mbs = mat->mbs,tmp;
1431a30f8f8cSSatish Balay     for (i=0; i<mbs; i++) {
1432a30f8f8cSSatish Balay       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
1433a30f8f8cSSatish Balay       if (max < tmp) { max = tmp; }
1434a30f8f8cSSatish Balay     }
1435a30f8f8cSSatish Balay     mat->rowvalues = (Scalar*)PetscMalloc(max*bs2*(sizeof(int)+sizeof(Scalar)));CHKPTRQ(mat->rowvalues);
1436a30f8f8cSSatish Balay     mat->rowindices = (int*)(mat->rowvalues + max*bs2);
1437a30f8f8cSSatish Balay   }
1438a30f8f8cSSatish Balay 
1439a30f8f8cSSatish Balay   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,0,"Only local rows")
1440a30f8f8cSSatish Balay   lrow = row - brstart;  /* local row index */
1441a30f8f8cSSatish Balay 
1442a30f8f8cSSatish Balay   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1443a30f8f8cSSatish Balay   if (!v)   {pvA = 0; pvB = 0;}
1444a30f8f8cSSatish Balay   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1445a30f8f8cSSatish Balay   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1446a30f8f8cSSatish Balay   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1447a30f8f8cSSatish Balay   nztot = nzA + nzB;
1448a30f8f8cSSatish Balay 
1449a30f8f8cSSatish Balay   cmap  = mat->garray;
1450a30f8f8cSSatish Balay   if (v  || idx) {
1451a30f8f8cSSatish Balay     if (nztot) {
1452a30f8f8cSSatish Balay       /* Sort by increasing column numbers, assuming A and B already sorted */
1453a30f8f8cSSatish Balay       int imark = -1;
1454a30f8f8cSSatish Balay       if (v) {
1455a30f8f8cSSatish Balay         *v = v_p = mat->rowvalues;
1456a30f8f8cSSatish Balay         for (i=0; i<nzB; i++) {
1457a30f8f8cSSatish Balay           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1458a30f8f8cSSatish Balay           else break;
1459a30f8f8cSSatish Balay         }
1460a30f8f8cSSatish Balay         imark = i;
1461a30f8f8cSSatish Balay         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1462a30f8f8cSSatish Balay         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1463a30f8f8cSSatish Balay       }
1464a30f8f8cSSatish Balay       if (idx) {
1465a30f8f8cSSatish Balay         *idx = idx_p = mat->rowindices;
1466a30f8f8cSSatish Balay         if (imark > -1) {
1467a30f8f8cSSatish Balay           for (i=0; i<imark; i++) {
1468a30f8f8cSSatish Balay             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1469a30f8f8cSSatish Balay           }
1470a30f8f8cSSatish Balay         } else {
1471a30f8f8cSSatish Balay           for (i=0; i<nzB; i++) {
1472a30f8f8cSSatish Balay             if (cmap[cworkB[i]/bs] < cstart)
1473a30f8f8cSSatish Balay               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1474a30f8f8cSSatish Balay             else break;
1475a30f8f8cSSatish Balay           }
1476a30f8f8cSSatish Balay           imark = i;
1477a30f8f8cSSatish Balay         }
1478a30f8f8cSSatish Balay         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1479a30f8f8cSSatish Balay         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1480a30f8f8cSSatish Balay       }
1481a30f8f8cSSatish Balay     } else {
1482a30f8f8cSSatish Balay       if (idx) *idx = 0;
1483a30f8f8cSSatish Balay       if (v)   *v   = 0;
1484a30f8f8cSSatish Balay     }
1485a30f8f8cSSatish Balay   }
1486a30f8f8cSSatish Balay   *nz = nztot;
1487a30f8f8cSSatish Balay   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1488a30f8f8cSSatish Balay   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1489a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1490a30f8f8cSSatish Balay }
1491a30f8f8cSSatish Balay 
1492a30f8f8cSSatish Balay #undef __FUNC__
1493a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatRestoreRow_MPISBAIJ"
1494a30f8f8cSSatish Balay int MatRestoreRow_MPISBAIJ(Mat mat,int row,int *nz,int **idx,Scalar **v)
1495a30f8f8cSSatish Balay {
1496a30f8f8cSSatish Balay   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1497a30f8f8cSSatish Balay 
1498a30f8f8cSSatish Balay   PetscFunctionBegin;
1499a30f8f8cSSatish Balay   if (baij->getrowactive == PETSC_FALSE) {
1500a30f8f8cSSatish Balay     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,0,"MatGetRow not called");
1501a30f8f8cSSatish Balay   }
1502a30f8f8cSSatish Balay   baij->getrowactive = PETSC_FALSE;
1503a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1504a30f8f8cSSatish Balay }
1505a30f8f8cSSatish Balay 
1506a30f8f8cSSatish Balay #undef __FUNC__
1507a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatGetBlockSize_MPISBAIJ"
1508a30f8f8cSSatish Balay int MatGetBlockSize_MPISBAIJ(Mat mat,int *bs)
1509a30f8f8cSSatish Balay {
1510a30f8f8cSSatish Balay   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1511a30f8f8cSSatish Balay 
1512a30f8f8cSSatish Balay   PetscFunctionBegin;
1513a30f8f8cSSatish Balay   *bs = baij->bs;
1514a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1515a30f8f8cSSatish Balay }
1516a30f8f8cSSatish Balay 
1517a30f8f8cSSatish Balay #undef __FUNC__
1518a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatZeroEntries_MPISBAIJ"
1519a30f8f8cSSatish Balay int MatZeroEntries_MPISBAIJ(Mat A)
1520a30f8f8cSSatish Balay {
1521a30f8f8cSSatish Balay   Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data;
1522a30f8f8cSSatish Balay   int         ierr;
1523a30f8f8cSSatish Balay 
1524a30f8f8cSSatish Balay   PetscFunctionBegin;
1525a30f8f8cSSatish Balay   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1526a30f8f8cSSatish Balay   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1527a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1528a30f8f8cSSatish Balay }
1529a30f8f8cSSatish Balay 
1530a30f8f8cSSatish Balay #undef __FUNC__
1531a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatGetInfo_MPISBAIJ"
1532a30f8f8cSSatish Balay int MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1533a30f8f8cSSatish Balay {
1534a30f8f8cSSatish Balay   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data;
1535a30f8f8cSSatish Balay   Mat         A = a->A,B = a->B;
1536a30f8f8cSSatish Balay   int         ierr;
1537a30f8f8cSSatish Balay   PetscReal   isend[5],irecv[5];
1538a30f8f8cSSatish Balay 
1539a30f8f8cSSatish Balay   PetscFunctionBegin;
1540a30f8f8cSSatish Balay   info->block_size     = (double)a->bs;
1541a30f8f8cSSatish Balay   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1542a30f8f8cSSatish Balay   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1543a30f8f8cSSatish Balay   isend[3] = info->memory;  isend[4] = info->mallocs;
1544a30f8f8cSSatish Balay   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1545a30f8f8cSSatish Balay   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1546a30f8f8cSSatish Balay   isend[3] += info->memory;  isend[4] += info->mallocs;
1547a30f8f8cSSatish Balay   if (flag == MAT_LOCAL) {
1548a30f8f8cSSatish Balay     info->nz_used      = isend[0];
1549a30f8f8cSSatish Balay     info->nz_allocated = isend[1];
1550a30f8f8cSSatish Balay     info->nz_unneeded  = isend[2];
1551a30f8f8cSSatish Balay     info->memory       = isend[3];
1552a30f8f8cSSatish Balay     info->mallocs      = isend[4];
1553a30f8f8cSSatish Balay   } else if (flag == MAT_GLOBAL_MAX) {
1554a30f8f8cSSatish Balay     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_MAX,matin->comm);CHKERRQ(ierr);
1555a30f8f8cSSatish Balay     info->nz_used      = irecv[0];
1556a30f8f8cSSatish Balay     info->nz_allocated = irecv[1];
1557a30f8f8cSSatish Balay     info->nz_unneeded  = irecv[2];
1558a30f8f8cSSatish Balay     info->memory       = irecv[3];
1559a30f8f8cSSatish Balay     info->mallocs      = irecv[4];
1560a30f8f8cSSatish Balay   } else if (flag == MAT_GLOBAL_SUM) {
1561a30f8f8cSSatish Balay     ierr = MPI_Allreduce(isend,irecv,5,MPI_DOUBLE,MPI_SUM,matin->comm);CHKERRQ(ierr);
1562a30f8f8cSSatish Balay     info->nz_used      = irecv[0];
1563a30f8f8cSSatish Balay     info->nz_allocated = irecv[1];
1564a30f8f8cSSatish Balay     info->nz_unneeded  = irecv[2];
1565a30f8f8cSSatish Balay     info->memory       = irecv[3];
1566a30f8f8cSSatish Balay     info->mallocs      = irecv[4];
1567a30f8f8cSSatish Balay   } else {
1568a30f8f8cSSatish Balay     SETERRQ1(1,1,"Unknown MatInfoType argument %d",flag);
1569a30f8f8cSSatish Balay   }
1570a30f8f8cSSatish Balay   info->rows_global       = (double)a->M;
1571a30f8f8cSSatish Balay   info->columns_global    = (double)a->N;
1572a30f8f8cSSatish Balay   info->rows_local        = (double)a->m;
1573a30f8f8cSSatish Balay   info->columns_local     = (double)a->N;
1574a30f8f8cSSatish Balay   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1575a30f8f8cSSatish Balay   info->fill_ratio_needed = 0;
1576a30f8f8cSSatish Balay   info->factor_mallocs    = 0;
1577a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1578a30f8f8cSSatish Balay }
1579a30f8f8cSSatish Balay 
1580a30f8f8cSSatish Balay #undef __FUNC__
1581a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatSetOption_MPISBAIJ"
1582a30f8f8cSSatish Balay int MatSetOption_MPISBAIJ(Mat A,MatOption op)
1583a30f8f8cSSatish Balay {
1584a30f8f8cSSatish Balay   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1585a30f8f8cSSatish Balay   int         ierr;
1586a30f8f8cSSatish Balay 
1587a30f8f8cSSatish Balay   PetscFunctionBegin;
1588a30f8f8cSSatish Balay   if (op == MAT_NO_NEW_NONZERO_LOCATIONS ||
1589a30f8f8cSSatish Balay       op == MAT_YES_NEW_NONZERO_LOCATIONS ||
1590a30f8f8cSSatish Balay       op == MAT_COLUMNS_UNSORTED ||
1591a30f8f8cSSatish Balay       op == MAT_COLUMNS_SORTED ||
1592a30f8f8cSSatish Balay       op == MAT_NEW_NONZERO_ALLOCATION_ERR ||
1593a30f8f8cSSatish Balay       op == MAT_KEEP_ZEROED_ROWS ||
1594a30f8f8cSSatish Balay       op == MAT_NEW_NONZERO_LOCATION_ERR) {
1595a30f8f8cSSatish Balay         ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1596a30f8f8cSSatish Balay         ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1597a30f8f8cSSatish Balay   } else if (op == MAT_ROW_ORIENTED) {
1598a30f8f8cSSatish Balay         a->roworiented = PETSC_TRUE;
1599a30f8f8cSSatish Balay         ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1600a30f8f8cSSatish Balay         ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1601a30f8f8cSSatish Balay   } else if (op == MAT_ROWS_SORTED ||
1602a30f8f8cSSatish Balay              op == MAT_ROWS_UNSORTED ||
1603a30f8f8cSSatish Balay              op == MAT_SYMMETRIC ||
1604a30f8f8cSSatish Balay              op == MAT_STRUCTURALLY_SYMMETRIC ||
1605a30f8f8cSSatish Balay              op == MAT_YES_NEW_DIAGONALS ||
1606a30f8f8cSSatish Balay              op == MAT_USE_HASH_TABLE) {
1607a30f8f8cSSatish Balay     PLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
1608a30f8f8cSSatish Balay   } else if (op == MAT_COLUMN_ORIENTED) {
1609a30f8f8cSSatish Balay     a->roworiented = PETSC_FALSE;
1610a30f8f8cSSatish Balay     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1611a30f8f8cSSatish Balay     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1612a30f8f8cSSatish Balay   } else if (op == MAT_IGNORE_OFF_PROC_ENTRIES) {
1613a30f8f8cSSatish Balay     a->donotstash = PETSC_TRUE;
1614a30f8f8cSSatish Balay   } else if (op == MAT_NO_NEW_DIAGONALS) {
1615a30f8f8cSSatish Balay     SETERRQ(PETSC_ERR_SUP,0,"MAT_NO_NEW_DIAGONALS");
1616a30f8f8cSSatish Balay   } else if (op == MAT_USE_HASH_TABLE) {
1617a30f8f8cSSatish Balay     a->ht_flag = PETSC_TRUE;
1618a30f8f8cSSatish Balay   } else {
1619a30f8f8cSSatish Balay     SETERRQ(PETSC_ERR_SUP,0,"unknown option");
1620a30f8f8cSSatish Balay   }
1621a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1622a30f8f8cSSatish Balay }
1623a30f8f8cSSatish Balay 
1624a30f8f8cSSatish Balay #undef __FUNC__
1625a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatTranspose_MPISBAIJ("
1626a30f8f8cSSatish Balay int MatTranspose_MPISBAIJ(Mat A,Mat *matout)
1627a30f8f8cSSatish Balay {
1628a30f8f8cSSatish Balay   Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data;
1629a30f8f8cSSatish Balay   Mat_SeqBAIJ *Aloc;
1630a30f8f8cSSatish Balay   Mat         B;
1631a30f8f8cSSatish Balay   int         ierr,M=baij->M,N=baij->N,*ai,*aj,i,*rvals,j,k,col;
1632a30f8f8cSSatish Balay   int         bs=baij->bs,mbs=baij->mbs;
1633a30f8f8cSSatish Balay   MatScalar   *a;
1634a30f8f8cSSatish Balay 
1635a30f8f8cSSatish Balay   PetscFunctionBegin;
1636a30f8f8cSSatish Balay   if (!matout && M != N) SETERRQ(PETSC_ERR_ARG_SIZ,0,"Square matrix only for in-place");
1637a30f8f8cSSatish Balay   ierr = MatCreateMPIBAIJ(A->comm,baij->bs,baij->n,baij->m,N,M,0,PETSC_NULL,0,PETSC_NULL,&B);CHKERRQ(ierr);
1638a30f8f8cSSatish Balay 
1639a30f8f8cSSatish Balay   /* copy over the A part */
1640a30f8f8cSSatish Balay   Aloc = (Mat_SeqBAIJ*)baij->A->data;
1641a30f8f8cSSatish Balay   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1642a30f8f8cSSatish Balay   rvals = (int*)PetscMalloc(bs*sizeof(int));CHKPTRQ(rvals);
1643a30f8f8cSSatish Balay 
1644a30f8f8cSSatish Balay   for (i=0; i<mbs; i++) {
1645a30f8f8cSSatish Balay     rvals[0] = bs*(baij->rstart + i);
1646a30f8f8cSSatish Balay     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1647a30f8f8cSSatish Balay     for (j=ai[i]; j<ai[i+1]; j++) {
1648a30f8f8cSSatish Balay       col = (baij->cstart+aj[j])*bs;
1649a30f8f8cSSatish Balay       for (k=0; k<bs; k++) {
1650a30f8f8cSSatish Balay         ierr = MatSetValues_MPISBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1651a30f8f8cSSatish Balay         col++; a += bs;
1652a30f8f8cSSatish Balay       }
1653a30f8f8cSSatish Balay     }
1654a30f8f8cSSatish Balay   }
1655a30f8f8cSSatish Balay   /* copy over the B part */
1656a30f8f8cSSatish Balay   Aloc = (Mat_SeqBAIJ*)baij->B->data;
1657a30f8f8cSSatish Balay   ai = Aloc->i; aj = Aloc->j; a = Aloc->a;
1658a30f8f8cSSatish Balay   for (i=0; i<mbs; i++) {
1659a30f8f8cSSatish Balay     rvals[0] = bs*(baij->rstart + i);
1660a30f8f8cSSatish Balay     for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
1661a30f8f8cSSatish Balay     for (j=ai[i]; j<ai[i+1]; j++) {
1662a30f8f8cSSatish Balay       col = baij->garray[aj[j]]*bs;
1663a30f8f8cSSatish Balay       for (k=0; k<bs; k++) {
1664a30f8f8cSSatish Balay         ierr = MatSetValues_MPISBAIJ_MatScalar(B,1,&col,bs,rvals,a,INSERT_VALUES);CHKERRQ(ierr);
1665a30f8f8cSSatish Balay         col++; a += bs;
1666a30f8f8cSSatish Balay       }
1667a30f8f8cSSatish Balay     }
1668a30f8f8cSSatish Balay   }
1669a30f8f8cSSatish Balay   ierr = PetscFree(rvals);CHKERRQ(ierr);
1670a30f8f8cSSatish Balay   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1671a30f8f8cSSatish Balay   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1672a30f8f8cSSatish Balay 
1673a30f8f8cSSatish Balay   if (matout) {
1674a30f8f8cSSatish Balay     *matout = B;
1675a30f8f8cSSatish Balay   } else {
1676a30f8f8cSSatish Balay     PetscOps *Abops;
1677a30f8f8cSSatish Balay     MatOps   Aops;
1678a30f8f8cSSatish Balay 
1679a30f8f8cSSatish Balay     /* This isn't really an in-place transpose .... but free data structures from baij */
1680a30f8f8cSSatish Balay     ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
1681a30f8f8cSSatish Balay     ierr = MatDestroy(baij->A);CHKERRQ(ierr);
1682a30f8f8cSSatish Balay     ierr = MatDestroy(baij->B);CHKERRQ(ierr);
1683a30f8f8cSSatish Balay #if defined (PETSC_USE_CTABLE)
1684a30f8f8cSSatish Balay     if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
1685a30f8f8cSSatish Balay #else
1686a30f8f8cSSatish Balay     if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
1687a30f8f8cSSatish Balay #endif
1688a30f8f8cSSatish Balay     if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
1689a30f8f8cSSatish Balay     if (baij->lvec) {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
1690a30f8f8cSSatish Balay     if (baij->Mvctx) {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
1691a30f8f8cSSatish Balay     ierr = PetscFree(baij);CHKERRQ(ierr);
1692a30f8f8cSSatish Balay 
1693a30f8f8cSSatish Balay     /*
1694a30f8f8cSSatish Balay        This is horrible, horrible code. We need to keep the
1695a30f8f8cSSatish Balay       A pointers for the bops and ops but copy everything
1696a30f8f8cSSatish Balay       else from C.
1697a30f8f8cSSatish Balay     */
1698a30f8f8cSSatish Balay     Abops   = A->bops;
1699a30f8f8cSSatish Balay     Aops    = A->ops;
1700a30f8f8cSSatish Balay     ierr    = PetscMemcpy(A,B,sizeof(struct _p_Mat));CHKERRQ(ierr);
1701a30f8f8cSSatish Balay     A->bops = Abops;
1702a30f8f8cSSatish Balay     A->ops  = Aops;
1703a30f8f8cSSatish Balay 
1704a30f8f8cSSatish Balay     PetscHeaderDestroy(B);
1705a30f8f8cSSatish Balay   }
1706a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1707a30f8f8cSSatish Balay }
1708a30f8f8cSSatish Balay 
1709a30f8f8cSSatish Balay #undef __FUNC__
1710a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatDiagonalScale_MPISBAIJ"
1711a30f8f8cSSatish Balay int MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1712a30f8f8cSSatish Balay {
1713a30f8f8cSSatish Balay   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1714a30f8f8cSSatish Balay   Mat         a = baij->A,b = baij->B;
1715a30f8f8cSSatish Balay   int         ierr,s1,s2,s3;
1716a30f8f8cSSatish Balay 
1717a30f8f8cSSatish Balay   PetscFunctionBegin;
1718a30f8f8cSSatish Balay   if (ll != rr) {
1719a30f8f8cSSatish Balay     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"For symmetric format, left and right scaling vectors must be same\n");
1720a30f8f8cSSatish Balay   }
1721a30f8f8cSSatish Balay   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1722a30f8f8cSSatish Balay   if (rr) {
1723a30f8f8cSSatish Balay     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1724a30f8f8cSSatish Balay     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,0,"right vector non-conforming local size");
1725a30f8f8cSSatish Balay     /* Overlap communication with computation. */
1726a30f8f8cSSatish Balay     ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1727a30f8f8cSSatish Balay     /*} if (ll) { */
1728a30f8f8cSSatish Balay     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1729a30f8f8cSSatish Balay     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,0,"left vector non-conforming local size");
1730a30f8f8cSSatish Balay     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1731a30f8f8cSSatish Balay     /* } */
1732a30f8f8cSSatish Balay   /* scale  the diagonal block */
1733a30f8f8cSSatish Balay   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1734a30f8f8cSSatish Balay 
1735a30f8f8cSSatish Balay   /* if (rr) { */
1736a30f8f8cSSatish Balay     /* Do a scatter end and then right scale the off-diagonal block */
1737a30f8f8cSSatish Balay     ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1738a30f8f8cSSatish Balay     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1739a30f8f8cSSatish Balay   }
1740a30f8f8cSSatish Balay 
1741a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1742a30f8f8cSSatish Balay }
1743a30f8f8cSSatish Balay 
1744a30f8f8cSSatish Balay #undef __FUNC__
1745a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatZeroRows_MPISBAIJ"
1746a30f8f8cSSatish Balay int MatZeroRows_MPISBAIJ(Mat A,IS is,Scalar *diag)
1747a30f8f8cSSatish Balay {
1748a30f8f8cSSatish Balay   Mat_MPISBAIJ    *l = (Mat_MPISBAIJ*)A->data;
1749a30f8f8cSSatish Balay   int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
1750a30f8f8cSSatish Balay   int            *procs,*nprocs,j,found,idx,nsends,*work,row;
1751a30f8f8cSSatish Balay   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
1752a30f8f8cSSatish Balay   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
1753a30f8f8cSSatish Balay   int            *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs;
1754a30f8f8cSSatish Balay   MPI_Comm       comm = A->comm;
1755a30f8f8cSSatish Balay   MPI_Request    *send_waits,*recv_waits;
1756a30f8f8cSSatish Balay   MPI_Status     recv_status,*send_status;
1757a30f8f8cSSatish Balay   IS             istmp;
1758a30f8f8cSSatish Balay 
1759a30f8f8cSSatish Balay   PetscFunctionBegin;
1760a30f8f8cSSatish Balay   ierr = ISGetSize(is,&N);CHKERRQ(ierr);
1761a30f8f8cSSatish Balay   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
1762a30f8f8cSSatish Balay 
1763a30f8f8cSSatish Balay   /*  first count number of contributors to each processor */
1764a30f8f8cSSatish Balay   nprocs = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(nprocs);
1765a30f8f8cSSatish Balay   ierr   = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
1766a30f8f8cSSatish Balay   procs  = nprocs + size;
1767a30f8f8cSSatish Balay   owner  = (int*)PetscMalloc((N+1)*sizeof(int));CHKPTRQ(owner); /* see note*/
1768a30f8f8cSSatish Balay   for (i=0; i<N; i++) {
1769a30f8f8cSSatish Balay     idx   = rows[i];
1770a30f8f8cSSatish Balay     found = 0;
1771a30f8f8cSSatish Balay     for (j=0; j<size; j++) {
1772a30f8f8cSSatish Balay       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
1773a30f8f8cSSatish Balay         nprocs[j]++; procs[j] = 1; owner[i] = j; found = 1; break;
1774a30f8f8cSSatish Balay       }
1775a30f8f8cSSatish Balay     }
1776a30f8f8cSSatish Balay     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Index out of range");
1777a30f8f8cSSatish Balay   }
1778a30f8f8cSSatish Balay   nsends = 0;  for (i=0; i<size; i++) { nsends += procs[i];}
1779a30f8f8cSSatish Balay 
1780a30f8f8cSSatish Balay   /* inform other processors of number of messages and max length*/
1781a30f8f8cSSatish Balay   work   = (int*)PetscMalloc(2*size*sizeof(int));CHKPTRQ(work);
1782a30f8f8cSSatish Balay   ierr   = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr);
1783a30f8f8cSSatish Balay   nmax   = work[rank];
1784a30f8f8cSSatish Balay   nrecvs = work[size+rank];
1785a30f8f8cSSatish Balay   ierr = PetscFree(work);CHKERRQ(ierr);
1786a30f8f8cSSatish Balay 
1787a30f8f8cSSatish Balay   /* post receives:   */
1788a30f8f8cSSatish Balay   rvalues = (int*)PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int));CHKPTRQ(rvalues);
1789a30f8f8cSSatish Balay   recv_waits = (MPI_Request*)PetscMalloc((nrecvs+1)*sizeof(MPI_Request));CHKPTRQ(recv_waits);
1790a30f8f8cSSatish Balay   for (i=0; i<nrecvs; i++) {
1791a30f8f8cSSatish Balay     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
1792a30f8f8cSSatish Balay   }
1793a30f8f8cSSatish Balay 
1794a30f8f8cSSatish Balay   /* do sends:
1795a30f8f8cSSatish Balay      1) starts[i] gives the starting index in svalues for stuff going to
1796a30f8f8cSSatish Balay      the ith processor
1797a30f8f8cSSatish Balay   */
1798a30f8f8cSSatish Balay   svalues    = (int*)PetscMalloc((N+1)*sizeof(int));CHKPTRQ(svalues);
1799a30f8f8cSSatish Balay   send_waits = (MPI_Request*)PetscMalloc((nsends+1)*sizeof(MPI_Request));CHKPTRQ(send_waits);
1800a30f8f8cSSatish Balay   starts     = (int*)PetscMalloc((size+1)*sizeof(int));CHKPTRQ(starts);
1801a30f8f8cSSatish Balay   starts[0]  = 0;
1802a30f8f8cSSatish Balay   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
1803a30f8f8cSSatish Balay   for (i=0; i<N; i++) {
1804a30f8f8cSSatish Balay     svalues[starts[owner[i]]++] = rows[i];
1805a30f8f8cSSatish Balay   }
1806a30f8f8cSSatish Balay   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
1807a30f8f8cSSatish Balay 
1808a30f8f8cSSatish Balay   starts[0] = 0;
1809a30f8f8cSSatish Balay   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
1810a30f8f8cSSatish Balay   count = 0;
1811a30f8f8cSSatish Balay   for (i=0; i<size; i++) {
1812a30f8f8cSSatish Balay     if (procs[i]) {
1813a30f8f8cSSatish Balay       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
1814a30f8f8cSSatish Balay     }
1815a30f8f8cSSatish Balay   }
1816a30f8f8cSSatish Balay   ierr = PetscFree(starts);CHKERRQ(ierr);
1817a30f8f8cSSatish Balay 
1818a30f8f8cSSatish Balay   base = owners[rank]*bs;
1819a30f8f8cSSatish Balay 
1820a30f8f8cSSatish Balay   /*  wait on receives */
1821a30f8f8cSSatish Balay   lens   = (int*)PetscMalloc(2*(nrecvs+1)*sizeof(int));CHKPTRQ(lens);
1822a30f8f8cSSatish Balay   source = lens + nrecvs;
1823a30f8f8cSSatish Balay   count  = nrecvs; slen = 0;
1824a30f8f8cSSatish Balay   while (count) {
1825a30f8f8cSSatish Balay     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
1826a30f8f8cSSatish Balay     /* unpack receives into our local space */
1827a30f8f8cSSatish Balay     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
1828a30f8f8cSSatish Balay     source[imdex]  = recv_status.MPI_SOURCE;
1829a30f8f8cSSatish Balay     lens[imdex]    = n;
1830a30f8f8cSSatish Balay     slen          += n;
1831a30f8f8cSSatish Balay     count--;
1832a30f8f8cSSatish Balay   }
1833a30f8f8cSSatish Balay   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1834a30f8f8cSSatish Balay 
1835a30f8f8cSSatish Balay   /* move the data into the send scatter */
1836a30f8f8cSSatish Balay   lrows = (int*)PetscMalloc((slen+1)*sizeof(int));CHKPTRQ(lrows);
1837a30f8f8cSSatish Balay   count = 0;
1838a30f8f8cSSatish Balay   for (i=0; i<nrecvs; i++) {
1839a30f8f8cSSatish Balay     values = rvalues + i*nmax;
1840a30f8f8cSSatish Balay     for (j=0; j<lens[i]; j++) {
1841a30f8f8cSSatish Balay       lrows[count++] = values[j] - base;
1842a30f8f8cSSatish Balay     }
1843a30f8f8cSSatish Balay   }
1844a30f8f8cSSatish Balay   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1845a30f8f8cSSatish Balay   ierr = PetscFree(lens);CHKERRQ(ierr);
1846a30f8f8cSSatish Balay   ierr = PetscFree(owner);CHKERRQ(ierr);
1847a30f8f8cSSatish Balay   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1848a30f8f8cSSatish Balay 
1849a30f8f8cSSatish Balay   /* actually zap the local rows */
1850a30f8f8cSSatish Balay   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
1851a30f8f8cSSatish Balay   PLogObjectParent(A,istmp);
1852a30f8f8cSSatish Balay 
1853a30f8f8cSSatish Balay   /*
1854a30f8f8cSSatish Balay         Zero the required rows. If the "diagonal block" of the matrix
1855a30f8f8cSSatish Balay      is square and the user wishes to set the diagonal we use seperate
1856a30f8f8cSSatish Balay      code so that MatSetValues() is not called for each diagonal allocating
1857a30f8f8cSSatish Balay      new memory, thus calling lots of mallocs and slowing things down.
1858a30f8f8cSSatish Balay 
1859a30f8f8cSSatish Balay        Contributed by: Mathew Knepley
1860a30f8f8cSSatish Balay   */
1861a30f8f8cSSatish Balay   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1862c8407628SSatish Balay   ierr = MatZeroRows_SeqSBAIJ(l->B,istmp,0);CHKERRQ(ierr);
1863a30f8f8cSSatish Balay   if (diag && (l->A->M == l->A->N)) {
1864a30f8f8cSSatish Balay     ierr = MatZeroRows_SeqSBAIJ(l->A,istmp,diag);CHKERRQ(ierr);
1865a30f8f8cSSatish Balay   } else if (diag) {
1866a30f8f8cSSatish Balay     ierr = MatZeroRows_SeqSBAIJ(l->A,istmp,0);CHKERRQ(ierr);
1867a30f8f8cSSatish Balay     if (((Mat_SeqSBAIJ*)l->A->data)->nonew) {
1868a30f8f8cSSatish Balay       SETERRQ(PETSC_ERR_SUP,0,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1869a30f8f8cSSatish Balay MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1870a30f8f8cSSatish Balay     }
1871a30f8f8cSSatish Balay     for (i=0; i<slen; i++) {
1872a30f8f8cSSatish Balay       row  = lrows[i] + rstart_bs;
1873a30f8f8cSSatish Balay       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
1874a30f8f8cSSatish Balay     }
1875a30f8f8cSSatish Balay     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1876a30f8f8cSSatish Balay     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1877a30f8f8cSSatish Balay   } else {
1878a30f8f8cSSatish Balay     ierr = MatZeroRows_SeqSBAIJ(l->A,istmp,0);CHKERRQ(ierr);
1879a30f8f8cSSatish Balay   }
1880a30f8f8cSSatish Balay 
1881a30f8f8cSSatish Balay   ierr = ISDestroy(istmp);CHKERRQ(ierr);
1882a30f8f8cSSatish Balay   ierr = PetscFree(lrows);CHKERRQ(ierr);
1883a30f8f8cSSatish Balay 
1884a30f8f8cSSatish Balay   /* wait on sends */
1885a30f8f8cSSatish Balay   if (nsends) {
1886a30f8f8cSSatish Balay     send_status = (MPI_Status*)PetscMalloc(nsends*sizeof(MPI_Status));CHKPTRQ(send_status);
1887a30f8f8cSSatish Balay     ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1888a30f8f8cSSatish Balay     ierr        = PetscFree(send_status);CHKERRQ(ierr);
1889a30f8f8cSSatish Balay   }
1890a30f8f8cSSatish Balay   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1891a30f8f8cSSatish Balay   ierr = PetscFree(svalues);CHKERRQ(ierr);
1892a30f8f8cSSatish Balay 
1893a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1894a30f8f8cSSatish Balay }
1895a30f8f8cSSatish Balay 
1896a30f8f8cSSatish Balay #undef __FUNC__
1897a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatPrintHelp_MPISBAIJ"
1898a30f8f8cSSatish Balay int MatPrintHelp_MPISBAIJ(Mat A)
1899a30f8f8cSSatish Balay {
1900a30f8f8cSSatish Balay   Mat_MPISBAIJ *a   = (Mat_MPISBAIJ*)A->data;
1901a30f8f8cSSatish Balay   MPI_Comm    comm = A->comm;
1902a30f8f8cSSatish Balay   static int  called = 0;
1903a30f8f8cSSatish Balay   int         ierr;
1904a30f8f8cSSatish Balay 
1905a30f8f8cSSatish Balay   PetscFunctionBegin;
1906a30f8f8cSSatish Balay   if (!a->rank) {
1907a30f8f8cSSatish Balay     ierr = MatPrintHelp_SeqSBAIJ(a->A);CHKERRQ(ierr);
1908a30f8f8cSSatish Balay   }
1909a30f8f8cSSatish Balay   if (called) {PetscFunctionReturn(0);} else called = 1;
1910a30f8f8cSSatish Balay   ierr = (*PetscHelpPrintf)(comm," Options for MATMPISBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
1911a30f8f8cSSatish Balay   ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
1912a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1913a30f8f8cSSatish Balay }
1914a30f8f8cSSatish Balay 
1915a30f8f8cSSatish Balay #undef __FUNC__
1916a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatSetUnfactored_MPISBAIJ"
1917a30f8f8cSSatish Balay int MatSetUnfactored_MPISBAIJ(Mat A)
1918a30f8f8cSSatish Balay {
1919a30f8f8cSSatish Balay   Mat_MPIBAIJ *a   = (Mat_MPIBAIJ*)A->data;
1920a30f8f8cSSatish Balay   int         ierr;
1921a30f8f8cSSatish Balay 
1922a30f8f8cSSatish Balay   PetscFunctionBegin;
1923a30f8f8cSSatish Balay   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1924a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1925a30f8f8cSSatish Balay }
1926a30f8f8cSSatish Balay 
1927a30f8f8cSSatish Balay static int MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *);
1928a30f8f8cSSatish Balay 
1929a30f8f8cSSatish Balay #undef __FUNC__
1930a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatEqual_MPISBAIJ"
1931a30f8f8cSSatish Balay int MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag)
1932a30f8f8cSSatish Balay {
1933a30f8f8cSSatish Balay   Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1934a30f8f8cSSatish Balay   Mat         a,b,c,d;
1935a30f8f8cSSatish Balay   PetscTruth  flg;
1936a30f8f8cSSatish Balay   int         ierr;
1937a30f8f8cSSatish Balay 
1938a30f8f8cSSatish Balay   PetscFunctionBegin;
1939a30f8f8cSSatish Balay   if (B->type != MATMPISBAIJ) SETERRQ(PETSC_ERR_ARG_INCOMP,0,"Matrices must be same type");
1940a30f8f8cSSatish Balay   a = matA->A; b = matA->B;
1941a30f8f8cSSatish Balay   c = matB->A; d = matB->B;
1942a30f8f8cSSatish Balay 
1943a30f8f8cSSatish Balay   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1944a30f8f8cSSatish Balay   if (flg == PETSC_TRUE) {
1945a30f8f8cSSatish Balay     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1946a30f8f8cSSatish Balay   }
1947a30f8f8cSSatish Balay   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1948a30f8f8cSSatish Balay   PetscFunctionReturn(0);
1949a30f8f8cSSatish Balay }
1950a30f8f8cSSatish Balay 
1951a30f8f8cSSatish Balay /* -------------------------------------------------------------------*/
1952a30f8f8cSSatish Balay static struct _MatOps MatOps_Values = {
1953a30f8f8cSSatish Balay   MatSetValues_MPISBAIJ,
1954a30f8f8cSSatish Balay   MatGetRow_MPISBAIJ,
1955a30f8f8cSSatish Balay   MatRestoreRow_MPISBAIJ,
1956a30f8f8cSSatish Balay   MatMult_MPISBAIJ,
1957a30f8f8cSSatish Balay   MatMultAdd_MPISBAIJ,
1958a30f8f8cSSatish Balay   MatMultTranspose_MPISBAIJ,
1959a30f8f8cSSatish Balay   MatMultTransposeAdd_MPISBAIJ,
1960a30f8f8cSSatish Balay   0,
1961a30f8f8cSSatish Balay   0,
1962a30f8f8cSSatish Balay   0,
1963a30f8f8cSSatish Balay   0,
1964a30f8f8cSSatish Balay   0,
1965a30f8f8cSSatish Balay   0,
1966a30f8f8cSSatish Balay   0,
1967a30f8f8cSSatish Balay   MatTranspose_MPISBAIJ,
1968a30f8f8cSSatish Balay   MatGetInfo_MPISBAIJ,
1969a30f8f8cSSatish Balay   MatEqual_MPISBAIJ,
1970a30f8f8cSSatish Balay   MatGetDiagonal_MPISBAIJ,
1971a30f8f8cSSatish Balay   MatDiagonalScale_MPISBAIJ,
1972a30f8f8cSSatish Balay   MatNorm_MPISBAIJ,
1973a30f8f8cSSatish Balay   MatAssemblyBegin_MPISBAIJ,
1974a30f8f8cSSatish Balay   MatAssemblyEnd_MPISBAIJ,
1975a30f8f8cSSatish Balay   0,
1976a30f8f8cSSatish Balay   MatSetOption_MPISBAIJ,
1977a30f8f8cSSatish Balay   MatZeroEntries_MPISBAIJ,
1978a30f8f8cSSatish Balay   MatZeroRows_MPISBAIJ,
1979a30f8f8cSSatish Balay   0,
1980a30f8f8cSSatish Balay   0,
1981a30f8f8cSSatish Balay   0,
1982a30f8f8cSSatish Balay   0,
1983a30f8f8cSSatish Balay   MatGetSize_MPISBAIJ,
1984a30f8f8cSSatish Balay   MatGetLocalSize_MPISBAIJ,
1985a30f8f8cSSatish Balay   MatGetOwnershipRange_MPISBAIJ,
1986a30f8f8cSSatish Balay   0,
1987a30f8f8cSSatish Balay   0,
1988a30f8f8cSSatish Balay   0,
1989a30f8f8cSSatish Balay   0,
1990a30f8f8cSSatish Balay   MatDuplicate_MPISBAIJ,
1991a30f8f8cSSatish Balay   0,
1992a30f8f8cSSatish Balay   0,
1993a30f8f8cSSatish Balay   0,
1994a30f8f8cSSatish Balay   0,
1995a30f8f8cSSatish Balay   0,
1996a30f8f8cSSatish Balay   MatGetSubMatrices_MPISBAIJ,
1997a30f8f8cSSatish Balay   MatIncreaseOverlap_MPISBAIJ,
1998a30f8f8cSSatish Balay   MatGetValues_MPISBAIJ,
1999a30f8f8cSSatish Balay   0,
2000a30f8f8cSSatish Balay   MatPrintHelp_MPISBAIJ,
2001a30f8f8cSSatish Balay   MatScale_MPISBAIJ,
2002a30f8f8cSSatish Balay   0,
2003a30f8f8cSSatish Balay   0,
2004a30f8f8cSSatish Balay   0,
2005a30f8f8cSSatish Balay   MatGetBlockSize_MPISBAIJ,
2006a30f8f8cSSatish Balay   0,
2007a30f8f8cSSatish Balay   0,
2008a30f8f8cSSatish Balay   0,
2009a30f8f8cSSatish Balay   0,
2010a30f8f8cSSatish Balay   0,
2011a30f8f8cSSatish Balay   0,
2012a30f8f8cSSatish Balay   MatSetUnfactored_MPISBAIJ,
2013a30f8f8cSSatish Balay   0,
2014a30f8f8cSSatish Balay   MatSetValuesBlocked_MPISBAIJ,
2015a30f8f8cSSatish Balay   0,
2016a30f8f8cSSatish Balay   0,
2017a30f8f8cSSatish Balay   0,
2018a30f8f8cSSatish Balay   MatGetMaps_Petsc};
2019a30f8f8cSSatish Balay 
2020a30f8f8cSSatish Balay 
2021a30f8f8cSSatish Balay EXTERN_C_BEGIN
2022a30f8f8cSSatish Balay #undef __FUNC__
2023a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatGetDiagonalBlock_MPISBAIJ"
2024a30f8f8cSSatish Balay int MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
2025a30f8f8cSSatish Balay {
2026a30f8f8cSSatish Balay   PetscFunctionBegin;
2027a30f8f8cSSatish Balay   *a      = ((Mat_MPISBAIJ *)A->data)->A;
2028a30f8f8cSSatish Balay   *iscopy = PETSC_FALSE;
2029a30f8f8cSSatish Balay   PetscFunctionReturn(0);
2030a30f8f8cSSatish Balay }
2031a30f8f8cSSatish Balay EXTERN_C_END
2032a30f8f8cSSatish Balay 
2033a30f8f8cSSatish Balay #undef __FUNC__
2034a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatCreateMPISBAIJ"
2035a30f8f8cSSatish Balay /*@C
2036a30f8f8cSSatish Balay    MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
2037a30f8f8cSSatish Balay    (block compressed row).  For good matrix assembly performance
2038a30f8f8cSSatish Balay    the user should preallocate the matrix storage by setting the parameters
2039a30f8f8cSSatish Balay    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2040a30f8f8cSSatish Balay    performance can be increased by more than a factor of 50.
2041a30f8f8cSSatish Balay 
2042a30f8f8cSSatish Balay    Collective on MPI_Comm
2043a30f8f8cSSatish Balay 
2044a30f8f8cSSatish Balay    Input Parameters:
2045a30f8f8cSSatish Balay +  comm - MPI communicator
2046a30f8f8cSSatish Balay .  bs   - size of blockk
2047a30f8f8cSSatish Balay .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2048a30f8f8cSSatish Balay            This value should be the same as the local size used in creating the
2049a30f8f8cSSatish Balay            y vector for the matrix-vector product y = Ax.
2050a30f8f8cSSatish Balay .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2051a30f8f8cSSatish Balay            This value should be the same as the local size used in creating the
2052a30f8f8cSSatish Balay            x vector for the matrix-vector product y = Ax.
2053a30f8f8cSSatish Balay .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2054a30f8f8cSSatish Balay .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2055a30f8f8cSSatish Balay .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2056a30f8f8cSSatish Balay            submatrix  (same for all local rows)
2057a30f8f8cSSatish Balay .  d_nnz - array containing the number of block nonzeros in the various block rows
2058a30f8f8cSSatish Balay            of the in diagonal portion of the local (possibly different for each block
2059a30f8f8cSSatish Balay            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
2060a30f8f8cSSatish Balay .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2061a30f8f8cSSatish Balay            submatrix (same for all local rows).
2062a30f8f8cSSatish Balay -  o_nnz - array containing the number of nonzeros in the various block rows of the
2063a30f8f8cSSatish Balay            off-diagonal portion of the local submatrix (possibly different for
2064a30f8f8cSSatish Balay            each block row) or PETSC_NULL.
2065a30f8f8cSSatish Balay 
2066a30f8f8cSSatish Balay    Output Parameter:
2067a30f8f8cSSatish Balay .  A - the matrix
2068a30f8f8cSSatish Balay 
2069a30f8f8cSSatish Balay    Options Database Keys:
2070a30f8f8cSSatish Balay .   -mat_no_unroll - uses code that does not unroll the loops in the
2071a30f8f8cSSatish Balay                      block calculations (much slower)
2072a30f8f8cSSatish Balay .   -mat_block_size - size of the blocks to use
2073a30f8f8cSSatish Balay .   -mat_mpi - use the parallel matrix data structures even on one processor
2074a30f8f8cSSatish Balay                (defaults to using SeqBAIJ format on one processor)
2075a30f8f8cSSatish Balay 
2076a30f8f8cSSatish Balay    Notes:
2077a30f8f8cSSatish Balay    The user MUST specify either the local or global matrix dimensions
2078a30f8f8cSSatish Balay    (possibly both).
2079a30f8f8cSSatish Balay 
2080a30f8f8cSSatish Balay    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
2081a30f8f8cSSatish Balay    than it must be used on all processors that share the object for that argument.
2082a30f8f8cSSatish Balay 
2083a30f8f8cSSatish Balay    Storage Information:
2084a30f8f8cSSatish Balay    For a square global matrix we define each processor's diagonal portion
2085a30f8f8cSSatish Balay    to be its local rows and the corresponding columns (a square submatrix);
2086a30f8f8cSSatish Balay    each processor's off-diagonal portion encompasses the remainder of the
2087a30f8f8cSSatish Balay    local matrix (a rectangular submatrix).
2088a30f8f8cSSatish Balay 
2089a30f8f8cSSatish Balay    The user can specify preallocated storage for the diagonal part of
2090a30f8f8cSSatish Balay    the local submatrix with either d_nz or d_nnz (not both).  Set
2091a30f8f8cSSatish Balay    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2092a30f8f8cSSatish Balay    memory allocation.  Likewise, specify preallocated storage for the
2093a30f8f8cSSatish Balay    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2094a30f8f8cSSatish Balay 
2095a30f8f8cSSatish Balay    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2096a30f8f8cSSatish Balay    the figure below we depict these three local rows and all columns (0-11).
2097a30f8f8cSSatish Balay 
2098a30f8f8cSSatish Balay .vb
2099a30f8f8cSSatish Balay            0 1 2 3 4 5 6 7 8 9 10 11
2100a30f8f8cSSatish Balay           -------------------
2101a30f8f8cSSatish Balay    row 3  |  o o o d d d o o o o o o
2102a30f8f8cSSatish Balay    row 4  |  o o o d d d o o o o o o
2103a30f8f8cSSatish Balay    row 5  |  o o o d d d o o o o o o
2104a30f8f8cSSatish Balay           -------------------
2105a30f8f8cSSatish Balay .ve
2106a30f8f8cSSatish Balay 
2107a30f8f8cSSatish Balay    Thus, any entries in the d locations are stored in the d (diagonal)
2108a30f8f8cSSatish Balay    submatrix, and any entries in the o locations are stored in the
2109a30f8f8cSSatish Balay    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
2110a30f8f8cSSatish Balay    stored simply in the MATSEQBAIJ format for compressed row storage.
2111a30f8f8cSSatish Balay 
2112a30f8f8cSSatish Balay    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
2113a30f8f8cSSatish Balay    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2114a30f8f8cSSatish Balay    In general, for PDE problems in which most nonzeros are near the diagonal,
2115a30f8f8cSSatish Balay    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2116a30f8f8cSSatish Balay    or you will get TERRIBLE performance; see the users' manual chapter on
2117a30f8f8cSSatish Balay    matrices.
2118a30f8f8cSSatish Balay 
2119a30f8f8cSSatish Balay    Level: intermediate
2120a30f8f8cSSatish Balay 
2121a30f8f8cSSatish Balay .keywords: matrix, block, aij, compressed row, sparse, parallel
2122a30f8f8cSSatish Balay 
2123a30f8f8cSSatish Balay .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPISBAIJ()
2124a30f8f8cSSatish Balay @*/
2125a30f8f8cSSatish Balay 
2126a30f8f8cSSatish Balay int MatCreateMPISBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
2127a30f8f8cSSatish Balay {
2128a30f8f8cSSatish Balay   Mat          B;
2129a30f8f8cSSatish Balay   Mat_MPISBAIJ  *b;
2130f65c83cfSHong Zhang   int          ierr,i,sum[1],work[1],mbs,Mbs=PETSC_DECIDE,size;
2131a30f8f8cSSatish Balay   PetscTruth   flag1,flag2,flg;
2132a30f8f8cSSatish Balay 
2133a30f8f8cSSatish Balay   PetscFunctionBegin;
2134a30f8f8cSSatish Balay   if (M != N || m != n){ /* N and n are not used after this */
2135a30f8f8cSSatish Balay     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"For symmetric format, set M=N and m=n");
2136a30f8f8cSSatish Balay   }
2137a30f8f8cSSatish Balay   ierr = OptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2138a30f8f8cSSatish Balay 
2139a30f8f8cSSatish Balay   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Invalid block size specified, must be positive");
2140a30f8f8cSSatish Balay   if (d_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nz cannot be less than -2: value %d",d_nz);
2141a30f8f8cSSatish Balay   if (o_nz < -2) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nz cannot be less than -2: value %d",o_nz);
2142a30f8f8cSSatish Balay   if (d_nnz) {
2143a30f8f8cSSatish Balay     for (i=0; i<m/bs; i++) {
2144a30f8f8cSSatish Balay       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"d_nnz cannot be less than -1: local row %d value %d",i,d_nnz[i]);
2145a30f8f8cSSatish Balay     }
2146a30f8f8cSSatish Balay   }
2147a30f8f8cSSatish Balay   if (o_nnz) {
2148a30f8f8cSSatish Balay     for (i=0; i<m/bs; i++) {
2149a30f8f8cSSatish Balay       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,0,"o_nnz cannot be less than -1: local row %d value %d",i,o_nnz[i]);
2150a30f8f8cSSatish Balay     }
2151a30f8f8cSSatish Balay   }
2152a30f8f8cSSatish Balay 
2153a30f8f8cSSatish Balay   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2154a30f8f8cSSatish Balay   ierr = OptionsHasName(PETSC_NULL,"-mat_mpisbaij",&flag1);CHKERRQ(ierr);
2155a30f8f8cSSatish Balay   ierr = OptionsHasName(PETSC_NULL,"-mat_mpi",&flag2);CHKERRQ(ierr);
2156a30f8f8cSSatish Balay   if (!flag1 && !flag2 && size == 1) {
2157a30f8f8cSSatish Balay     if (M == PETSC_DECIDE) M = m;
2158a30f8f8cSSatish Balay     ierr = MatCreateSeqSBAIJ(comm,bs,M,M,d_nz,d_nnz,A);CHKERRQ(ierr);
2159a30f8f8cSSatish Balay     PetscFunctionReturn(0);
2160a30f8f8cSSatish Balay   }
2161a30f8f8cSSatish Balay 
2162a30f8f8cSSatish Balay   *A = 0;
2163a30f8f8cSSatish Balay   PetscHeaderCreate(B,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPISBAIJ,"Mat",comm,MatDestroy,MatView);
2164a30f8f8cSSatish Balay   PLogObjectCreate(B);
2165a30f8f8cSSatish Balay   B->data = (void*)(b = PetscNew(Mat_MPISBAIJ));CHKPTRQ(b);
2166a30f8f8cSSatish Balay   ierr    = PetscMemzero(b,sizeof(Mat_MPISBAIJ));CHKERRQ(ierr);
2167a30f8f8cSSatish Balay   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2168a30f8f8cSSatish Balay 
2169a30f8f8cSSatish Balay   B->ops->destroy    = MatDestroy_MPISBAIJ;
2170a30f8f8cSSatish Balay   B->ops->view       = MatView_MPISBAIJ;
2171a30f8f8cSSatish Balay   B->mapping    = 0;
2172a30f8f8cSSatish Balay   B->factor     = 0;
2173a30f8f8cSSatish Balay   B->assembled  = PETSC_FALSE;
2174a30f8f8cSSatish Balay 
2175a30f8f8cSSatish Balay   B->insertmode = NOT_SET_VALUES;
2176a30f8f8cSSatish Balay   ierr = MPI_Comm_rank(comm,&b->rank);CHKERRQ(ierr);
2177a30f8f8cSSatish Balay   ierr = MPI_Comm_size(comm,&b->size);CHKERRQ(ierr);
2178a30f8f8cSSatish Balay 
2179a30f8f8cSSatish Balay   if (m == PETSC_DECIDE && (d_nnz || o_nnz)) {
2180a30f8f8cSSatish Balay     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"Cannot have PETSC_DECIDE rows but set d_nnz or o_nnz");
2181a30f8f8cSSatish Balay   }
2182a30f8f8cSSatish Balay   if (M == PETSC_DECIDE && m == PETSC_DECIDE) {
2183a30f8f8cSSatish Balay     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,0,"either M or m should be specified");
2184a30f8f8cSSatish Balay   }
2185a30f8f8cSSatish Balay   if (M != PETSC_DECIDE && m != PETSC_DECIDE) M = PETSC_DECIDE;
2186a30f8f8cSSatish Balay 
2187a30f8f8cSSatish Balay   if (M == PETSC_DECIDE) {
2188a30f8f8cSSatish Balay     work[0] = m; mbs = m/bs;
2189a30f8f8cSSatish Balay     ierr = MPI_Allreduce(work,sum,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr);
2190a30f8f8cSSatish Balay     M = sum[0]; Mbs = M/bs;
2191a30f8f8cSSatish Balay   } else { /* M is specified */
2192a30f8f8cSSatish Balay     Mbs = M/bs;
2193a30f8f8cSSatish Balay     if (Mbs*bs != M) SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of global rows must be divisible by blocksize");
2194a30f8f8cSSatish Balay     mbs = Mbs/b->size + ((Mbs % b->size) > b->rank);
2195a30f8f8cSSatish Balay     m   = mbs*bs;
2196a30f8f8cSSatish Balay   }
2197a30f8f8cSSatish Balay 
2198a30f8f8cSSatish Balay   if (mbs*bs != m) {
2199a30f8f8cSSatish Balay     SETERRQ(PETSC_ERR_ARG_SIZ,0,"No of local rows/cols must be divisible by blocksize");
2200a30f8f8cSSatish Balay   }
2201a30f8f8cSSatish Balay 
2202a30f8f8cSSatish Balay   b->m = m; B->m = m;
2203a30f8f8cSSatish Balay   b->n = m; B->n = m;
2204a30f8f8cSSatish Balay   b->N = M; B->N = M;
2205a30f8f8cSSatish Balay   b->M = M;
2206a30f8f8cSSatish Balay   B->M = M;
2207a30f8f8cSSatish Balay   b->bs  = bs;
2208a30f8f8cSSatish Balay   b->bs2 = bs*bs;
2209a30f8f8cSSatish Balay   b->mbs = mbs;
2210a30f8f8cSSatish Balay   b->nbs = mbs;
2211a30f8f8cSSatish Balay   b->Mbs = Mbs;
2212a30f8f8cSSatish Balay   b->Nbs = Mbs;
2213a30f8f8cSSatish Balay 
2214a30f8f8cSSatish Balay   /* the information in the maps duplicates the information computed below, eventually
2215a30f8f8cSSatish Balay      we should remove the duplicate information that is not contained in the maps */
2216a30f8f8cSSatish Balay   ierr = MapCreateMPI(B->comm,m,M,&B->rmap);CHKERRQ(ierr);
2217a30f8f8cSSatish Balay   ierr = MapCreateMPI(B->comm,m,M,&B->cmap);CHKERRQ(ierr);
2218a30f8f8cSSatish Balay 
2219a30f8f8cSSatish Balay   /* build local table of row and column ownerships */
2220a30f8f8cSSatish Balay   b->rowners = (int*)PetscMalloc(3*(b->size+2)*sizeof(int));CHKPTRQ(b->rowners);
2221a30f8f8cSSatish Balay   PLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ));
2222a30f8f8cSSatish Balay   b->cowners    = b->rowners + b->size + 2;
2223a30f8f8cSSatish Balay   b->rowners_bs = b->cowners + b->size + 2;
2224a30f8f8cSSatish Balay   ierr = MPI_Allgather(&mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2225a30f8f8cSSatish Balay   b->rowners[0]    = 0;
2226a30f8f8cSSatish Balay   for (i=2; i<=b->size; i++) {
2227a30f8f8cSSatish Balay     b->rowners[i] += b->rowners[i-1];
2228a30f8f8cSSatish Balay   }
2229a30f8f8cSSatish Balay   for (i=0; i<=b->size; i++) {
2230a30f8f8cSSatish Balay     b->rowners_bs[i] = b->rowners[i]*bs;
2231a30f8f8cSSatish Balay   }
2232a30f8f8cSSatish Balay   b->rstart    = b->rowners[b->rank];
2233a30f8f8cSSatish Balay   b->rend      = b->rowners[b->rank+1];
2234a30f8f8cSSatish Balay   b->rstart_bs = b->rstart * bs;
2235a30f8f8cSSatish Balay   b->rend_bs   = b->rend * bs;
2236a30f8f8cSSatish Balay 
2237a30f8f8cSSatish Balay   b->cstart    = b->rstart;
2238a30f8f8cSSatish Balay   b->cend      = b->rend;
2239a30f8f8cSSatish Balay   b->cstart_bs = b->cstart * bs;
2240a30f8f8cSSatish Balay   b->cend_bs   = b->cend * bs;
2241a30f8f8cSSatish Balay 
2242a30f8f8cSSatish Balay 
2243a30f8f8cSSatish Balay   if (d_nz == PETSC_DEFAULT) d_nz = 5;
2244a30f8f8cSSatish Balay   ierr = MatCreateSeqSBAIJ(PETSC_COMM_SELF,bs,m,m,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
2245a30f8f8cSSatish Balay   PLogObjectParent(B,b->A);
2246a30f8f8cSSatish Balay   if (o_nz == PETSC_DEFAULT) o_nz = 0;
2247a30f8f8cSSatish Balay   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,m,M,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
2248a30f8f8cSSatish Balay   PLogObjectParent(B,b->B);
2249a30f8f8cSSatish Balay 
2250a30f8f8cSSatish Balay   /* build cache for off array entries formed */
2251a30f8f8cSSatish Balay   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
2252a30f8f8cSSatish Balay   ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);
2253a30f8f8cSSatish Balay   b->donotstash  = PETSC_FALSE;
2254a30f8f8cSSatish Balay   b->colmap      = PETSC_NULL;
2255a30f8f8cSSatish Balay   b->garray      = PETSC_NULL;
2256a30f8f8cSSatish Balay   b->roworiented = PETSC_TRUE;
2257a30f8f8cSSatish Balay 
2258a30f8f8cSSatish Balay #if defined(PEYSC_USE_MAT_SINGLE)
2259a30f8f8cSSatish Balay   /* stuff for MatSetValues_XXX in single precision */
2260a30f8f8cSSatish Balay   b->lensetvalues     = 0;
2261a30f8f8cSSatish Balay   b->setvaluescopy    = PETSC_NULL;
2262a30f8f8cSSatish Balay #endif
2263a30f8f8cSSatish Balay 
2264a30f8f8cSSatish Balay   /* stuff used in block assembly */
2265a30f8f8cSSatish Balay   b->barray       = 0;
2266a30f8f8cSSatish Balay 
2267a30f8f8cSSatish Balay   /* stuff used for matrix vector multiply */
2268a30f8f8cSSatish Balay   b->lvec         = 0;
2269a30f8f8cSSatish Balay   b->Mvctx        = 0;
2270a30f8f8cSSatish Balay 
2271a30f8f8cSSatish Balay   /* stuff for MatGetRow() */
2272a30f8f8cSSatish Balay   b->rowindices   = 0;
2273a30f8f8cSSatish Balay   b->rowvalues    = 0;
2274a30f8f8cSSatish Balay   b->getrowactive = PETSC_FALSE;
2275a30f8f8cSSatish Balay 
2276a30f8f8cSSatish Balay   /* hash table stuff */
2277a30f8f8cSSatish Balay   b->ht           = 0;
2278a30f8f8cSSatish Balay   b->hd           = 0;
2279a30f8f8cSSatish Balay   b->ht_size      = 0;
2280a30f8f8cSSatish Balay   b->ht_flag      = PETSC_FALSE;
2281a30f8f8cSSatish Balay   b->ht_fact      = 0;
2282a30f8f8cSSatish Balay   b->ht_total_ct  = 0;
2283a30f8f8cSSatish Balay   b->ht_insert_ct = 0;
2284a30f8f8cSSatish Balay 
2285a30f8f8cSSatish Balay   *A = B;
2286a30f8f8cSSatish Balay   ierr = OptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
2287a30f8f8cSSatish Balay   if (flg) {
2288a30f8f8cSSatish Balay     double fact = 1.39;
2289a30f8f8cSSatish Balay     ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
2290a30f8f8cSSatish Balay     ierr = OptionsGetDouble(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
2291a30f8f8cSSatish Balay     if (fact <= 1.0) fact = 1.39;
2292a30f8f8cSSatish Balay     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
2293a30f8f8cSSatish Balay     PLogInfo(0,"MatCreateMPISBAIJ:Hash table Factor used %5.2f\n",fact);
2294a30f8f8cSSatish Balay   }
2295a30f8f8cSSatish Balay   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2296a30f8f8cSSatish Balay                                      "MatStoreValues_MPISBAIJ",
2297a30f8f8cSSatish Balay                                      MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
2298a30f8f8cSSatish Balay   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2299a30f8f8cSSatish Balay                                      "MatRetrieveValues_MPISBAIJ",
2300a30f8f8cSSatish Balay                                      MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
2301a30f8f8cSSatish Balay   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
2302a30f8f8cSSatish Balay                                      "MatGetDiagonalBlock_MPISBAIJ",
2303a30f8f8cSSatish Balay                                      MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
2304a30f8f8cSSatish Balay   PetscFunctionReturn(0);
2305a30f8f8cSSatish Balay }
2306a30f8f8cSSatish Balay 
2307a30f8f8cSSatish Balay 
2308a30f8f8cSSatish Balay #undef __FUNC__
2309a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatDuplicate_MPISBAIJ"
2310a30f8f8cSSatish Balay static int MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2311a30f8f8cSSatish Balay {
2312a30f8f8cSSatish Balay   Mat         mat;
2313a30f8f8cSSatish Balay   Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2314a30f8f8cSSatish Balay   int         ierr,len=0;
2315a30f8f8cSSatish Balay   PetscTruth  flg;
2316a30f8f8cSSatish Balay 
2317a30f8f8cSSatish Balay   PetscFunctionBegin;
2318a30f8f8cSSatish Balay   *newmat       = 0;
2319a30f8f8cSSatish Balay   PetscHeaderCreate(mat,_p_Mat,struct _MatOps,MAT_COOKIE,MATMPISBAIJ,"Mat",matin->comm,MatDestroy,MatView);
2320a30f8f8cSSatish Balay   PLogObjectCreate(mat);
2321a30f8f8cSSatish Balay   mat->data         = (void*)(a = PetscNew(Mat_MPISBAIJ));CHKPTRQ(a);
2322a30f8f8cSSatish Balay   ierr              = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2323a30f8f8cSSatish Balay   mat->ops->destroy = MatDestroy_MPISBAIJ;
2324a30f8f8cSSatish Balay   mat->ops->view    = MatView_MPISBAIJ;
2325a30f8f8cSSatish Balay   mat->factor       = matin->factor;
2326a30f8f8cSSatish Balay   mat->assembled    = PETSC_TRUE;
2327a30f8f8cSSatish Balay   mat->insertmode   = NOT_SET_VALUES;
2328a30f8f8cSSatish Balay 
2329a30f8f8cSSatish Balay   a->m = mat->m   = oldmat->m;
2330a30f8f8cSSatish Balay   a->n = mat->n   = oldmat->n;
2331a30f8f8cSSatish Balay   a->M = mat->M   = oldmat->M;
2332a30f8f8cSSatish Balay   a->N = mat->N   = oldmat->N;
2333a30f8f8cSSatish Balay 
2334a30f8f8cSSatish Balay   a->bs  = oldmat->bs;
2335a30f8f8cSSatish Balay   a->bs2 = oldmat->bs2;
2336a30f8f8cSSatish Balay   a->mbs = oldmat->mbs;
2337a30f8f8cSSatish Balay   a->nbs = oldmat->nbs;
2338a30f8f8cSSatish Balay   a->Mbs = oldmat->Mbs;
2339a30f8f8cSSatish Balay   a->Nbs = oldmat->Nbs;
2340a30f8f8cSSatish Balay 
2341a30f8f8cSSatish Balay   a->rstart       = oldmat->rstart;
2342a30f8f8cSSatish Balay   a->rend         = oldmat->rend;
2343a30f8f8cSSatish Balay   a->cstart       = oldmat->cstart;
2344a30f8f8cSSatish Balay   a->cend         = oldmat->cend;
2345a30f8f8cSSatish Balay   a->size         = oldmat->size;
2346a30f8f8cSSatish Balay   a->rank         = oldmat->rank;
2347a30f8f8cSSatish Balay   a->donotstash   = oldmat->donotstash;
2348a30f8f8cSSatish Balay   a->roworiented  = oldmat->roworiented;
2349a30f8f8cSSatish Balay   a->rowindices   = 0;
2350a30f8f8cSSatish Balay   a->rowvalues    = 0;
2351a30f8f8cSSatish Balay   a->getrowactive = PETSC_FALSE;
2352a30f8f8cSSatish Balay   a->barray       = 0;
2353a30f8f8cSSatish Balay   a->rstart_bs    = oldmat->rstart_bs;
2354a30f8f8cSSatish Balay   a->rend_bs      = oldmat->rend_bs;
2355a30f8f8cSSatish Balay   a->cstart_bs    = oldmat->cstart_bs;
2356a30f8f8cSSatish Balay   a->cend_bs      = oldmat->cend_bs;
2357a30f8f8cSSatish Balay 
2358a30f8f8cSSatish Balay   /* hash table stuff */
2359a30f8f8cSSatish Balay   a->ht           = 0;
2360a30f8f8cSSatish Balay   a->hd           = 0;
2361a30f8f8cSSatish Balay   a->ht_size      = 0;
2362a30f8f8cSSatish Balay   a->ht_flag      = oldmat->ht_flag;
2363a30f8f8cSSatish Balay   a->ht_fact      = oldmat->ht_fact;
2364a30f8f8cSSatish Balay   a->ht_total_ct  = 0;
2365a30f8f8cSSatish Balay   a->ht_insert_ct = 0;
2366a30f8f8cSSatish Balay 
2367a30f8f8cSSatish Balay 
2368a30f8f8cSSatish Balay   a->rowners = (int*)PetscMalloc(3*(a->size+2)*sizeof(int));CHKPTRQ(a->rowners);
2369a30f8f8cSSatish Balay   PLogObjectMemory(mat,3*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ));
2370a30f8f8cSSatish Balay   a->cowners    = a->rowners + a->size + 2;
2371a30f8f8cSSatish Balay   a->rowners_bs = a->cowners + a->size + 2;
2372a30f8f8cSSatish Balay   ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr);
2373a30f8f8cSSatish Balay   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
2374a30f8f8cSSatish Balay   ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr);
2375a30f8f8cSSatish Balay   if (oldmat->colmap) {
2376a30f8f8cSSatish Balay #if defined (PETSC_USE_CTABLE)
2377a30f8f8cSSatish Balay   ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2378a30f8f8cSSatish Balay #else
2379a30f8f8cSSatish Balay     a->colmap = (int*)PetscMalloc((a->Nbs)*sizeof(int));CHKPTRQ(a->colmap);
2380a30f8f8cSSatish Balay     PLogObjectMemory(mat,(a->Nbs)*sizeof(int));
2381a30f8f8cSSatish Balay     ierr      = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr);
2382a30f8f8cSSatish Balay #endif
2383a30f8f8cSSatish Balay   } else a->colmap = 0;
2384a30f8f8cSSatish Balay   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2385a30f8f8cSSatish Balay     a->garray = (int*)PetscMalloc(len*sizeof(int));CHKPTRQ(a->garray);
2386a30f8f8cSSatish Balay     PLogObjectMemory(mat,len*sizeof(int));
2387a30f8f8cSSatish Balay     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr);
2388a30f8f8cSSatish Balay   } else a->garray = 0;
2389a30f8f8cSSatish Balay 
2390a30f8f8cSSatish Balay   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2391a30f8f8cSSatish Balay   PLogObjectParent(mat,a->lvec);
2392a30f8f8cSSatish Balay   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2393a30f8f8cSSatish Balay 
2394a30f8f8cSSatish Balay   PLogObjectParent(mat,a->Mvctx);
2395a30f8f8cSSatish Balay   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2396a30f8f8cSSatish Balay   PLogObjectParent(mat,a->A);
2397a30f8f8cSSatish Balay   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2398a30f8f8cSSatish Balay   PLogObjectParent(mat,a->B);
2399a30f8f8cSSatish Balay   ierr = OptionsHasName(PETSC_NULL,"-help",&flg);CHKERRQ(ierr);
2400a30f8f8cSSatish Balay   ierr = FListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr);
2401a30f8f8cSSatish Balay   if (flg) {
2402a30f8f8cSSatish Balay     ierr = MatPrintHelp(mat);CHKERRQ(ierr);
2403a30f8f8cSSatish Balay   }
2404a30f8f8cSSatish Balay   *newmat = mat;
2405a30f8f8cSSatish Balay   PetscFunctionReturn(0);
2406a30f8f8cSSatish Balay }
2407a30f8f8cSSatish Balay 
2408a30f8f8cSSatish Balay #include "petscsys.h"
2409a30f8f8cSSatish Balay 
2410a30f8f8cSSatish Balay #undef __FUNC__
2411a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatLoad_MPISBAIJ"
2412a30f8f8cSSatish Balay int MatLoad_MPISBAIJ(Viewer viewer,MatType type,Mat *newmat)
2413a30f8f8cSSatish Balay {
2414a30f8f8cSSatish Balay   Mat          A;
2415a30f8f8cSSatish Balay   int          i,nz,ierr,j,rstart,rend,fd;
2416a30f8f8cSSatish Balay   Scalar       *vals,*buf;
2417a30f8f8cSSatish Balay   MPI_Comm     comm = ((PetscObject)viewer)->comm;
2418a30f8f8cSSatish Balay   MPI_Status   status;
2419a30f8f8cSSatish Balay   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
2420a30f8f8cSSatish Balay   int          *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf;
2421a30f8f8cSSatish Balay   int          tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows;
2422a30f8f8cSSatish Balay   int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2423a30f8f8cSSatish Balay   int          dcount,kmax,k,nzcount,tmp;
2424a30f8f8cSSatish Balay 
2425a30f8f8cSSatish Balay   PetscFunctionBegin;
2426a30f8f8cSSatish Balay   ierr = OptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2427a30f8f8cSSatish Balay 
2428a30f8f8cSSatish Balay   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2429a30f8f8cSSatish Balay   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2430a30f8f8cSSatish Balay   if (!rank) {
2431a30f8f8cSSatish Balay     ierr = ViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2432a30f8f8cSSatish Balay     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2433a30f8f8cSSatish Balay     if (header[0] != MAT_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"not matrix object");
2434a30f8f8cSSatish Balay     if (header[3] < 0) {
2435a30f8f8cSSatish Balay       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,1,"Matrix stored in special format, cannot load as MPISBAIJ");
2436a30f8f8cSSatish Balay     }
2437a30f8f8cSSatish Balay   }
2438a30f8f8cSSatish Balay 
2439a30f8f8cSSatish Balay   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
2440a30f8f8cSSatish Balay   M = header[1]; N = header[2];
2441a30f8f8cSSatish Balay 
2442a30f8f8cSSatish Balay   if (M != N) SETERRQ(PETSC_ERR_SUP,0,"Can only do square matrices");
2443a30f8f8cSSatish Balay 
2444a30f8f8cSSatish Balay   /*
2445a30f8f8cSSatish Balay      This code adds extra rows to make sure the number of rows is
2446a30f8f8cSSatish Balay      divisible by the blocksize
2447a30f8f8cSSatish Balay   */
2448a30f8f8cSSatish Balay   Mbs        = M/bs;
2449a30f8f8cSSatish Balay   extra_rows = bs - M + bs*(Mbs);
2450a30f8f8cSSatish Balay   if (extra_rows == bs) extra_rows = 0;
2451a30f8f8cSSatish Balay   else                  Mbs++;
2452a30f8f8cSSatish Balay   if (extra_rows &&!rank) {
2453a30f8f8cSSatish Balay     PLogInfo(0,"MatLoad_MPISBAIJ:Padding loaded matrix to match blocksize\n");
2454a30f8f8cSSatish Balay   }
2455a30f8f8cSSatish Balay 
2456a30f8f8cSSatish Balay   /* determine ownership of all rows */
2457a30f8f8cSSatish Balay   mbs = Mbs/size + ((Mbs % size) > rank);
2458a30f8f8cSSatish Balay   m   = mbs * bs;
2459a30f8f8cSSatish Balay   rowners = (int*)PetscMalloc(2*(size+2)*sizeof(int));CHKPTRQ(rowners);
2460a30f8f8cSSatish Balay   browners = rowners + size + 1;
2461a30f8f8cSSatish Balay   ierr = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2462a30f8f8cSSatish Balay   rowners[0] = 0;
2463a30f8f8cSSatish Balay   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2464a30f8f8cSSatish Balay   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2465a30f8f8cSSatish Balay   rstart = rowners[rank];
2466a30f8f8cSSatish Balay   rend   = rowners[rank+1];
2467a30f8f8cSSatish Balay 
2468a30f8f8cSSatish Balay   /* distribute row lengths to all processors */
2469a30f8f8cSSatish Balay   locrowlens = (int*)PetscMalloc((rend-rstart)*bs*sizeof(int));CHKPTRQ(locrowlens);
2470a30f8f8cSSatish Balay   if (!rank) {
2471a30f8f8cSSatish Balay     rowlengths = (int*)PetscMalloc((M+extra_rows)*sizeof(int));CHKPTRQ(rowlengths);
2472a30f8f8cSSatish Balay     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2473a30f8f8cSSatish Balay     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2474a30f8f8cSSatish Balay     sndcounts = (int*)PetscMalloc(size*sizeof(int));CHKPTRQ(sndcounts);
2475a30f8f8cSSatish Balay     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2476a30f8f8cSSatish Balay     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2477a30f8f8cSSatish Balay     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2478a30f8f8cSSatish Balay   } else {
2479a30f8f8cSSatish Balay     ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2480a30f8f8cSSatish Balay   }
2481a30f8f8cSSatish Balay 
2482a30f8f8cSSatish Balay   if (!rank) {
2483a30f8f8cSSatish Balay     /* calculate the number of nonzeros on each processor */
2484a30f8f8cSSatish Balay     procsnz = (int*)PetscMalloc(size*sizeof(int));CHKPTRQ(procsnz);
2485a30f8f8cSSatish Balay     ierr    = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
2486a30f8f8cSSatish Balay     for (i=0; i<size; i++) {
2487a30f8f8cSSatish Balay       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2488a30f8f8cSSatish Balay         procsnz[i] += rowlengths[j];
2489a30f8f8cSSatish Balay       }
2490a30f8f8cSSatish Balay     }
2491a30f8f8cSSatish Balay     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2492a30f8f8cSSatish Balay 
2493a30f8f8cSSatish Balay     /* determine max buffer needed and allocate it */
2494a30f8f8cSSatish Balay     maxnz = 0;
2495a30f8f8cSSatish Balay     for (i=0; i<size; i++) {
2496a30f8f8cSSatish Balay       maxnz = PetscMax(maxnz,procsnz[i]);
2497a30f8f8cSSatish Balay     }
2498a30f8f8cSSatish Balay     cols = (int*)PetscMalloc(maxnz*sizeof(int));CHKPTRQ(cols);
2499a30f8f8cSSatish Balay 
2500a30f8f8cSSatish Balay     /* read in my part of the matrix column indices  */
2501a30f8f8cSSatish Balay     nz = procsnz[0];
2502a30f8f8cSSatish Balay     ibuf = (int*)PetscMalloc(nz*sizeof(int));CHKPTRQ(ibuf);
2503a30f8f8cSSatish Balay     mycols = ibuf;
2504a30f8f8cSSatish Balay     if (size == 1)  nz -= extra_rows;
2505a30f8f8cSSatish Balay     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2506a30f8f8cSSatish Balay     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2507a30f8f8cSSatish Balay 
2508a30f8f8cSSatish Balay     /* read in every ones (except the last) and ship off */
2509a30f8f8cSSatish Balay     for (i=1; i<size-1; i++) {
2510a30f8f8cSSatish Balay       nz   = procsnz[i];
2511a30f8f8cSSatish Balay       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2512a30f8f8cSSatish Balay       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
2513a30f8f8cSSatish Balay     }
2514a30f8f8cSSatish Balay     /* read in the stuff for the last proc */
2515a30f8f8cSSatish Balay     if (size != 1) {
2516a30f8f8cSSatish Balay       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2517a30f8f8cSSatish Balay       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2518a30f8f8cSSatish Balay       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2519a30f8f8cSSatish Balay       ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
2520a30f8f8cSSatish Balay     }
2521a30f8f8cSSatish Balay     ierr = PetscFree(cols);CHKERRQ(ierr);
2522a30f8f8cSSatish Balay   } else {
2523a30f8f8cSSatish Balay     /* determine buffer space needed for message */
2524a30f8f8cSSatish Balay     nz = 0;
2525a30f8f8cSSatish Balay     for (i=0; i<m; i++) {
2526a30f8f8cSSatish Balay       nz += locrowlens[i];
2527a30f8f8cSSatish Balay     }
2528a30f8f8cSSatish Balay     ibuf   = (int*)PetscMalloc(nz*sizeof(int));CHKPTRQ(ibuf);
2529a30f8f8cSSatish Balay     mycols = ibuf;
2530a30f8f8cSSatish Balay     /* receive message of column indices*/
2531a30f8f8cSSatish Balay     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2532a30f8f8cSSatish Balay     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2533a30f8f8cSSatish Balay     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
2534a30f8f8cSSatish Balay   }
2535a30f8f8cSSatish Balay 
2536a30f8f8cSSatish Balay   /* loop over local rows, determining number of off diagonal entries */
2537a30f8f8cSSatish Balay   dlens  = (int*)PetscMalloc(2*(rend-rstart+1)*sizeof(int));CHKPTRQ(dlens);
2538a30f8f8cSSatish Balay   odlens = dlens + (rend-rstart);
2539a30f8f8cSSatish Balay   mask   = (int*)PetscMalloc(3*Mbs*sizeof(int));CHKPTRQ(mask);
2540a30f8f8cSSatish Balay   ierr   = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr);
2541a30f8f8cSSatish Balay   masked1 = mask    + Mbs;
2542a30f8f8cSSatish Balay   masked2 = masked1 + Mbs;
2543a30f8f8cSSatish Balay   rowcount = 0; nzcount = 0;
2544a30f8f8cSSatish Balay   for (i=0; i<mbs; i++) {
2545a30f8f8cSSatish Balay     dcount  = 0;
2546a30f8f8cSSatish Balay     odcount = 0;
2547a30f8f8cSSatish Balay     for (j=0; j<bs; j++) {
2548a30f8f8cSSatish Balay       kmax = locrowlens[rowcount];
2549a30f8f8cSSatish Balay       for (k=0; k<kmax; k++) {
2550a30f8f8cSSatish Balay         tmp = mycols[nzcount++]/bs;
2551a30f8f8cSSatish Balay         if (!mask[tmp]) {
2552a30f8f8cSSatish Balay           mask[tmp] = 1;
2553a30f8f8cSSatish Balay           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp;
2554a30f8f8cSSatish Balay           else masked1[dcount++] = tmp;
2555a30f8f8cSSatish Balay         }
2556a30f8f8cSSatish Balay       }
2557a30f8f8cSSatish Balay       rowcount++;
2558a30f8f8cSSatish Balay     }
2559a30f8f8cSSatish Balay 
2560a30f8f8cSSatish Balay     dlens[i]  = dcount;
2561a30f8f8cSSatish Balay     odlens[i] = odcount;
2562a30f8f8cSSatish Balay 
2563a30f8f8cSSatish Balay     /* zero out the mask elements we set */
2564a30f8f8cSSatish Balay     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2565a30f8f8cSSatish Balay     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2566a30f8f8cSSatish Balay   }
2567a30f8f8cSSatish Balay 
2568a30f8f8cSSatish Balay   /* create our matrix */
2569a30f8f8cSSatish Balay   ierr = MatCreateMPISBAIJ(comm,bs,m,PETSC_DECIDE,M+extra_rows,N+extra_rows,0,dlens,0,odlens,newmat);CHKERRQ(ierr);
2570a30f8f8cSSatish Balay   A = *newmat;
2571a30f8f8cSSatish Balay   MatSetOption(A,MAT_COLUMNS_SORTED);
2572a30f8f8cSSatish Balay 
2573a30f8f8cSSatish Balay   if (!rank) {
2574a30f8f8cSSatish Balay     buf = (Scalar*)PetscMalloc(maxnz*sizeof(Scalar));CHKPTRQ(buf);
2575a30f8f8cSSatish Balay     /* read in my part of the matrix numerical values  */
2576a30f8f8cSSatish Balay     nz = procsnz[0];
2577a30f8f8cSSatish Balay     vals = buf;
2578a30f8f8cSSatish Balay     mycols = ibuf;
2579a30f8f8cSSatish Balay     if (size == 1)  nz -= extra_rows;
2580a30f8f8cSSatish Balay     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2581a30f8f8cSSatish Balay     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2582a30f8f8cSSatish Balay 
2583a30f8f8cSSatish Balay     /* insert into matrix */
2584a30f8f8cSSatish Balay     jj      = rstart*bs;
2585a30f8f8cSSatish Balay     for (i=0; i<m; i++) {
2586a30f8f8cSSatish Balay       ierr = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2587a30f8f8cSSatish Balay       mycols += locrowlens[i];
2588a30f8f8cSSatish Balay       vals   += locrowlens[i];
2589a30f8f8cSSatish Balay       jj++;
2590a30f8f8cSSatish Balay     }
2591a30f8f8cSSatish Balay     /* read in other processors (except the last one) and ship out */
2592a30f8f8cSSatish Balay     for (i=1; i<size-1; i++) {
2593a30f8f8cSSatish Balay       nz   = procsnz[i];
2594a30f8f8cSSatish Balay       vals = buf;
2595a30f8f8cSSatish Balay       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2596a30f8f8cSSatish Balay       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2597a30f8f8cSSatish Balay     }
2598a30f8f8cSSatish Balay     /* the last proc */
2599a30f8f8cSSatish Balay     if (size != 1){
2600a30f8f8cSSatish Balay       nz   = procsnz[i] - extra_rows;
2601a30f8f8cSSatish Balay       vals = buf;
2602a30f8f8cSSatish Balay       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2603a30f8f8cSSatish Balay       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2604a30f8f8cSSatish Balay       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
2605a30f8f8cSSatish Balay     }
2606a30f8f8cSSatish Balay     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2607a30f8f8cSSatish Balay   } else {
2608a30f8f8cSSatish Balay     /* receive numeric values */
2609a30f8f8cSSatish Balay     buf = (Scalar*)PetscMalloc(nz*sizeof(Scalar));CHKPTRQ(buf);
2610a30f8f8cSSatish Balay 
2611a30f8f8cSSatish Balay     /* receive message of values*/
2612a30f8f8cSSatish Balay     vals   = buf;
2613a30f8f8cSSatish Balay     mycols = ibuf;
2614a30f8f8cSSatish Balay     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2615a30f8f8cSSatish Balay     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2616a30f8f8cSSatish Balay     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,0,"something is wrong with file");
2617a30f8f8cSSatish Balay 
2618a30f8f8cSSatish Balay     /* insert into matrix */
2619a30f8f8cSSatish Balay     jj      = rstart*bs;
2620a30f8f8cSSatish Balay     for (i=0; i<m; i++) {
2621a30f8f8cSSatish Balay       ierr    = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2622a30f8f8cSSatish Balay       mycols += locrowlens[i];
2623a30f8f8cSSatish Balay       vals   += locrowlens[i];
2624a30f8f8cSSatish Balay       jj++;
2625a30f8f8cSSatish Balay     }
2626a30f8f8cSSatish Balay   }
2627a30f8f8cSSatish Balay   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2628a30f8f8cSSatish Balay   ierr = PetscFree(buf);CHKERRQ(ierr);
2629a30f8f8cSSatish Balay   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2630a30f8f8cSSatish Balay   ierr = PetscFree(rowners);CHKERRQ(ierr);
2631a30f8f8cSSatish Balay   ierr = PetscFree(dlens);CHKERRQ(ierr);
2632a30f8f8cSSatish Balay   ierr = PetscFree(mask);CHKERRQ(ierr);
2633a30f8f8cSSatish Balay   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2634a30f8f8cSSatish Balay   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2635a30f8f8cSSatish Balay   PetscFunctionReturn(0);
2636a30f8f8cSSatish Balay }
2637a30f8f8cSSatish Balay 
2638a30f8f8cSSatish Balay #undef __FUNC__
2639a30f8f8cSSatish Balay #define __FUNC__ /*<a name=""></a>*/"MatMPISBAIJSetHashTableFactor"
2640a30f8f8cSSatish Balay /*@
2641a30f8f8cSSatish Balay    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2642a30f8f8cSSatish Balay 
2643a30f8f8cSSatish Balay    Input Parameters:
2644a30f8f8cSSatish Balay .  mat  - the matrix
2645a30f8f8cSSatish Balay .  fact - factor
2646a30f8f8cSSatish Balay 
2647a30f8f8cSSatish Balay    Collective on Mat
2648a30f8f8cSSatish Balay 
2649a30f8f8cSSatish Balay    Level: advanced
2650a30f8f8cSSatish Balay 
2651a30f8f8cSSatish Balay   Notes:
2652a30f8f8cSSatish Balay    This can also be set by the command line option: -mat_use_hash_table fact
2653a30f8f8cSSatish Balay 
2654a30f8f8cSSatish Balay .keywords: matrix, hashtable, factor, HT
2655a30f8f8cSSatish Balay 
2656a30f8f8cSSatish Balay .seealso: MatSetOption()
2657a30f8f8cSSatish Balay @*/
2658a30f8f8cSSatish Balay int MatMPISBAIJSetHashTableFactor(Mat mat,PetscReal fact)
2659a30f8f8cSSatish Balay {
2660a30f8f8cSSatish Balay   Mat_MPIBAIJ *baij;
2661a30f8f8cSSatish Balay 
2662a30f8f8cSSatish Balay   PetscFunctionBegin;
2663a30f8f8cSSatish Balay   PetscValidHeaderSpecific(mat,MAT_COOKIE);
2664a30f8f8cSSatish Balay   if (mat->type != MATMPIBAIJ) {
2665a30f8f8cSSatish Balay     SETERRQ(PETSC_ERR_ARG_WRONG,1,"Incorrect matrix type. Use MPIBAIJ only.");
2666a30f8f8cSSatish Balay   }
2667a30f8f8cSSatish Balay   baij = (Mat_MPIBAIJ*)mat->data;
2668a30f8f8cSSatish Balay   baij->ht_fact = fact;
2669a30f8f8cSSatish Balay   PetscFunctionReturn(0);
2670a30f8f8cSSatish Balay }
2671