xref: /petsc/src/mat/impls/baij/seq/baij.h (revision 4c000e2e275449c4c357c2b196fd81973fa62105)
12593348eSBarry Smith 
235aab85fSBarry Smith #if !defined(__BAIJ_H)
335aab85fSBarry Smith #define __BAIJ_H
47c4f633dSBarry Smith #include "private/matimpl.h"
57c4f633dSBarry Smith #include "../src/mat/impls/aij/seq/aij.h"
6a4005a5dSBarry Smith #include "../src/mat/impls/baij/seq/ftn-kernels/fsolvebaij.h"
72593348eSBarry Smith 
82593348eSBarry Smith /*
935aab85fSBarry Smith   MATSEQBAIJ format - Block compressed row storage. The i[] and j[]
10e24b481bSBarry Smith   arrays start at 0.
112593348eSBarry Smith */
122593348eSBarry Smith 
136c6c5352SBarry Smith /* This header is shared by the SeqSBAIJ matrix */
146c6c5352SBarry Smith #define SEQBAIJHEADER \
15521d7252SBarry Smith   PetscInt         bs2;              /*  square of block size */                                     \
16690b6cddSBarry Smith   PetscInt         mbs,nbs;          /* rows/bs, columns/bs */                                       \
176c6c5352SBarry Smith   PetscScalar      *mult_work;       /* work array for matrix vector product*/                       \
18dd6ea824SBarry Smith   MatScalar        *saved_values;                                                                    \
196c6c5352SBarry Smith                                                                                                      \
206ad2eaddSHong Zhang   Mat              sbaijMat;         /* mat in sbaij format */                                       \
216c6c5352SBarry Smith                                                                                                      \
226c6c5352SBarry Smith   PetscTruth       pivotinblocks;    /* pivot inside factorization of each diagonal block */         \
236c6c5352SBarry Smith                                                                                                      \
24dd6ea824SBarry Smith   MatScalar        *idiag;           /* inverse of block diagonal  */                                \
25e6b907acSBarry Smith   PetscTruth       idiagvalid       /* if above has correct/current values */
26e6b907acSBarry Smith 
276c6c5352SBarry Smith 
286c6c5352SBarry Smith typedef struct {
29dd6ea824SBarry Smith   SEQAIJHEADER(MatScalar);
30e6b907acSBarry Smith   SEQBAIJHEADER;
3135aab85fSBarry Smith } Mat_SeqBAIJ;
322593348eSBarry Smith 
33f349cc81SSatish Balay EXTERN_C_BEGIN
34ab93d7beSBarry Smith EXTERN PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat,PetscInt,PetscInt,PetscInt*);
35f349cc81SSatish Balay EXTERN_C_END
360481f469SBarry Smith EXTERN PetscErrorCode MatILUFactorSymbolic_SeqBAIJ(Mat,Mat,IS,IS,const MatFactorInfo*);
370481f469SBarry Smith EXTERN PetscErrorCode MatICCFactorSymbolic_SeqBAIJ(Mat,Mat,IS,const MatFactorInfo*);
380481f469SBarry Smith EXTERN PetscErrorCode MatCholeskyFactorSymbolic_SeqBAIJ(Mat,Mat,IS,const MatFactorInfo*);
390481f469SBarry Smith EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqBAIJ_N(Mat,Mat,const MatFactorInfo*);
400481f469SBarry Smith EXTERN PetscErrorCode MatCholeskyFactorNumeric_SeqBAIJ_N_NaturalOrdering(Mat,Mat,const MatFactorInfo*);
41dfbe8321SBarry Smith EXTERN PetscErrorCode MatDuplicate_SeqBAIJ(Mat,MatDuplicateOption,Mat*);
422af78befSBarry Smith EXTERN PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat,PetscTruth*,PetscInt*);
43dfbe8321SBarry Smith EXTERN PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat);
44c8342467SHong Zhang EXTERN PetscErrorCode MatILUDTFactor_SeqBAIJ(Mat,IS,IS,const MatFactorInfo*,Mat*);
45be3590efSBarry Smith 
460481f469SBarry Smith EXTERN PetscErrorCode MatLUFactorSymbolic_SeqBAIJ(Mat,Mat,IS,IS,const MatFactorInfo*);
470481f469SBarry Smith EXTERN PetscErrorCode MatLUFactor_SeqBAIJ(Mat,IS,IS,const MatFactorInfo*);
48690b6cddSBarry Smith EXTERN PetscErrorCode MatIncreaseOverlap_SeqBAIJ(Mat,PetscInt,IS*,PetscInt);
494aa3045dSJed Brown EXTERN PetscErrorCode MatGetSubMatrix_SeqBAIJ(Mat,IS,IS,MatReuse,Mat*);
50690b6cddSBarry Smith EXTERN PetscErrorCode MatGetSubMatrices_SeqBAIJ(Mat,PetscInt,const IS[],const IS[],MatReuse,Mat*[]);
51dfbe8321SBarry Smith EXTERN PetscErrorCode MatMultTranspose_SeqBAIJ(Mat,Vec,Vec);
52dfbe8321SBarry Smith EXTERN PetscErrorCode MatMultTransposeAdd_SeqBAIJ(Mat,Vec,Vec,Vec);
53f4df32b1SMatthew Knepley EXTERN PetscErrorCode MatScale_SeqBAIJ(Mat,PetscScalar);
54dfbe8321SBarry Smith EXTERN PetscErrorCode MatNorm_SeqBAIJ(Mat,NormType,PetscReal *);
55dfbe8321SBarry Smith EXTERN PetscErrorCode MatEqual_SeqBAIJ(Mat,Mat,PetscTruth*);
56dfbe8321SBarry Smith EXTERN PetscErrorCode MatGetDiagonal_SeqBAIJ(Mat,Vec);
57dfbe8321SBarry Smith EXTERN PetscErrorCode MatDiagonalScale_SeqBAIJ(Mat,Vec,Vec);
58dfbe8321SBarry Smith EXTERN PetscErrorCode MatGetInfo_SeqBAIJ(Mat,MatInfoType,MatInfo *);
59dfbe8321SBarry Smith EXTERN PetscErrorCode MatZeroEntries_SeqBAIJ(Mat);
6054138f6bSKris Buschelman 
61dfbe8321SBarry Smith EXTERN PetscErrorCode MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(Mat);
6254138f6bSKris Buschelman 
63dfbe8321SBarry Smith EXTERN PetscErrorCode MatSolve_SeqBAIJ_1(Mat,Vec,Vec);
64dfbe8321SBarry Smith EXTERN PetscErrorCode MatSolve_SeqBAIJ_1_NaturalOrdering(Mat,Vec,Vec);
65dfbe8321SBarry Smith EXTERN PetscErrorCode MatSolve_SeqBAIJ_2(Mat,Vec,Vec);
668bfb2e2bSShri Abhyankar EXTERN PetscErrorCode MatSolve_SeqBAIJ_2_newdatastruct(Mat,Vec,Vec);
67dfbe8321SBarry Smith EXTERN PetscErrorCode MatSolve_SeqBAIJ_2_NaturalOrdering(Mat,Vec,Vec);
688bfb2e2bSShri Abhyankar EXTERN PetscErrorCode MatSolve_SeqBAIJ_2_NaturalOrdering_newdatastruct(Mat,Vec,Vec);
698bfb2e2bSShri Abhyankar EXTERN PetscErrorCode MatSolve_SeqBAIJ_3_NaturalOrdering_newdatastruct(Mat,Vec,Vec);
70dfbe8321SBarry Smith EXTERN PetscErrorCode MatSolve_SeqBAIJ_3(Mat,Vec,Vec);
718bfb2e2bSShri Abhyankar EXTERN PetscErrorCode MatSolve_SeqBAIJ_3_newdatastruct(Mat,Vec,Vec);
72dfbe8321SBarry Smith EXTERN PetscErrorCode MatSolve_SeqBAIJ_3_NaturalOrdering(Mat,Vec,Vec);
738bfb2e2bSShri Abhyankar EXTERN PetscErrorCode MatSolve_SeqBAIJ_3_NaturalOrdering_newdatastruct(Mat,Vec,Vec);
74dfbe8321SBarry Smith EXTERN PetscErrorCode MatSolve_SeqBAIJ_4(Mat,Vec,Vec);
758bfb2e2bSShri Abhyankar EXTERN PetscErrorCode MatSolve_SeqBAIJ_4_newdatastruct(Mat,Vec,Vec);
76dfbe8321SBarry Smith EXTERN PetscErrorCode MatSolve_SeqBAIJ_4_NaturalOrdering(Mat,Vec,Vec);
778bfb2e2bSShri Abhyankar EXTERN PetscErrorCode MatSolve_SeqBAIJ_4_NaturalOrdering_newdatastruct(Mat,Vec,Vec);
7854138f6bSKris Buschelman #if defined(PETSC_HAVE_SSE)
79dfbe8321SBarry Smith EXTERN PetscErrorCode MatSolve_SeqBAIJ_4_SSE_Demotion(Mat,Vec,Vec);
80dfbe8321SBarry Smith EXTERN PetscErrorCode MatSolve_SeqBAIJ_4_NaturalOrdering_SSE_Demotion(Mat,Vec,Vec);
81dfbe8321SBarry Smith EXTERN PetscErrorCode MatSolve_SeqBAIJ_4_NaturalOrdering_SSE_Demotion_usj(Mat,Vec,Vec);
824110d75bSKris Buschelman #endif
83dfbe8321SBarry Smith EXTERN PetscErrorCode MatSolve_SeqBAIJ_5(Mat,Vec,Vec);
848bfb2e2bSShri Abhyankar EXTERN PetscErrorCode MatSolve_SeqBAIJ_5_newdatastruct(Mat,Vec,Vec);
85dfbe8321SBarry Smith EXTERN PetscErrorCode MatSolve_SeqBAIJ_5_NaturalOrdering(Mat,Vec,Vec);
868bfb2e2bSShri Abhyankar EXTERN PetscErrorCode MatSolve_SeqBAIJ_5_NaturalOrdering_newdatastruct(Mat,Vec,Vec);
87dfbe8321SBarry Smith EXTERN PetscErrorCode MatSolve_SeqBAIJ_6(Mat,Vec,Vec);
888bfb2e2bSShri Abhyankar EXTERN PetscErrorCode MatSolve_SeqBAIJ_6_newdatastruct(Mat,Vec,Vec);
89dfbe8321SBarry Smith EXTERN PetscErrorCode MatSolve_SeqBAIJ_6_NaturalOrdering(Mat,Vec,Vec);
908bfb2e2bSShri Abhyankar EXTERN PetscErrorCode MatSolve_SeqBAIJ_6_NaturalOrdering_newdatastruct(Mat,Vec,Vec);
91dfbe8321SBarry Smith EXTERN PetscErrorCode MatSolve_SeqBAIJ_7(Mat,Vec,Vec);
928bfb2e2bSShri Abhyankar EXTERN PetscErrorCode MatSolve_SeqBAIJ_7_newdatastruct(Mat,Vec,Vec);
93dfbe8321SBarry Smith EXTERN PetscErrorCode MatSolve_SeqBAIJ_7_NaturalOrdering(Mat,Vec,Vec);
948bfb2e2bSShri Abhyankar EXTERN PetscErrorCode MatSolve_SeqBAIJ_7_NaturalOrdering_newdatastruct(Mat,Vec,Vec);
95dfbe8321SBarry Smith EXTERN PetscErrorCode MatSolve_SeqBAIJ_N(Mat,Vec,Vec);
968bfb2e2bSShri Abhyankar EXTERN PetscErrorCode MatSolve_SeqBAIJ_N_newdatastruct(Mat,Vec,Vec);
978bfb2e2bSShri Abhyankar EXTERN PetscErrorCode MatSolve_SeqBAIJ_N_NaturalOrdering_newdatastruct(Mat,Vec,Vec);
98dfbe8321SBarry Smith EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_Update(Mat,Vec,Vec);
99dfbe8321SBarry Smith EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_1(Mat,Vec,Vec);
100dfbe8321SBarry Smith EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_1_NaturalOrdering(Mat,Vec,Vec);
101dfbe8321SBarry Smith EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_2(Mat,Vec,Vec);
102dfbe8321SBarry Smith EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_2_NaturalOrdering(Mat,Vec,Vec);
103dfbe8321SBarry Smith EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_3(Mat,Vec,Vec);
104dfbe8321SBarry Smith EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_3_NaturalOrdering(Mat,Vec,Vec);
105dfbe8321SBarry Smith EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_4(Mat,Vec,Vec);
106dfbe8321SBarry Smith EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_4_NaturalOrdering(Mat,Vec,Vec);
107dfbe8321SBarry Smith EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_5(Mat,Vec,Vec);
108dfbe8321SBarry Smith EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_5_NaturalOrdering(Mat,Vec,Vec);
109dfbe8321SBarry Smith EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_6(Mat,Vec,Vec);
110dfbe8321SBarry Smith EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_6_NaturalOrdering(Mat,Vec,Vec);
111dfbe8321SBarry Smith EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_7(Mat,Vec,Vec);
112dfbe8321SBarry Smith EXTERN PetscErrorCode MatSolveTranspose_SeqBAIJ_7_NaturalOrdering(Mat,Vec,Vec);
11354138f6bSKris Buschelman 
114faca2338SShri Abhyankar EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_N_newdatastruct(Mat,Mat,const MatFactorInfo*);
115faca2338SShri Abhyankar 
1160481f469SBarry Smith EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_1(Mat,Mat,const MatFactorInfo*);
1170481f469SBarry Smith EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_2(Mat,Mat,const MatFactorInfo*);
118b588c5a2SHong Zhang EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_2_newdatastruct(Mat,Mat,const MatFactorInfo*);
1190481f469SBarry Smith EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering(Mat,Mat,const MatFactorInfo*);
120*4c000e2eSHong Zhang EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering_newdatastruct(Mat,Mat,const MatFactorInfo*);
121*4c000e2eSHong Zhang 
1220481f469SBarry Smith EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_3(Mat,Mat,const MatFactorInfo*);
1230481f469SBarry Smith EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering(Mat,Mat,const MatFactorInfo*);
1240481f469SBarry Smith EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_4(Mat,Mat,const MatFactorInfo*);
1250481f469SBarry Smith EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering(Mat,Mat,const MatFactorInfo*);
12654138f6bSKris Buschelman #if defined(PETSC_HAVE_SSE)
1270481f469SBarry Smith EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_SSE(Mat,Mat,const MatFactorInfo*);
1280481f469SBarry Smith EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_SSE_usj(Mat,Mat,const MatFactorInfo*);
12954138f6bSKris Buschelman #else
13054138f6bSKris Buschelman #endif
1310481f469SBarry Smith EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_5(Mat,Mat,const MatFactorInfo*);
1320481f469SBarry Smith EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering(Mat,Mat,const MatFactorInfo*);
1330481f469SBarry Smith EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_6(Mat,Mat,const MatFactorInfo*);
1340481f469SBarry Smith EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering(Mat,Mat,const MatFactorInfo*);
1350481f469SBarry Smith EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_7(Mat,Mat,const MatFactorInfo*);
1360481f469SBarry Smith EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering(Mat,Mat,const MatFactorInfo*);
1370481f469SBarry Smith EXTERN PetscErrorCode MatLUFactorNumeric_SeqBAIJ_N(Mat,Mat,const MatFactorInfo*);
13854138f6bSKris Buschelman 
139dfbe8321SBarry Smith EXTERN PetscErrorCode MatMult_SeqBAIJ_1(Mat,Vec,Vec);
140dfbe8321SBarry Smith EXTERN PetscErrorCode MatMult_SeqBAIJ_2(Mat,Vec,Vec);
141dfbe8321SBarry Smith EXTERN PetscErrorCode MatMult_SeqBAIJ_3(Mat,Vec,Vec);
142dfbe8321SBarry Smith EXTERN PetscErrorCode MatMult_SeqBAIJ_4(Mat,Vec,Vec);
143dfbe8321SBarry Smith EXTERN PetscErrorCode MatMult_SeqBAIJ_5(Mat,Vec,Vec);
144dfbe8321SBarry Smith EXTERN PetscErrorCode MatMult_SeqBAIJ_6(Mat,Vec,Vec);
145dfbe8321SBarry Smith EXTERN PetscErrorCode MatMult_SeqBAIJ_7(Mat,Vec,Vec);
146dfbe8321SBarry Smith EXTERN PetscErrorCode MatMult_SeqBAIJ_N(Mat,Vec,Vec);
14754138f6bSKris Buschelman 
148dfbe8321SBarry Smith EXTERN PetscErrorCode MatMultAdd_SeqBAIJ_1(Mat,Vec,Vec,Vec);
149dfbe8321SBarry Smith EXTERN PetscErrorCode MatMultAdd_SeqBAIJ_2(Mat,Vec,Vec,Vec);
150dfbe8321SBarry Smith EXTERN PetscErrorCode MatMultAdd_SeqBAIJ_3(Mat,Vec,Vec,Vec);
151dfbe8321SBarry Smith EXTERN PetscErrorCode MatMultAdd_SeqBAIJ_4(Mat,Vec,Vec,Vec);
152dfbe8321SBarry Smith EXTERN PetscErrorCode MatMultAdd_SeqBAIJ_5(Mat,Vec,Vec,Vec);
153dfbe8321SBarry Smith EXTERN PetscErrorCode MatMultAdd_SeqBAIJ_6(Mat,Vec,Vec,Vec);
154dfbe8321SBarry Smith EXTERN PetscErrorCode MatMultAdd_SeqBAIJ_7(Mat,Vec,Vec,Vec);
155dfbe8321SBarry Smith EXTERN PetscErrorCode MatMultAdd_SeqBAIJ_N(Mat,Vec,Vec,Vec);
156a313700dSBarry Smith EXTERN PetscErrorCode MatLoad_SeqBAIJ(PetscViewer, const MatType,Mat*);
1572593348eSBarry Smith 
158*4c000e2eSHong Zhang /*
159*4c000e2eSHong Zhang   Kernel_A_gets_A_times_B_2: A = A * B with size bs=2
160*4c000e2eSHong Zhang 
161*4c000e2eSHong Zhang   Input Parameters:
162*4c000e2eSHong Zhang +  A,B - square bs by bs arrays stored in column major order
163*4c000e2eSHong Zhang -  W   - bs*bs work arrary
164*4c000e2eSHong Zhang 
165*4c000e2eSHong Zhang   Output Parameter:
166*4c000e2eSHong Zhang .  A = A * B
167*4c000e2eSHong Zhang */
168*4c000e2eSHong Zhang 
169*4c000e2eSHong Zhang #define Kernel_A_gets_A_times_B_2(A,B,W) 0;\
170*4c000e2eSHong Zhang {\
171*4c000e2eSHong Zhang   PetscMemcpy(W,A,4*sizeof(MatScalar));\
172*4c000e2eSHong Zhang   A[0] = W[0]*B[0] + W[2]*B[1];\
173*4c000e2eSHong Zhang   A[1] = W[1]*B[0] + W[3]*B[1];\
174*4c000e2eSHong Zhang   A[2] = W[0]*B[2] + W[2]*B[3];\
175*4c000e2eSHong Zhang   A[3] = W[1]*B[2] + W[3]*B[3];\
176*4c000e2eSHong Zhang }
177*4c000e2eSHong Zhang 
178*4c000e2eSHong Zhang /*
179*4c000e2eSHong Zhang   Kernel_A_gets_A_minus_B_times_C_2: A = A - B * C with size bs=2
180*4c000e2eSHong Zhang 
181*4c000e2eSHong Zhang   Input Parameters:
182*4c000e2eSHong Zhang +  A,B,C - square bs by bs arrays stored in column major order
183*4c000e2eSHong Zhang 
184*4c000e2eSHong Zhang   Output Parameter:
185*4c000e2eSHong Zhang .  A = A - B*C
186*4c000e2eSHong Zhang */
187*4c000e2eSHong Zhang 
188*4c000e2eSHong Zhang #define Kernel_A_gets_A_minus_B_times_C_2(A,B,C) 0;\
189*4c000e2eSHong Zhang {\
190*4c000e2eSHong Zhang   A[0] -= B[0]*C[0] + B[2]*C[1];\
191*4c000e2eSHong Zhang   A[1] -= B[1]*C[0] + B[3]*C[1];\
192*4c000e2eSHong Zhang   A[2] -= B[0]*C[2] + B[2]*C[3];\
193*4c000e2eSHong Zhang   A[3] -= B[1]*C[2] + B[3]*C[3];\
194*4c000e2eSHong Zhang }
195*4c000e2eSHong Zhang 
1962593348eSBarry Smith #endif
197