xref: /petsc/src/mat/tutorials/ex18.c (revision 735d7f9063f91127bea1586e8c97a70d4dba06ba)
1*735d7f90SBarry Smith static char help[] = "Demonstrates the use of the COO interface to PETSc matrices for finite element computations\n\n";
2*735d7f90SBarry Smith 
3*735d7f90SBarry Smith /*
4*735d7f90SBarry Smith      The COO interface for PETSc matrices provides a convenient way to provide finite element element stiffness matrices to PETSc matrix that should work
5*735d7f90SBarry Smith    well on both CPUs and GPUs. It is an alternative to using MatSetValues()
6*735d7f90SBarry Smith 
7*735d7f90SBarry Smith      This example is intended for people who are NOT using DMPLEX or libCEED or any other higher-level infrastructure for finite elements;
8*735d7f90SBarry Smith    it is only to demonstrate the concepts in a simple way for those people who are interested and for those people who are using PETSc for
9*735d7f90SBarry Smith    linear algebra solvers but are managing their own finite element process.
10*735d7f90SBarry Smith 
11*735d7f90SBarry Smith      Please do NOT use this example as a starting point to writing your own finite element code from scratch!
12*735d7f90SBarry Smith 
13*735d7f90SBarry Smith      Each element in this example has three vertices; hence the the usage below needs to be adjusted for elements of a different number of vertices.
14*735d7f90SBarry Smith */
15*735d7f90SBarry Smith 
16*735d7f90SBarry Smith #include <petscmat.h>
17*735d7f90SBarry Smith #include "ex18.h"
18*735d7f90SBarry Smith 
19*735d7f90SBarry Smith static PetscErrorCode CreateFEStruct(FEStruct *fe)
20*735d7f90SBarry Smith {
21*735d7f90SBarry Smith   PetscErrorCode ierr;
22*735d7f90SBarry Smith 
23*735d7f90SBarry Smith   PetscFunctionBeginUser;
24*735d7f90SBarry Smith   fe->Nv = 5;
25*735d7f90SBarry Smith   fe->Ne = 3;
26*735d7f90SBarry Smith   ierr = PetscMalloc1(3*fe->Ne,&fe->vertices);CHKERRQ(ierr);
27*735d7f90SBarry Smith   /* the three vertices associated with each element in order of element */
28*735d7f90SBarry Smith   fe->vertices[0 + 0] = 0;
29*735d7f90SBarry Smith   fe->vertices[0 + 1] = 1;
30*735d7f90SBarry Smith   fe->vertices[0 + 2] = 2;
31*735d7f90SBarry Smith   fe->vertices[3 + 0] = 2;
32*735d7f90SBarry Smith   fe->vertices[3 + 1] = 1;
33*735d7f90SBarry Smith   fe->vertices[3 + 2] = 3;
34*735d7f90SBarry Smith   fe->vertices[6 + 0] = 2;
35*735d7f90SBarry Smith   fe->vertices[6 + 1] = 4;
36*735d7f90SBarry Smith   fe->vertices[6 + 2] = 3;
37*735d7f90SBarry Smith   fe->n  = 5;
38*735d7f90SBarry Smith   PetscFunctionReturn(0);
39*735d7f90SBarry Smith }
40*735d7f90SBarry Smith 
41*735d7f90SBarry Smith static PetscErrorCode DestroyFEStruct(FEStruct *fe)
42*735d7f90SBarry Smith {
43*735d7f90SBarry Smith   PetscErrorCode ierr;
44*735d7f90SBarry Smith 
45*735d7f90SBarry Smith   PetscFunctionBeginUser;
46*735d7f90SBarry Smith   ierr = PetscFree(fe->vertices);CHKERRQ(ierr);
47*735d7f90SBarry Smith   ierr = PetscFree(fe->coo);CHKERRQ(ierr);
48*735d7f90SBarry Smith   PetscFunctionReturn(0);
49*735d7f90SBarry Smith }
50*735d7f90SBarry Smith 
51*735d7f90SBarry Smith static PetscErrorCode CreateMatrix(FEStruct *fe,Mat *A)
52*735d7f90SBarry Smith {
53*735d7f90SBarry Smith   PetscErrorCode ierr;
54*735d7f90SBarry Smith   PetscInt       *oor,*ooc,cnt = 0;
55*735d7f90SBarry Smith 
56*735d7f90SBarry Smith   PetscFunctionBeginUser;
57*735d7f90SBarry Smith   ierr = MatCreate(PETSC_COMM_WORLD,A);CHKERRQ(ierr);
58*735d7f90SBarry Smith   ierr = MatSetSizes(*A,fe->n,fe->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr);
59*735d7f90SBarry Smith   ierr = MatSetFromOptions(*A);CHKERRQ(ierr);
60*735d7f90SBarry Smith 
61*735d7f90SBarry Smith   /* determine for each entry in each element stiffness matrix the global row and colum */
62*735d7f90SBarry Smith   /* since the element is triangular with piecewise linear basis functions there are three degrees of freedom per element, one for each vertex */
63*735d7f90SBarry Smith   ierr = PetscMalloc2(3*3*fe->Ne,&oor,3*3*fe->Ne,&ooc);CHKERRQ(ierr);
64*735d7f90SBarry Smith   for (PetscInt e=0; e<fe->Ne; e++) {
65*735d7f90SBarry Smith     for (PetscInt vi=0; vi<3; vi++) {
66*735d7f90SBarry Smith       for (PetscInt vj=0; vj<3; vj++) {
67*735d7f90SBarry Smith         oor[cnt]   = fe->vertices[3*e+vi];
68*735d7f90SBarry Smith         ooc[cnt++] = fe->vertices[3*e+vj];
69*735d7f90SBarry Smith       }
70*735d7f90SBarry Smith     }
71*735d7f90SBarry Smith   }
72*735d7f90SBarry Smith   ierr = MatSetPreallocationCOO(*A,3*3*fe->Ne,oor,ooc);CHKERRQ(ierr);
73*735d7f90SBarry Smith   ierr = PetscFree2(oor,ooc);CHKERRQ(ierr);
74*735d7f90SBarry Smith 
75*735d7f90SBarry Smith   /* determine the offset into the COO value array the offset of each element stiffness; there are 9 = 3*3 entries for each element stiffness */
76*735d7f90SBarry Smith   /* for lists of elements with different numbers of degrees of freedom assocated with each element the offsets will not be uniform */
77*735d7f90SBarry Smith   ierr = PetscMalloc1(fe->Ne,&fe->coo);CHKERRQ(ierr);
78*735d7f90SBarry Smith   fe->coo[0] = 0;
79*735d7f90SBarry Smith   for (PetscInt e=1; e<fe->Ne; e++) {
80*735d7f90SBarry Smith     fe->coo[e] = fe->coo[e-1] + 3*3;
81*735d7f90SBarry Smith   }
82*735d7f90SBarry Smith   PetscFunctionReturn(0);
83*735d7f90SBarry Smith }
84*735d7f90SBarry Smith 
85*735d7f90SBarry Smith static PetscErrorCode FillMatrixCPU(FEStruct *fe,Mat A)
86*735d7f90SBarry Smith {
87*735d7f90SBarry Smith   PetscErrorCode ierr;
88*735d7f90SBarry Smith   PetscScalar    s[9];
89*735d7f90SBarry Smith 
90*735d7f90SBarry Smith   PetscFunctionBeginUser;
91*735d7f90SBarry Smith   /* simulation of traditional PETSc CPU based finite assembly process */
92*735d7f90SBarry Smith   for (PetscInt e=0; e<fe->Ne; e++) {
93*735d7f90SBarry Smith     for (PetscInt vi=0; vi<3; vi++) {
94*735d7f90SBarry Smith       for (PetscInt vj=0; vj<3; vj++) {
95*735d7f90SBarry Smith         s[3*vi+vj] = vi+2*vj;
96*735d7f90SBarry Smith       }
97*735d7f90SBarry Smith     }
98*735d7f90SBarry Smith     ierr = MatSetValues(A,3,fe->vertices + 3*e,3, fe->vertices + 3*e,s,ADD_VALUES);CHKERRQ(ierr);
99*735d7f90SBarry Smith   }
100*735d7f90SBarry Smith   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
101*735d7f90SBarry Smith   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
102*735d7f90SBarry Smith   PetscFunctionReturn(0);
103*735d7f90SBarry Smith }
104*735d7f90SBarry Smith 
105*735d7f90SBarry Smith /*
106*735d7f90SBarry Smith    Shows an example of tracking element offsets explicitly, which allows for
107*735d7f90SBarry Smith    mixed-topology meshes and combining both volume and surface parts into the weak form.
108*735d7f90SBarry Smith */
109*735d7f90SBarry Smith static PetscErrorCode FillMatrixCPUCOO(FEStruct *fe,Mat A)
110*735d7f90SBarry Smith {
111*735d7f90SBarry Smith   PetscErrorCode ierr;
112*735d7f90SBarry Smith   PetscScalar    *v,*s;
113*735d7f90SBarry Smith 
114*735d7f90SBarry Smith   PetscFunctionBeginUser;
115*735d7f90SBarry Smith   /* simulation of CPU based finite assembly process with COO */
116*735d7f90SBarry Smith   ierr = PetscMalloc1(3*3*fe->Ne,&v);CHKERRQ(ierr);
117*735d7f90SBarry Smith   for (PetscInt e=0; e<fe->Ne; e++) {
118*735d7f90SBarry Smith     s = v + fe->coo[e]; /* point to location in COO of current element stiffness */
119*735d7f90SBarry Smith     for (PetscInt vi=0; vi<3; vi++) {
120*735d7f90SBarry Smith       for (PetscInt vj=0; vj<3; vj++) {
121*735d7f90SBarry Smith         s[3*vi+vj] = vi+2*vj;
122*735d7f90SBarry Smith       }
123*735d7f90SBarry Smith     }
124*735d7f90SBarry Smith   }
125*735d7f90SBarry Smith   ierr = MatSetValuesCOO(A,v,ADD_VALUES);CHKERRQ(ierr);
126*735d7f90SBarry Smith   ierr = PetscFree(v);CHKERRQ(ierr);
127*735d7f90SBarry Smith   PetscFunctionReturn(0);
128*735d7f90SBarry Smith }
129*735d7f90SBarry Smith 
130*735d7f90SBarry Smith /*
131*735d7f90SBarry Smith   Uses a multi-dimensional indexing technique that works for homogeneous meshes
132*735d7f90SBarry Smith   such as single-topology with volume integral only.
133*735d7f90SBarry Smith */
134*735d7f90SBarry Smith static PetscErrorCode FillMatrixCPUCOO3d(FEStruct *fe,Mat A)
135*735d7f90SBarry Smith {
136*735d7f90SBarry Smith   PetscErrorCode ierr;
137*735d7f90SBarry Smith   PetscScalar    (*s)[3][3];
138*735d7f90SBarry Smith 
139*735d7f90SBarry Smith   PetscFunctionBeginUser;
140*735d7f90SBarry Smith   /* simulation of CPU based finite assembly process with COO */
141*735d7f90SBarry Smith   ierr = PetscMalloc1(fe->Ne,&s);CHKERRQ(ierr);
142*735d7f90SBarry Smith   for (PetscInt e=0; e<fe->Ne; e++) {
143*735d7f90SBarry Smith     for (PetscInt vi=0; vi<3; vi++) {
144*735d7f90SBarry Smith       for (PetscInt vj=0; vj<3; vj++) {
145*735d7f90SBarry Smith         s[e][vi][vj] = vi+2*vj;
146*735d7f90SBarry Smith       }
147*735d7f90SBarry Smith     }
148*735d7f90SBarry Smith   }
149*735d7f90SBarry Smith   ierr = MatSetValuesCOO(A,(PetscScalar*)s,INSERT_VALUES);CHKERRQ(ierr);
150*735d7f90SBarry Smith   ierr = PetscFree(s);CHKERRQ(ierr);
151*735d7f90SBarry Smith   PetscFunctionReturn(0);
152*735d7f90SBarry Smith }
153*735d7f90SBarry Smith 
154*735d7f90SBarry Smith int main(int argc, char **args)
155*735d7f90SBarry Smith {
156*735d7f90SBarry Smith   Mat             A;
157*735d7f90SBarry Smith   PetscErrorCode  ierr;
158*735d7f90SBarry Smith   FEStruct        fe;
159*735d7f90SBarry Smith   PetscMPIInt     size;
160*735d7f90SBarry Smith   PetscBool       is_kokkos,is_cuda;
161*735d7f90SBarry Smith 
162*735d7f90SBarry Smith   ierr = PetscInitialize(&argc,&args,(char*)0,help);if (ierr) return ierr;
163*735d7f90SBarry Smith   ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRMPI(ierr);
164*735d7f90SBarry Smith   if (size > 1) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_WRONG_MPI_SIZE,"Demonstration is only for sequential runs");
165*735d7f90SBarry Smith 
166*735d7f90SBarry Smith   ierr = CreateFEStruct(&fe);CHKERRQ(ierr);
167*735d7f90SBarry Smith   ierr = CreateMatrix(&fe,&A);CHKERRQ(ierr);
168*735d7f90SBarry Smith 
169*735d7f90SBarry Smith   ierr = FillMatrixCPU(&fe,A);CHKERRQ(ierr);
170*735d7f90SBarry Smith   ierr = MatView(A,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
171*735d7f90SBarry Smith 
172*735d7f90SBarry Smith   ierr = MatZeroEntries(A);CHKERRQ(ierr);
173*735d7f90SBarry Smith   ierr = FillMatrixCPUCOO(&fe,A);CHKERRQ(ierr);
174*735d7f90SBarry Smith   ierr = MatView(A,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
175*735d7f90SBarry Smith 
176*735d7f90SBarry Smith   ierr = MatZeroEntries(A);CHKERRQ(ierr);
177*735d7f90SBarry Smith   ierr = FillMatrixCPUCOO3d(&fe,A);CHKERRQ(ierr);
178*735d7f90SBarry Smith   ierr = MatView(A,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
179*735d7f90SBarry Smith 
180*735d7f90SBarry Smith   ierr = MatZeroEntries(A);CHKERRQ(ierr);
181*735d7f90SBarry Smith   ierr = PetscObjectBaseTypeCompare((PetscObject)A,MATSEQAIJKOKKOS,&is_kokkos);CHKERRQ(ierr);
182*735d7f90SBarry Smith   ierr = PetscObjectBaseTypeCompare((PetscObject)A,MATSEQAIJCUSPARSE,&is_cuda);CHKERRQ(ierr);
183*735d7f90SBarry Smith  #if defined(PETSC_HAVE_KOKKOS)
184*735d7f90SBarry Smith   if (is_kokkos) {ierr = FillMatrixKokkosCOO(&fe,A);CHKERRQ(ierr);}
185*735d7f90SBarry Smith  #endif
186*735d7f90SBarry Smith  #if defined(PETSC_HAVE_CUDA)
187*735d7f90SBarry Smith   if (is_cuda) {ierr = FillMatrixCUDACOO(&fe,A);CHKERRQ(ierr);}
188*735d7f90SBarry Smith  #endif
189*735d7f90SBarry Smith   ierr = MatView(A,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
190*735d7f90SBarry Smith 
191*735d7f90SBarry Smith   ierr = MatDestroy(&A);CHKERRQ(ierr);
192*735d7f90SBarry Smith   ierr = DestroyFEStruct(&fe);CHKERRQ(ierr);
193*735d7f90SBarry Smith   ierr = PetscFinalize();
194*735d7f90SBarry Smith   return ierr;
195*735d7f90SBarry Smith }
196*735d7f90SBarry Smith 
197*735d7f90SBarry Smith /*TEST
198*735d7f90SBarry Smith   build:
199*735d7f90SBarry Smith     requires: cuda kokkos_kernels
200*735d7f90SBarry Smith     depends: ex18cu.cu ex18kok.kokkos.cxx
201*735d7f90SBarry Smith 
202*735d7f90SBarry Smith   testset:
203*735d7f90SBarry Smith     filter: grep -v "type"
204*735d7f90SBarry Smith     output_file: output/ex18_1.out
205*735d7f90SBarry Smith 
206*735d7f90SBarry Smith     test:
207*735d7f90SBarry Smith       suffix: kok
208*735d7f90SBarry Smith       requires: kokkos_kernels
209*735d7f90SBarry Smith       args: -mat_type aijkokkos
210*735d7f90SBarry Smith 
211*735d7f90SBarry Smith     test:
212*735d7f90SBarry Smith       suffix: cuda
213*735d7f90SBarry Smith       requires: cuda
214*735d7f90SBarry Smith       args: -mat_type aijcusparse
215*735d7f90SBarry Smith 
216*735d7f90SBarry Smith TEST*/
217