1*735d7f90SBarry Smith static char help[] = "Demonstrates the use of the COO interface to PETSc matrices for finite element computations\n\n"; 2*735d7f90SBarry Smith 3*735d7f90SBarry Smith /* 4*735d7f90SBarry Smith The COO interface for PETSc matrices provides a convenient way to provide finite element element stiffness matrices to PETSc matrix that should work 5*735d7f90SBarry Smith well on both CPUs and GPUs. It is an alternative to using MatSetValues() 6*735d7f90SBarry Smith 7*735d7f90SBarry Smith This example is intended for people who are NOT using DMPLEX or libCEED or any other higher-level infrastructure for finite elements; 8*735d7f90SBarry Smith it is only to demonstrate the concepts in a simple way for those people who are interested and for those people who are using PETSc for 9*735d7f90SBarry Smith linear algebra solvers but are managing their own finite element process. 10*735d7f90SBarry Smith 11*735d7f90SBarry Smith Please do NOT use this example as a starting point to writing your own finite element code from scratch! 12*735d7f90SBarry Smith 13*735d7f90SBarry Smith Each element in this example has three vertices; hence the the usage below needs to be adjusted for elements of a different number of vertices. 14*735d7f90SBarry Smith */ 15*735d7f90SBarry Smith 16*735d7f90SBarry Smith #include <petscmat.h> 17*735d7f90SBarry Smith #include "ex18.h" 18*735d7f90SBarry Smith 19*735d7f90SBarry Smith static PetscErrorCode CreateFEStruct(FEStruct *fe) 20*735d7f90SBarry Smith { 21*735d7f90SBarry Smith PetscErrorCode ierr; 22*735d7f90SBarry Smith 23*735d7f90SBarry Smith PetscFunctionBeginUser; 24*735d7f90SBarry Smith fe->Nv = 5; 25*735d7f90SBarry Smith fe->Ne = 3; 26*735d7f90SBarry Smith ierr = PetscMalloc1(3*fe->Ne,&fe->vertices);CHKERRQ(ierr); 27*735d7f90SBarry Smith /* the three vertices associated with each element in order of element */ 28*735d7f90SBarry Smith fe->vertices[0 + 0] = 0; 29*735d7f90SBarry Smith fe->vertices[0 + 1] = 1; 30*735d7f90SBarry Smith fe->vertices[0 + 2] = 2; 31*735d7f90SBarry Smith fe->vertices[3 + 0] = 2; 32*735d7f90SBarry Smith fe->vertices[3 + 1] = 1; 33*735d7f90SBarry Smith fe->vertices[3 + 2] = 3; 34*735d7f90SBarry Smith fe->vertices[6 + 0] = 2; 35*735d7f90SBarry Smith fe->vertices[6 + 1] = 4; 36*735d7f90SBarry Smith fe->vertices[6 + 2] = 3; 37*735d7f90SBarry Smith fe->n = 5; 38*735d7f90SBarry Smith PetscFunctionReturn(0); 39*735d7f90SBarry Smith } 40*735d7f90SBarry Smith 41*735d7f90SBarry Smith static PetscErrorCode DestroyFEStruct(FEStruct *fe) 42*735d7f90SBarry Smith { 43*735d7f90SBarry Smith PetscErrorCode ierr; 44*735d7f90SBarry Smith 45*735d7f90SBarry Smith PetscFunctionBeginUser; 46*735d7f90SBarry Smith ierr = PetscFree(fe->vertices);CHKERRQ(ierr); 47*735d7f90SBarry Smith ierr = PetscFree(fe->coo);CHKERRQ(ierr); 48*735d7f90SBarry Smith PetscFunctionReturn(0); 49*735d7f90SBarry Smith } 50*735d7f90SBarry Smith 51*735d7f90SBarry Smith static PetscErrorCode CreateMatrix(FEStruct *fe,Mat *A) 52*735d7f90SBarry Smith { 53*735d7f90SBarry Smith PetscErrorCode ierr; 54*735d7f90SBarry Smith PetscInt *oor,*ooc,cnt = 0; 55*735d7f90SBarry Smith 56*735d7f90SBarry Smith PetscFunctionBeginUser; 57*735d7f90SBarry Smith ierr = MatCreate(PETSC_COMM_WORLD,A);CHKERRQ(ierr); 58*735d7f90SBarry Smith ierr = MatSetSizes(*A,fe->n,fe->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 59*735d7f90SBarry Smith ierr = MatSetFromOptions(*A);CHKERRQ(ierr); 60*735d7f90SBarry Smith 61*735d7f90SBarry Smith /* determine for each entry in each element stiffness matrix the global row and colum */ 62*735d7f90SBarry Smith /* since the element is triangular with piecewise linear basis functions there are three degrees of freedom per element, one for each vertex */ 63*735d7f90SBarry Smith ierr = PetscMalloc2(3*3*fe->Ne,&oor,3*3*fe->Ne,&ooc);CHKERRQ(ierr); 64*735d7f90SBarry Smith for (PetscInt e=0; e<fe->Ne; e++) { 65*735d7f90SBarry Smith for (PetscInt vi=0; vi<3; vi++) { 66*735d7f90SBarry Smith for (PetscInt vj=0; vj<3; vj++) { 67*735d7f90SBarry Smith oor[cnt] = fe->vertices[3*e+vi]; 68*735d7f90SBarry Smith ooc[cnt++] = fe->vertices[3*e+vj]; 69*735d7f90SBarry Smith } 70*735d7f90SBarry Smith } 71*735d7f90SBarry Smith } 72*735d7f90SBarry Smith ierr = MatSetPreallocationCOO(*A,3*3*fe->Ne,oor,ooc);CHKERRQ(ierr); 73*735d7f90SBarry Smith ierr = PetscFree2(oor,ooc);CHKERRQ(ierr); 74*735d7f90SBarry Smith 75*735d7f90SBarry Smith /* determine the offset into the COO value array the offset of each element stiffness; there are 9 = 3*3 entries for each element stiffness */ 76*735d7f90SBarry Smith /* for lists of elements with different numbers of degrees of freedom assocated with each element the offsets will not be uniform */ 77*735d7f90SBarry Smith ierr = PetscMalloc1(fe->Ne,&fe->coo);CHKERRQ(ierr); 78*735d7f90SBarry Smith fe->coo[0] = 0; 79*735d7f90SBarry Smith for (PetscInt e=1; e<fe->Ne; e++) { 80*735d7f90SBarry Smith fe->coo[e] = fe->coo[e-1] + 3*3; 81*735d7f90SBarry Smith } 82*735d7f90SBarry Smith PetscFunctionReturn(0); 83*735d7f90SBarry Smith } 84*735d7f90SBarry Smith 85*735d7f90SBarry Smith static PetscErrorCode FillMatrixCPU(FEStruct *fe,Mat A) 86*735d7f90SBarry Smith { 87*735d7f90SBarry Smith PetscErrorCode ierr; 88*735d7f90SBarry Smith PetscScalar s[9]; 89*735d7f90SBarry Smith 90*735d7f90SBarry Smith PetscFunctionBeginUser; 91*735d7f90SBarry Smith /* simulation of traditional PETSc CPU based finite assembly process */ 92*735d7f90SBarry Smith for (PetscInt e=0; e<fe->Ne; e++) { 93*735d7f90SBarry Smith for (PetscInt vi=0; vi<3; vi++) { 94*735d7f90SBarry Smith for (PetscInt vj=0; vj<3; vj++) { 95*735d7f90SBarry Smith s[3*vi+vj] = vi+2*vj; 96*735d7f90SBarry Smith } 97*735d7f90SBarry Smith } 98*735d7f90SBarry Smith ierr = MatSetValues(A,3,fe->vertices + 3*e,3, fe->vertices + 3*e,s,ADD_VALUES);CHKERRQ(ierr); 99*735d7f90SBarry Smith } 100*735d7f90SBarry Smith ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 101*735d7f90SBarry Smith ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 102*735d7f90SBarry Smith PetscFunctionReturn(0); 103*735d7f90SBarry Smith } 104*735d7f90SBarry Smith 105*735d7f90SBarry Smith /* 106*735d7f90SBarry Smith Shows an example of tracking element offsets explicitly, which allows for 107*735d7f90SBarry Smith mixed-topology meshes and combining both volume and surface parts into the weak form. 108*735d7f90SBarry Smith */ 109*735d7f90SBarry Smith static PetscErrorCode FillMatrixCPUCOO(FEStruct *fe,Mat A) 110*735d7f90SBarry Smith { 111*735d7f90SBarry Smith PetscErrorCode ierr; 112*735d7f90SBarry Smith PetscScalar *v,*s; 113*735d7f90SBarry Smith 114*735d7f90SBarry Smith PetscFunctionBeginUser; 115*735d7f90SBarry Smith /* simulation of CPU based finite assembly process with COO */ 116*735d7f90SBarry Smith ierr = PetscMalloc1(3*3*fe->Ne,&v);CHKERRQ(ierr); 117*735d7f90SBarry Smith for (PetscInt e=0; e<fe->Ne; e++) { 118*735d7f90SBarry Smith s = v + fe->coo[e]; /* point to location in COO of current element stiffness */ 119*735d7f90SBarry Smith for (PetscInt vi=0; vi<3; vi++) { 120*735d7f90SBarry Smith for (PetscInt vj=0; vj<3; vj++) { 121*735d7f90SBarry Smith s[3*vi+vj] = vi+2*vj; 122*735d7f90SBarry Smith } 123*735d7f90SBarry Smith } 124*735d7f90SBarry Smith } 125*735d7f90SBarry Smith ierr = MatSetValuesCOO(A,v,ADD_VALUES);CHKERRQ(ierr); 126*735d7f90SBarry Smith ierr = PetscFree(v);CHKERRQ(ierr); 127*735d7f90SBarry Smith PetscFunctionReturn(0); 128*735d7f90SBarry Smith } 129*735d7f90SBarry Smith 130*735d7f90SBarry Smith /* 131*735d7f90SBarry Smith Uses a multi-dimensional indexing technique that works for homogeneous meshes 132*735d7f90SBarry Smith such as single-topology with volume integral only. 133*735d7f90SBarry Smith */ 134*735d7f90SBarry Smith static PetscErrorCode FillMatrixCPUCOO3d(FEStruct *fe,Mat A) 135*735d7f90SBarry Smith { 136*735d7f90SBarry Smith PetscErrorCode ierr; 137*735d7f90SBarry Smith PetscScalar (*s)[3][3]; 138*735d7f90SBarry Smith 139*735d7f90SBarry Smith PetscFunctionBeginUser; 140*735d7f90SBarry Smith /* simulation of CPU based finite assembly process with COO */ 141*735d7f90SBarry Smith ierr = PetscMalloc1(fe->Ne,&s);CHKERRQ(ierr); 142*735d7f90SBarry Smith for (PetscInt e=0; e<fe->Ne; e++) { 143*735d7f90SBarry Smith for (PetscInt vi=0; vi<3; vi++) { 144*735d7f90SBarry Smith for (PetscInt vj=0; vj<3; vj++) { 145*735d7f90SBarry Smith s[e][vi][vj] = vi+2*vj; 146*735d7f90SBarry Smith } 147*735d7f90SBarry Smith } 148*735d7f90SBarry Smith } 149*735d7f90SBarry Smith ierr = MatSetValuesCOO(A,(PetscScalar*)s,INSERT_VALUES);CHKERRQ(ierr); 150*735d7f90SBarry Smith ierr = PetscFree(s);CHKERRQ(ierr); 151*735d7f90SBarry Smith PetscFunctionReturn(0); 152*735d7f90SBarry Smith } 153*735d7f90SBarry Smith 154*735d7f90SBarry Smith int main(int argc, char **args) 155*735d7f90SBarry Smith { 156*735d7f90SBarry Smith Mat A; 157*735d7f90SBarry Smith PetscErrorCode ierr; 158*735d7f90SBarry Smith FEStruct fe; 159*735d7f90SBarry Smith PetscMPIInt size; 160*735d7f90SBarry Smith PetscBool is_kokkos,is_cuda; 161*735d7f90SBarry Smith 162*735d7f90SBarry Smith ierr = PetscInitialize(&argc,&args,(char*)0,help);if (ierr) return ierr; 163*735d7f90SBarry Smith ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRMPI(ierr); 164*735d7f90SBarry Smith if (size > 1) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_WRONG_MPI_SIZE,"Demonstration is only for sequential runs"); 165*735d7f90SBarry Smith 166*735d7f90SBarry Smith ierr = CreateFEStruct(&fe);CHKERRQ(ierr); 167*735d7f90SBarry Smith ierr = CreateMatrix(&fe,&A);CHKERRQ(ierr); 168*735d7f90SBarry Smith 169*735d7f90SBarry Smith ierr = FillMatrixCPU(&fe,A);CHKERRQ(ierr); 170*735d7f90SBarry Smith ierr = MatView(A,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); 171*735d7f90SBarry Smith 172*735d7f90SBarry Smith ierr = MatZeroEntries(A);CHKERRQ(ierr); 173*735d7f90SBarry Smith ierr = FillMatrixCPUCOO(&fe,A);CHKERRQ(ierr); 174*735d7f90SBarry Smith ierr = MatView(A,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); 175*735d7f90SBarry Smith 176*735d7f90SBarry Smith ierr = MatZeroEntries(A);CHKERRQ(ierr); 177*735d7f90SBarry Smith ierr = FillMatrixCPUCOO3d(&fe,A);CHKERRQ(ierr); 178*735d7f90SBarry Smith ierr = MatView(A,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); 179*735d7f90SBarry Smith 180*735d7f90SBarry Smith ierr = MatZeroEntries(A);CHKERRQ(ierr); 181*735d7f90SBarry Smith ierr = PetscObjectBaseTypeCompare((PetscObject)A,MATSEQAIJKOKKOS,&is_kokkos);CHKERRQ(ierr); 182*735d7f90SBarry Smith ierr = PetscObjectBaseTypeCompare((PetscObject)A,MATSEQAIJCUSPARSE,&is_cuda);CHKERRQ(ierr); 183*735d7f90SBarry Smith #if defined(PETSC_HAVE_KOKKOS) 184*735d7f90SBarry Smith if (is_kokkos) {ierr = FillMatrixKokkosCOO(&fe,A);CHKERRQ(ierr);} 185*735d7f90SBarry Smith #endif 186*735d7f90SBarry Smith #if defined(PETSC_HAVE_CUDA) 187*735d7f90SBarry Smith if (is_cuda) {ierr = FillMatrixCUDACOO(&fe,A);CHKERRQ(ierr);} 188*735d7f90SBarry Smith #endif 189*735d7f90SBarry Smith ierr = MatView(A,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); 190*735d7f90SBarry Smith 191*735d7f90SBarry Smith ierr = MatDestroy(&A);CHKERRQ(ierr); 192*735d7f90SBarry Smith ierr = DestroyFEStruct(&fe);CHKERRQ(ierr); 193*735d7f90SBarry Smith ierr = PetscFinalize(); 194*735d7f90SBarry Smith return ierr; 195*735d7f90SBarry Smith } 196*735d7f90SBarry Smith 197*735d7f90SBarry Smith /*TEST 198*735d7f90SBarry Smith build: 199*735d7f90SBarry Smith requires: cuda kokkos_kernels 200*735d7f90SBarry Smith depends: ex18cu.cu ex18kok.kokkos.cxx 201*735d7f90SBarry Smith 202*735d7f90SBarry Smith testset: 203*735d7f90SBarry Smith filter: grep -v "type" 204*735d7f90SBarry Smith output_file: output/ex18_1.out 205*735d7f90SBarry Smith 206*735d7f90SBarry Smith test: 207*735d7f90SBarry Smith suffix: kok 208*735d7f90SBarry Smith requires: kokkos_kernels 209*735d7f90SBarry Smith args: -mat_type aijkokkos 210*735d7f90SBarry Smith 211*735d7f90SBarry Smith test: 212*735d7f90SBarry Smith suffix: cuda 213*735d7f90SBarry Smith requires: cuda 214*735d7f90SBarry Smith args: -mat_type aijcusparse 215*735d7f90SBarry Smith 216*735d7f90SBarry Smith TEST*/ 217