1735d7f90SBarry Smith static char help[] = "Demonstrates the use of the COO interface to PETSc matrices for finite element computations\n\n"; 2735d7f90SBarry Smith 3735d7f90SBarry Smith /* 4735d7f90SBarry Smith The COO interface for PETSc matrices provides a convenient way to provide finite element element stiffness matrices to PETSc matrix that should work 5735d7f90SBarry Smith well on both CPUs and GPUs. It is an alternative to using MatSetValues() 6735d7f90SBarry Smith 7735d7f90SBarry Smith This example is intended for people who are NOT using DMPLEX or libCEED or any other higher-level infrastructure for finite elements; 8735d7f90SBarry Smith it is only to demonstrate the concepts in a simple way for those people who are interested and for those people who are using PETSc for 9735d7f90SBarry Smith linear algebra solvers but are managing their own finite element process. 10735d7f90SBarry Smith 11735d7f90SBarry Smith Please do NOT use this example as a starting point to writing your own finite element code from scratch! 12735d7f90SBarry Smith 13735d7f90SBarry Smith Each element in this example has three vertices; hence the the usage below needs to be adjusted for elements of a different number of vertices. 14735d7f90SBarry Smith */ 15735d7f90SBarry Smith 16735d7f90SBarry Smith #include <petscmat.h> 17735d7f90SBarry Smith #include "ex18.h" 18735d7f90SBarry Smith 199371c9d4SSatish Balay static PetscErrorCode CreateFEStruct(FEStruct *fe) { 20735d7f90SBarry Smith PetscFunctionBeginUser; 21735d7f90SBarry Smith fe->Nv = 5; 22735d7f90SBarry Smith fe->Ne = 3; 239566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(3 * fe->Ne, &fe->vertices)); 24735d7f90SBarry Smith /* the three vertices associated with each element in order of element */ 25735d7f90SBarry Smith fe->vertices[0 + 0] = 0; 26735d7f90SBarry Smith fe->vertices[0 + 1] = 1; 27735d7f90SBarry Smith fe->vertices[0 + 2] = 2; 28735d7f90SBarry Smith fe->vertices[3 + 0] = 2; 29735d7f90SBarry Smith fe->vertices[3 + 1] = 1; 30735d7f90SBarry Smith fe->vertices[3 + 2] = 3; 31735d7f90SBarry Smith fe->vertices[6 + 0] = 2; 32735d7f90SBarry Smith fe->vertices[6 + 1] = 4; 33735d7f90SBarry Smith fe->vertices[6 + 2] = 3; 34735d7f90SBarry Smith fe->n = 5; 35735d7f90SBarry Smith PetscFunctionReturn(0); 36735d7f90SBarry Smith } 37735d7f90SBarry Smith 389371c9d4SSatish Balay static PetscErrorCode DestroyFEStruct(FEStruct *fe) { 39735d7f90SBarry Smith PetscFunctionBeginUser; 409566063dSJacob Faibussowitsch PetscCall(PetscFree(fe->vertices)); 419566063dSJacob Faibussowitsch PetscCall(PetscFree(fe->coo)); 42735d7f90SBarry Smith PetscFunctionReturn(0); 43735d7f90SBarry Smith } 44735d7f90SBarry Smith 459371c9d4SSatish Balay static PetscErrorCode CreateMatrix(FEStruct *fe, Mat *A) { 46735d7f90SBarry Smith PetscInt *oor, *ooc, cnt = 0; 47735d7f90SBarry Smith 48735d7f90SBarry Smith PetscFunctionBeginUser; 499566063dSJacob Faibussowitsch PetscCall(MatCreate(PETSC_COMM_WORLD, A)); 509566063dSJacob Faibussowitsch PetscCall(MatSetSizes(*A, fe->n, fe->n, PETSC_DECIDE, PETSC_DECIDE)); 519566063dSJacob Faibussowitsch PetscCall(MatSetFromOptions(*A)); 52735d7f90SBarry Smith 53735d7f90SBarry Smith /* determine for each entry in each element stiffness matrix the global row and colum */ 54735d7f90SBarry Smith /* since the element is triangular with piecewise linear basis functions there are three degrees of freedom per element, one for each vertex */ 559566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(3 * 3 * fe->Ne, &oor, 3 * 3 * fe->Ne, &ooc)); 56735d7f90SBarry Smith for (PetscInt e = 0; e < fe->Ne; e++) { 57735d7f90SBarry Smith for (PetscInt vi = 0; vi < 3; vi++) { 58735d7f90SBarry Smith for (PetscInt vj = 0; vj < 3; vj++) { 59735d7f90SBarry Smith oor[cnt] = fe->vertices[3 * e + vi]; 60735d7f90SBarry Smith ooc[cnt++] = fe->vertices[3 * e + vj]; 61735d7f90SBarry Smith } 62735d7f90SBarry Smith } 63735d7f90SBarry Smith } 649566063dSJacob Faibussowitsch PetscCall(MatSetPreallocationCOO(*A, 3 * 3 * fe->Ne, oor, ooc)); 659566063dSJacob Faibussowitsch PetscCall(PetscFree2(oor, ooc)); 66735d7f90SBarry Smith 67735d7f90SBarry Smith /* determine the offset into the COO value array the offset of each element stiffness; there are 9 = 3*3 entries for each element stiffness */ 68735d7f90SBarry Smith /* for lists of elements with different numbers of degrees of freedom assocated with each element the offsets will not be uniform */ 699566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(fe->Ne, &fe->coo)); 70735d7f90SBarry Smith fe->coo[0] = 0; 71*ad540459SPierre Jolivet for (PetscInt e = 1; e < fe->Ne; e++) fe->coo[e] = fe->coo[e - 1] + 3 * 3; 72735d7f90SBarry Smith PetscFunctionReturn(0); 73735d7f90SBarry Smith } 74735d7f90SBarry Smith 759371c9d4SSatish Balay static PetscErrorCode FillMatrixCPU(FEStruct *fe, Mat A) { 76735d7f90SBarry Smith PetscScalar s[9]; 77735d7f90SBarry Smith 78735d7f90SBarry Smith PetscFunctionBeginUser; 79735d7f90SBarry Smith /* simulation of traditional PETSc CPU based finite assembly process */ 80735d7f90SBarry Smith for (PetscInt e = 0; e < fe->Ne; e++) { 81735d7f90SBarry Smith for (PetscInt vi = 0; vi < 3; vi++) { 82*ad540459SPierre Jolivet for (PetscInt vj = 0; vj < 3; vj++) s[3 * vi + vj] = vi + 2 * vj; 83735d7f90SBarry Smith } 849566063dSJacob Faibussowitsch PetscCall(MatSetValues(A, 3, fe->vertices + 3 * e, 3, fe->vertices + 3 * e, s, ADD_VALUES)); 85735d7f90SBarry Smith } 869566063dSJacob Faibussowitsch PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 879566063dSJacob Faibussowitsch PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 88735d7f90SBarry Smith PetscFunctionReturn(0); 89735d7f90SBarry Smith } 90735d7f90SBarry Smith 91735d7f90SBarry Smith /* 92735d7f90SBarry Smith Shows an example of tracking element offsets explicitly, which allows for 93735d7f90SBarry Smith mixed-topology meshes and combining both volume and surface parts into the weak form. 94735d7f90SBarry Smith */ 959371c9d4SSatish Balay static PetscErrorCode FillMatrixCPUCOO(FEStruct *fe, Mat A) { 96735d7f90SBarry Smith PetscScalar *v, *s; 97735d7f90SBarry Smith 98735d7f90SBarry Smith PetscFunctionBeginUser; 99735d7f90SBarry Smith /* simulation of CPU based finite assembly process with COO */ 1009566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(3 * 3 * fe->Ne, &v)); 101735d7f90SBarry Smith for (PetscInt e = 0; e < fe->Ne; e++) { 102735d7f90SBarry Smith s = v + fe->coo[e]; /* point to location in COO of current element stiffness */ 103735d7f90SBarry Smith for (PetscInt vi = 0; vi < 3; vi++) { 104*ad540459SPierre Jolivet for (PetscInt vj = 0; vj < 3; vj++) s[3 * vi + vj] = vi + 2 * vj; 105735d7f90SBarry Smith } 106735d7f90SBarry Smith } 1079566063dSJacob Faibussowitsch PetscCall(MatSetValuesCOO(A, v, ADD_VALUES)); 1089566063dSJacob Faibussowitsch PetscCall(PetscFree(v)); 109735d7f90SBarry Smith PetscFunctionReturn(0); 110735d7f90SBarry Smith } 111735d7f90SBarry Smith 112735d7f90SBarry Smith /* 113735d7f90SBarry Smith Uses a multi-dimensional indexing technique that works for homogeneous meshes 114735d7f90SBarry Smith such as single-topology with volume integral only. 115735d7f90SBarry Smith */ 1169371c9d4SSatish Balay static PetscErrorCode FillMatrixCPUCOO3d(FEStruct *fe, Mat A) { 117735d7f90SBarry Smith PetscScalar(*s)[3][3]; 118735d7f90SBarry Smith 119735d7f90SBarry Smith PetscFunctionBeginUser; 120735d7f90SBarry Smith /* simulation of CPU based finite assembly process with COO */ 1219566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(fe->Ne, &s)); 122735d7f90SBarry Smith for (PetscInt e = 0; e < fe->Ne; e++) { 123735d7f90SBarry Smith for (PetscInt vi = 0; vi < 3; vi++) { 124*ad540459SPierre Jolivet for (PetscInt vj = 0; vj < 3; vj++) s[e][vi][vj] = vi + 2 * vj; 125735d7f90SBarry Smith } 126735d7f90SBarry Smith } 1279566063dSJacob Faibussowitsch PetscCall(MatSetValuesCOO(A, (PetscScalar *)s, INSERT_VALUES)); 1289566063dSJacob Faibussowitsch PetscCall(PetscFree(s)); 129735d7f90SBarry Smith PetscFunctionReturn(0); 130735d7f90SBarry Smith } 131735d7f90SBarry Smith 1329371c9d4SSatish Balay int main(int argc, char **args) { 133735d7f90SBarry Smith Mat A; 134735d7f90SBarry Smith FEStruct fe; 135735d7f90SBarry Smith PetscMPIInt size; 136735d7f90SBarry Smith PetscBool is_kokkos, is_cuda; 137735d7f90SBarry Smith 138327415f7SBarry Smith PetscFunctionBeginUser; 1399566063dSJacob Faibussowitsch PetscCall(PetscInitialize(&argc, &args, (char *)0, help)); 1409566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size)); 14154c59aa7SJacob Faibussowitsch PetscCheck(size <= 1, PETSC_COMM_WORLD, PETSC_ERR_WRONG_MPI_SIZE, "Demonstration is only for sequential runs"); 142735d7f90SBarry Smith 1439566063dSJacob Faibussowitsch PetscCall(CreateFEStruct(&fe)); 1449566063dSJacob Faibussowitsch PetscCall(CreateMatrix(&fe, &A)); 145735d7f90SBarry Smith 1469566063dSJacob Faibussowitsch PetscCall(FillMatrixCPU(&fe, A)); 1479566063dSJacob Faibussowitsch PetscCall(MatView(A, PETSC_VIEWER_STDOUT_WORLD)); 148735d7f90SBarry Smith 1499566063dSJacob Faibussowitsch PetscCall(MatZeroEntries(A)); 1509566063dSJacob Faibussowitsch PetscCall(FillMatrixCPUCOO(&fe, A)); 1519566063dSJacob Faibussowitsch PetscCall(MatView(A, PETSC_VIEWER_STDOUT_WORLD)); 152735d7f90SBarry Smith 1539566063dSJacob Faibussowitsch PetscCall(MatZeroEntries(A)); 1549566063dSJacob Faibussowitsch PetscCall(FillMatrixCPUCOO3d(&fe, A)); 1559566063dSJacob Faibussowitsch PetscCall(MatView(A, PETSC_VIEWER_STDOUT_WORLD)); 156735d7f90SBarry Smith 1579566063dSJacob Faibussowitsch PetscCall(MatZeroEntries(A)); 1589566063dSJacob Faibussowitsch PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJKOKKOS, &is_kokkos)); 1599566063dSJacob Faibussowitsch PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJCUSPARSE, &is_cuda)); 160735d7f90SBarry Smith #if defined(PETSC_HAVE_KOKKOS) 1619566063dSJacob Faibussowitsch if (is_kokkos) PetscCall(FillMatrixKokkosCOO(&fe, A)); 162735d7f90SBarry Smith #endif 163735d7f90SBarry Smith #if defined(PETSC_HAVE_CUDA) 1649566063dSJacob Faibussowitsch if (is_cuda) PetscCall(FillMatrixCUDACOO(&fe, A)); 165735d7f90SBarry Smith #endif 1669566063dSJacob Faibussowitsch PetscCall(MatView(A, PETSC_VIEWER_STDOUT_WORLD)); 167735d7f90SBarry Smith 1689566063dSJacob Faibussowitsch PetscCall(MatDestroy(&A)); 1699566063dSJacob Faibussowitsch PetscCall(DestroyFEStruct(&fe)); 1709566063dSJacob Faibussowitsch PetscCall(PetscFinalize()); 171b122ec5aSJacob Faibussowitsch return 0; 172735d7f90SBarry Smith } 173735d7f90SBarry Smith 174735d7f90SBarry Smith /*TEST 175735d7f90SBarry Smith build: 176735d7f90SBarry Smith requires: cuda kokkos_kernels 177735d7f90SBarry Smith depends: ex18cu.cu ex18kok.kokkos.cxx 178735d7f90SBarry Smith 179735d7f90SBarry Smith testset: 180735d7f90SBarry Smith filter: grep -v "type" 181735d7f90SBarry Smith output_file: output/ex18_1.out 182735d7f90SBarry Smith 183735d7f90SBarry Smith test: 184735d7f90SBarry Smith suffix: kok 185735d7f90SBarry Smith requires: kokkos_kernels 186735d7f90SBarry Smith args: -mat_type aijkokkos 187735d7f90SBarry Smith 188735d7f90SBarry Smith test: 189735d7f90SBarry Smith suffix: cuda 190735d7f90SBarry Smith requires: cuda 191735d7f90SBarry Smith args: -mat_type aijcusparse 192735d7f90SBarry Smith 193735d7f90SBarry Smith TEST*/ 194