#include <petscdevice.h>
#include "ex18.h"

/*
  FillValues - CUDA kernel that writes a fixed 3x3 pattern of values for each of n entries.

  Input Parameter:
. n - number of 9-value groups to write

  Output Parameter:
. v - device array holding at least 9*n scalars; group i occupies v[9*i] .. v[9*i+8]

  Notes:
  Launched on a 1D grid of 1D blocks; one thread handles one group and threads whose
  flat index is >= n do nothing. Within a group, position vi*3+vj receives the value
  vi + 2*vj for vi,vj in 0..2.
*/
__global__ void FillValues(PetscInt n, PetscScalar *v)
{
  /* Promote to PetscInt before multiplying so the flat index is not truncated to
     32-bit unsigned arithmetic when PetscInt is a 64-bit type */
  const PetscInt i = (PetscInt)blockIdx.x * blockDim.x + threadIdx.x;

  if (i < n) {
    /* Offset computed in size_t so 9*i cannot wrap a 32-bit PetscInt */
    PetscScalar *s = v + (size_t)9 * (size_t)i;

    for (PetscInt vi = 0; vi < 3; vi++) {
      for (PetscInt vj = 0; vj < 3; vj++) {
        s[vi*3+vj] = vi+2*vj;
      }
    }
  }
}

/*
  FillMatrixCUDACOO - Generates 9 values per element on the GPU and passes them to
  MatSetValuesCOO() with INSERT_VALUES.

  Input Parameters:
+ fe - finite element structure; only fe->Ne (the number of 9-value groups) is read here
- A  - matrix receiving the values

  Notes:
  A temporary device buffer of 9*fe->Ne scalars is allocated, filled by the FillValues
  kernel, handed to MatSetValuesCOO(), and then freed.

  NOTE(review): presumably the COO pattern of A was set up beforehand by the caller with
  9 entries per element - confirm against the caller.
*/
PetscErrorCode FillMatrixCUDACOO(FEStruct *fe,Mat A)
{
  const int    threads = 256; /* threads per block used for the launch */
  PetscScalar *v       = NULL;

  PetscFunctionBeginUser;
  /* Do the size arithmetic in size_t so 9*Ne cannot overflow a 32-bit PetscInt */
  PetscCallCUDA(cudaMalloc((void**)&v,(size_t)9*(size_t)fe->Ne*sizeof(PetscScalar)));
  /* A grid with zero blocks is an invalid launch configuration, so skip the launch
     when there is nothing to fill; MatSetValuesCOO() is still called below */
  if (fe->Ne > 0) {
    FillValues<<<(fe->Ne+threads-1)/threads,threads>>>(fe->Ne,v);
    /* Kernel launches do not return a status; fetch launch errors explicitly */
    PetscCallCUDA(cudaGetLastError());
  }
  PetscCall(MatSetValuesCOO(A,v,INSERT_VALUES));
  PetscCallCUDA(cudaFree(v));
  PetscFunctionReturn(0);
}