#include <cuda_runtime.h>
#include <petscdevice.h>
#include "ex18.h"

/*
  FillValues - CUDA kernel that writes one group of 3*3 values for every index i < n.

  Input Parameter:
. n - number of 3*3 groups to fill

  Output Parameter:
. v - device array with room for at least 3*3*n entries; entry 3*3*i + vi*3 + vj
      receives the value vi + 2*vj for vi,vj in [0,3)

  Notes:
  Intended for a one-dimensional launch with at least n threads in total. Each
  thread fills the group selected by its global index; threads whose index is
  not less than n do nothing.
*/
__global__ void FillValues(PetscInt n, PetscScalar *v)
{
  /* Widen before multiplying: with a 64-bit PetscInt the product is then formed in
     64 bits instead of wrapping in 32-bit unsigned arithmetic */
  PetscInt    i = (PetscInt)blockIdx.x * blockDim.x + threadIdx.x;
  PetscScalar *s;

  if (i < n) {
    s = &v[3*3*i];
    for (PetscInt vi=0; vi<3; vi++) {
      for (PetscInt vj=0; vj<3; vj++) {
        s[vi*3+vj] = vi+2*vj;
      }
    }
  }
}

/*
  FillMatrixCUDACOO - Generates 3*3*fe->Ne values on the device with FillValues()
  and hands them to MatSetValuesCOO() with INSERT_VALUES.

  Input Parameters:
+ fe - structure providing Ne, the number of 3*3 groups of values to generate
       (presumably the number of finite elements; see ex18.h)
- A  - matrix receiving the values; NOTE(review): this assumes the COO pattern of A
       has already been set up by the caller, which is not visible from this file

  Notes:
  The temporary device buffer is released before the error code of
  MatSetValuesCOO() is examined, so it is not leaked when that call fails.
*/
PetscErrorCode FillMatrixCUDACOO(FEStruct *fe,Mat A)
{
  PetscErrorCode ierr;
  cudaError_t    cerr;
  PetscScalar    *v = NULL;

  PetscFunctionBeginUser;
  cerr = cudaMalloc((void**)&v,3*3*fe->Ne*sizeof(PetscScalar));CHKERRCUDA(cerr);
  /* A launch with zero blocks is an invalid configuration, so skip it when there is nothing to fill */
  if (fe->Ne > 0) {
    FillValues<<<(fe->Ne+255)/256,256>>>(fe->Ne,v);
    /* Kernel launches return no status; launch failures must be queried explicitly */
    cerr = cudaGetLastError();CHKERRCUDA(cerr);
  }
  ierr = MatSetValuesCOO(A,v,INSERT_VALUES);
  /* Free unconditionally, then report the first failure (CHKERRQ returns early on error) */
  cerr = cudaFree(v);
  CHKERRQ(ierr);
  CHKERRCUDA(cerr);
  PetscFunctionReturn(0);
}