13d8e8822SJeremy L Thompson // Copyright (c) 2017-2022, Lawrence Livermore National Security, LLC and other CEED contributors. 23d8e8822SJeremy L Thompson // All Rights Reserved. See the top-level LICENSE and NOTICE files for details. 30d0321e0SJeremy L Thompson // 43d8e8822SJeremy L Thompson // SPDX-License-Identifier: BSD-2-Clause 50d0321e0SJeremy L Thompson // 63d8e8822SJeremy L Thompson // This file is part of CEED: http://github.com/ceed 70d0321e0SJeremy L Thompson 849aac155SJeremy L Thompson #include <ceed.h> 90d0321e0SJeremy L Thompson #include <hip/hip_runtime.h> 100d0321e0SJeremy L Thompson 110d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 120d0321e0SJeremy L Thompson // Kernel for set value on device 130d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 149330daecSnbeams __global__ static void setValueK(CeedScalar *__restrict__ vec, CeedSize size, CeedScalar val) { 15*b7453713SJeremy L Thompson CeedSize index = threadIdx.x + (CeedSize)blockDim.x * blockIdx.x; 16*b7453713SJeremy L Thompson 17*b7453713SJeremy L Thompson if (index >= size) return; 18*b7453713SJeremy L Thompson vec[index] = val; 190d0321e0SJeremy L Thompson } 200d0321e0SJeremy L Thompson 210d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 220d0321e0SJeremy L Thompson // Set value on device memory 230d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 249330daecSnbeams extern "C" int CeedDeviceSetValue_Hip(CeedScalar *d_array, CeedSize length, CeedScalar val) { 25*b7453713SJeremy L Thompson const int block_size = 512; 26*b7453713SJeremy L Thompson const CeedSize vec_size = length; 27*b7453713SJeremy L Thompson int grid_size = vec_size / block_size; 280d0321e0SJeremy L Thompson 29*b7453713SJeremy L Thompson if (block_size * grid_size < vec_size) grid_size += 1; 30*b7453713SJeremy L Thompson hipLaunchKernelGGL(setValueK, dim3(grid_size), dim3(block_size), 0, 0, d_array, length, val); 310d0321e0SJeremy L Thompson return 0; 320d0321e0SJeremy L Thompson } 330d0321e0SJeremy L Thompson 340d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 350d0321e0SJeremy L Thompson // Kernel for taking reciprocal 360d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 379330daecSnbeams __global__ static void rcpValueK(CeedScalar *__restrict__ vec, CeedSize size) { 38*b7453713SJeremy L Thompson CeedSize index = threadIdx.x + (CeedSize)blockDim.x * blockIdx.x; 39*b7453713SJeremy L Thompson 40*b7453713SJeremy L Thompson if (index >= size) return; 41*b7453713SJeremy L Thompson if (fabs(vec[index]) > 1E-16) vec[index] = 1. / vec[index]; 420d0321e0SJeremy L Thompson } 430d0321e0SJeremy L Thompson 440d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 450d0321e0SJeremy L Thompson // Take vector reciprocal in device memory 460d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 479330daecSnbeams extern "C" int CeedDeviceReciprocal_Hip(CeedScalar *d_array, CeedSize length) { 48*b7453713SJeremy L Thompson const int block_size = 512; 49*b7453713SJeremy L Thompson const CeedSize vec_size = length; 50*b7453713SJeremy L Thompson int grid_size = vec_size / block_size; 510d0321e0SJeremy L Thompson 52*b7453713SJeremy L Thompson if (block_size * grid_size < vec_size) grid_size += 1; 53*b7453713SJeremy L Thompson hipLaunchKernelGGL(rcpValueK, dim3(grid_size), dim3(block_size), 0, 0, d_array, length); 540d0321e0SJeremy L Thompson return 0; 550d0321e0SJeremy L Thompson } 560d0321e0SJeremy L Thompson 570d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 580d0321e0SJeremy L Thompson // Kernel for scale 590d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 609330daecSnbeams __global__ static void scaleValueK(CeedScalar *__restrict__ x, CeedScalar alpha, CeedSize size) { 61*b7453713SJeremy L Thompson CeedSize index = threadIdx.x + (CeedSize)blockDim.x * blockIdx.x; 62*b7453713SJeremy L Thompson 63*b7453713SJeremy L Thompson if (index >= size) return; 64*b7453713SJeremy L Thompson x[index] *= alpha; 650d0321e0SJeremy L Thompson } 660d0321e0SJeremy L Thompson 670d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 680d0321e0SJeremy L Thompson // Compute x = alpha x on device 690d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 709330daecSnbeams extern "C" int CeedDeviceScale_Hip(CeedScalar *x_array, CeedScalar alpha, CeedSize length) { 71*b7453713SJeremy L Thompson const int block_size = 512; 72*b7453713SJeremy L Thompson const CeedSize vec_size = length; 73*b7453713SJeremy L Thompson int grid_size = vec_size / block_size; 740d0321e0SJeremy L Thompson 75*b7453713SJeremy L Thompson if (block_size * grid_size < vec_size) grid_size += 1; 76*b7453713SJeremy L Thompson hipLaunchKernelGGL(scaleValueK, dim3(grid_size), dim3(block_size), 0, 0, x_array, alpha, length); 770d0321e0SJeremy L Thompson return 0; 780d0321e0SJeremy L Thompson } 790d0321e0SJeremy L Thompson 800d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 810d0321e0SJeremy L Thompson // Kernel for axpy 820d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 839330daecSnbeams __global__ static void axpyValueK(CeedScalar *__restrict__ y, CeedScalar alpha, CeedScalar *__restrict__ x, CeedSize size) { 84*b7453713SJeremy L Thompson CeedSize index = threadIdx.x + (CeedSize)blockDim.x * blockIdx.x; 85*b7453713SJeremy L Thompson 86*b7453713SJeremy L Thompson if (index >= size) return; 87*b7453713SJeremy L Thompson y[index] += alpha * x[index]; 880d0321e0SJeremy L Thompson } 890d0321e0SJeremy L Thompson 900d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 910d0321e0SJeremy L Thompson // Compute y = alpha x + y on device 920d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 939330daecSnbeams extern "C" int CeedDeviceAXPY_Hip(CeedScalar *y_array, CeedScalar alpha, CeedScalar *x_array, CeedSize length) { 94*b7453713SJeremy L Thompson const int block_size = 512; 95*b7453713SJeremy L Thompson const CeedSize vec_size = length; 96*b7453713SJeremy L Thompson int grid_size = vec_size / block_size; 970d0321e0SJeremy L Thompson 98*b7453713SJeremy L Thompson if (block_size * grid_size < vec_size) grid_size += 1; 99*b7453713SJeremy L Thompson hipLaunchKernelGGL(axpyValueK, dim3(grid_size), dim3(block_size), 0, 0, y_array, alpha, x_array, length); 1000d0321e0SJeremy L Thompson return 0; 1010d0321e0SJeremy L Thompson } 1020d0321e0SJeremy L Thompson 1030d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 1045fb68f37SKaren (Ren) Stengel // Kernel for axpby 1055fb68f37SKaren (Ren) Stengel //------------------------------------------------------------------------------ 1069330daecSnbeams __global__ static void axpbyValueK(CeedScalar *__restrict__ y, CeedScalar alpha, CeedScalar beta, CeedScalar *__restrict__ x, CeedSize size) { 107*b7453713SJeremy L Thompson CeedSize index = threadIdx.x + (CeedSize)blockDim.x * blockIdx.x; 108*b7453713SJeremy L Thompson 109*b7453713SJeremy L Thompson if (index >= size) return; 110*b7453713SJeremy L Thompson y[index] = beta * y[index]; 111*b7453713SJeremy L Thompson y[index] += alpha * x[index]; 1125fb68f37SKaren (Ren) Stengel } 1135fb68f37SKaren (Ren) Stengel 1145fb68f37SKaren (Ren) Stengel //------------------------------------------------------------------------------ 1155fb68f37SKaren (Ren) Stengel // Compute y = alpha x + beta y on device 1165fb68f37SKaren (Ren) Stengel //------------------------------------------------------------------------------ 1179330daecSnbeams extern "C" int CeedDeviceAXPBY_Hip(CeedScalar *y_array, CeedScalar alpha, CeedScalar beta, CeedScalar *x_array, CeedSize length) { 118*b7453713SJeremy L Thompson const int block_size = 512; 119*b7453713SJeremy L Thompson const CeedSize vec_size = length; 120*b7453713SJeremy L Thompson int grid_size = vec_size / block_size; 1215fb68f37SKaren (Ren) Stengel 122*b7453713SJeremy L Thompson if (block_size * grid_size < vec_size) grid_size += 1; 123*b7453713SJeremy L Thompson hipLaunchKernelGGL(axpbyValueK, dim3(grid_size), dim3(block_size), 0, 0, y_array, alpha, beta, x_array, length); 1245fb68f37SKaren (Ren) Stengel return 0; 1255fb68f37SKaren (Ren) Stengel } 1265fb68f37SKaren (Ren) Stengel 1275fb68f37SKaren (Ren) Stengel //------------------------------------------------------------------------------ 1280d0321e0SJeremy L Thompson // Kernel for pointwise mult 1290d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 1309330daecSnbeams __global__ static void pointwiseMultValueK(CeedScalar *__restrict__ w, CeedScalar *x, CeedScalar *__restrict__ y, CeedSize size) { 131*b7453713SJeremy L Thompson CeedSize index = threadIdx.x + (CeedSize)blockDim.x * blockIdx.x; 132*b7453713SJeremy L Thompson 133*b7453713SJeremy L Thompson if (index >= size) return; 134*b7453713SJeremy L Thompson w[index] = x[index] * y[index]; 1350d0321e0SJeremy L Thompson } 1360d0321e0SJeremy L Thompson 1370d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 1380d0321e0SJeremy L Thompson // Compute the pointwise multiplication w = x .* y on device 1390d0321e0SJeremy L Thompson //------------------------------------------------------------------------------ 1409330daecSnbeams extern "C" int CeedDevicePointwiseMult_Hip(CeedScalar *w_array, CeedScalar *x_array, CeedScalar *y_array, CeedSize length) { 141*b7453713SJeremy L Thompson const int block_size = 512; 142*b7453713SJeremy L Thompson const CeedSize vec_size = length; 143*b7453713SJeremy L Thompson int grid_size = vec_size / block_size; 1440d0321e0SJeremy L Thompson 145*b7453713SJeremy L Thompson if (block_size * grid_size < vec_size) grid_size += 1; 146*b7453713SJeremy L Thompson hipLaunchKernelGGL(pointwiseMultValueK, dim3(grid_size), dim3(block_size), 0, 0, w_array, x_array, y_array, length); 1470d0321e0SJeremy L Thompson return 0; 1480d0321e0SJeremy L Thompson } 1492a86cc9dSSebastian Grimberg 1502a86cc9dSSebastian Grimberg //------------------------------------------------------------------------------ 151