1*1f02d56fSJunchao Zhang static const char help[] = "Test PetscSF with integers and MPIU_2INT \n\n"; 2*1f02d56fSJunchao Zhang 3*1f02d56fSJunchao Zhang #include <petscvec.h> 4*1f02d56fSJunchao Zhang #include <petscsf.h> 5*1f02d56fSJunchao Zhang #include <petscdevice.h> 6*1f02d56fSJunchao Zhang 7*1f02d56fSJunchao Zhang int main(int argc, char *argv[]) 8*1f02d56fSJunchao Zhang { 9*1f02d56fSJunchao Zhang PetscInt n, n2, N = 12; 10*1f02d56fSJunchao Zhang PetscInt *indices; 11*1f02d56fSJunchao Zhang IS ix, iy; 12*1f02d56fSJunchao Zhang VecScatter vscat; 13*1f02d56fSJunchao Zhang Vec x, y; 14*1f02d56fSJunchao Zhang PetscInt rstart, rend; 15*1f02d56fSJunchao Zhang PetscInt *xh, *yh, *xd, *yd; 16*1f02d56fSJunchao Zhang PetscDeviceContext dctx; 17*1f02d56fSJunchao Zhang 18*1f02d56fSJunchao Zhang PetscFunctionBeginUser; 19*1f02d56fSJunchao Zhang PetscCall(PetscInitialize(&argc, &argv, NULL, help)); 20*1f02d56fSJunchao Zhang PetscCall(VecCreateFromOptions(PETSC_COMM_WORLD, NULL, 1, PETSC_DECIDE, N, &x)); 21*1f02d56fSJunchao Zhang PetscCall(VecDuplicate(x, &y)); 22*1f02d56fSJunchao Zhang PetscCall(VecGetLocalSize(x, &n)); 23*1f02d56fSJunchao Zhang 24*1f02d56fSJunchao Zhang PetscCall(VecGetOwnershipRange(x, &rstart, &rend)); 25*1f02d56fSJunchao Zhang PetscCall(ISCreateStride(PETSC_COMM_WORLD, n, rstart, 1, &ix)); 26*1f02d56fSJunchao Zhang PetscCall(PetscMalloc1(n, &indices)); 27*1f02d56fSJunchao Zhang for (int i = rstart; i < rend; i++) indices[i - rstart] = i / 2; 28*1f02d56fSJunchao Zhang PetscCall(ISCreateGeneral(PETSC_COMM_WORLD, n, indices, PETSC_OWN_POINTER, &iy)); 29*1f02d56fSJunchao Zhang // connect y[0] to x[0..1], y[1] to x[2..3], etc 30*1f02d56fSJunchao Zhang PetscCall(VecScatterCreate(y, iy, x, ix, &vscat)); // y has roots, x has leaves 31*1f02d56fSJunchao Zhang 32*1f02d56fSJunchao Zhang PetscCall(PetscDeviceContextGetCurrentContext(&dctx)); 33*1f02d56fSJunchao Zhang 34*1f02d56fSJunchao Zhang // double the allocation since we will use MPIU_2INT later 35*1f02d56fSJunchao Zhang n2 = 2 * n; 36*1f02d56fSJunchao Zhang PetscCall(PetscDeviceMalloc(dctx, PETSC_MEMTYPE_HOST, n2, &xh)); 37*1f02d56fSJunchao Zhang PetscCall(PetscDeviceMalloc(dctx, PETSC_MEMTYPE_HOST, n2, &yh)); 38*1f02d56fSJunchao Zhang PetscCall(PetscDeviceMalloc(dctx, PETSC_MEMTYPE_DEVICE, n2, &xd)); 39*1f02d56fSJunchao Zhang PetscCall(PetscDeviceMalloc(dctx, PETSC_MEMTYPE_DEVICE, n2, &yd)); 40*1f02d56fSJunchao Zhang 41*1f02d56fSJunchao Zhang for (PetscInt i = 0; i < n; i++) { 42*1f02d56fSJunchao Zhang xh[i] = xh[i + n] = i + rstart; 43*1f02d56fSJunchao Zhang yh[i] = yh[i + n] = i + rstart; 44*1f02d56fSJunchao Zhang } 45*1f02d56fSJunchao Zhang PetscCall(PetscDeviceMemcpy(dctx, xd, xh, sizeof(PetscInt) * n2)); 46*1f02d56fSJunchao Zhang PetscCall(PetscDeviceMemcpy(dctx, yd, yh, sizeof(PetscInt) * n2)); 47*1f02d56fSJunchao Zhang 48*1f02d56fSJunchao Zhang PetscCall(PetscSFReduceWithMemTypeBegin(vscat, MPIU_INT, PETSC_MEMTYPE_DEVICE, xd, PETSC_MEMTYPE_DEVICE, yd, MPI_SUM)); 49*1f02d56fSJunchao Zhang PetscCall(PetscSFReduceEnd(vscat, MPIU_INT, xd, yd, MPI_SUM)); 50*1f02d56fSJunchao Zhang PetscCall(PetscDeviceMemcpy(dctx, yh, yd, sizeof(PetscInt) * n)); 51*1f02d56fSJunchao Zhang PetscCall(PetscDeviceContextSynchronize(dctx)); // finish the async memcpy 52*1f02d56fSJunchao Zhang PetscCall(PetscIntView(n, yh, PETSC_VIEWER_STDOUT_WORLD)); 53*1f02d56fSJunchao Zhang 54*1f02d56fSJunchao Zhang PetscCall(PetscSFBcastWithMemTypeBegin(vscat, MPIU_2INT, PETSC_MEMTYPE_DEVICE, yd, PETSC_MEMTYPE_DEVICE, xd, MPI_MINLOC)); 55*1f02d56fSJunchao Zhang PetscCall(PetscSFBcastEnd(vscat, MPIU_2INT, yd, xd, MPI_MINLOC)); 56*1f02d56fSJunchao Zhang PetscCall(PetscDeviceMemcpy(dctx, xh, xd, sizeof(PetscInt) * n2)); 57*1f02d56fSJunchao Zhang PetscCall(PetscDeviceContextSynchronize(dctx)); // finish the async memcpy 58*1f02d56fSJunchao Zhang PetscCall(PetscIntView(n2, xh, PETSC_VIEWER_STDOUT_WORLD)); 59*1f02d56fSJunchao Zhang 60*1f02d56fSJunchao Zhang PetscCall(PetscDeviceFree(dctx, xh)); 61*1f02d56fSJunchao Zhang PetscCall(PetscDeviceFree(dctx, yh)); 62*1f02d56fSJunchao Zhang PetscCall(PetscDeviceFree(dctx, xd)); 63*1f02d56fSJunchao Zhang PetscCall(PetscDeviceFree(dctx, yd)); 64*1f02d56fSJunchao Zhang PetscCall(ISDestroy(&ix)); 65*1f02d56fSJunchao Zhang PetscCall(ISDestroy(&iy)); 66*1f02d56fSJunchao Zhang PetscCall(VecDestroy(&x)); 67*1f02d56fSJunchao Zhang PetscCall(VecDestroy(&y)); 68*1f02d56fSJunchao Zhang PetscCall(VecScatterDestroy(&vscat)); 69*1f02d56fSJunchao Zhang PetscCall(PetscFinalize()); 70*1f02d56fSJunchao Zhang } 71*1f02d56fSJunchao Zhang 72*1f02d56fSJunchao Zhang /*TEST 73*1f02d56fSJunchao Zhang testset: 74*1f02d56fSJunchao Zhang output_file: output/ex23.out 75*1f02d56fSJunchao Zhang nsize: 3 76*1f02d56fSJunchao Zhang 77*1f02d56fSJunchao Zhang test: 78*1f02d56fSJunchao Zhang suffix: 1 79*1f02d56fSJunchao Zhang requires: cuda 80*1f02d56fSJunchao Zhang 81*1f02d56fSJunchao Zhang test: 82*1f02d56fSJunchao Zhang suffix: 2 83*1f02d56fSJunchao Zhang requires: hip 84*1f02d56fSJunchao Zhang 85*1f02d56fSJunchao Zhang test: 86*1f02d56fSJunchao Zhang suffix: 3 87*1f02d56fSJunchao Zhang requires: sycl 88*1f02d56fSJunchao Zhang 89*1f02d56fSJunchao Zhang TEST*/ 90