xref: /petsc/src/vec/is/sf/tests/ex23.c (revision 1f02d56f9f2a526d4ee4c7eb7b0e3e497fd0602e)
/* Help text that main() hands to PetscInitialize(). */
static const char help[] = "Test PetscSF with integers and MPIU_2INT \n\n";
2*1f02d56fSJunchao Zhang 
3*1f02d56fSJunchao Zhang #include <petscvec.h>
4*1f02d56fSJunchao Zhang #include <petscsf.h>
5*1f02d56fSJunchao Zhang #include <petscdevice.h>
6*1f02d56fSJunchao Zhang 
7*1f02d56fSJunchao Zhang int main(int argc, char *argv[])
8*1f02d56fSJunchao Zhang {
9*1f02d56fSJunchao Zhang   PetscInt           n, n2, N = 12;
10*1f02d56fSJunchao Zhang   PetscInt          *indices;
11*1f02d56fSJunchao Zhang   IS                 ix, iy;
12*1f02d56fSJunchao Zhang   VecScatter         vscat;
13*1f02d56fSJunchao Zhang   Vec                x, y;
14*1f02d56fSJunchao Zhang   PetscInt           rstart, rend;
15*1f02d56fSJunchao Zhang   PetscInt          *xh, *yh, *xd, *yd;
16*1f02d56fSJunchao Zhang   PetscDeviceContext dctx;
17*1f02d56fSJunchao Zhang 
18*1f02d56fSJunchao Zhang   PetscFunctionBeginUser;
19*1f02d56fSJunchao Zhang   PetscCall(PetscInitialize(&argc, &argv, NULL, help));
20*1f02d56fSJunchao Zhang   PetscCall(VecCreateFromOptions(PETSC_COMM_WORLD, NULL, 1, PETSC_DECIDE, N, &x));
21*1f02d56fSJunchao Zhang   PetscCall(VecDuplicate(x, &y));
22*1f02d56fSJunchao Zhang   PetscCall(VecGetLocalSize(x, &n));
23*1f02d56fSJunchao Zhang 
24*1f02d56fSJunchao Zhang   PetscCall(VecGetOwnershipRange(x, &rstart, &rend));
25*1f02d56fSJunchao Zhang   PetscCall(ISCreateStride(PETSC_COMM_WORLD, n, rstart, 1, &ix));
26*1f02d56fSJunchao Zhang   PetscCall(PetscMalloc1(n, &indices));
27*1f02d56fSJunchao Zhang   for (int i = rstart; i < rend; i++) indices[i - rstart] = i / 2;
28*1f02d56fSJunchao Zhang   PetscCall(ISCreateGeneral(PETSC_COMM_WORLD, n, indices, PETSC_OWN_POINTER, &iy));
29*1f02d56fSJunchao Zhang   // connect y[0] to x[0..1], y[1] to x[2..3], etc
30*1f02d56fSJunchao Zhang   PetscCall(VecScatterCreate(y, iy, x, ix, &vscat)); // y has roots, x has leaves
31*1f02d56fSJunchao Zhang 
32*1f02d56fSJunchao Zhang   PetscCall(PetscDeviceContextGetCurrentContext(&dctx));
33*1f02d56fSJunchao Zhang 
34*1f02d56fSJunchao Zhang   // double the allocation since we will use MPIU_2INT later
35*1f02d56fSJunchao Zhang   n2 = 2 * n;
36*1f02d56fSJunchao Zhang   PetscCall(PetscDeviceMalloc(dctx, PETSC_MEMTYPE_HOST, n2, &xh));
37*1f02d56fSJunchao Zhang   PetscCall(PetscDeviceMalloc(dctx, PETSC_MEMTYPE_HOST, n2, &yh));
38*1f02d56fSJunchao Zhang   PetscCall(PetscDeviceMalloc(dctx, PETSC_MEMTYPE_DEVICE, n2, &xd));
39*1f02d56fSJunchao Zhang   PetscCall(PetscDeviceMalloc(dctx, PETSC_MEMTYPE_DEVICE, n2, &yd));
40*1f02d56fSJunchao Zhang 
41*1f02d56fSJunchao Zhang   for (PetscInt i = 0; i < n; i++) {
42*1f02d56fSJunchao Zhang     xh[i] = xh[i + n] = i + rstart;
43*1f02d56fSJunchao Zhang     yh[i] = yh[i + n] = i + rstart;
44*1f02d56fSJunchao Zhang   }
45*1f02d56fSJunchao Zhang   PetscCall(PetscDeviceMemcpy(dctx, xd, xh, sizeof(PetscInt) * n2));
46*1f02d56fSJunchao Zhang   PetscCall(PetscDeviceMemcpy(dctx, yd, yh, sizeof(PetscInt) * n2));
47*1f02d56fSJunchao Zhang 
48*1f02d56fSJunchao Zhang   PetscCall(PetscSFReduceWithMemTypeBegin(vscat, MPIU_INT, PETSC_MEMTYPE_DEVICE, xd, PETSC_MEMTYPE_DEVICE, yd, MPI_SUM));
49*1f02d56fSJunchao Zhang   PetscCall(PetscSFReduceEnd(vscat, MPIU_INT, xd, yd, MPI_SUM));
50*1f02d56fSJunchao Zhang   PetscCall(PetscDeviceMemcpy(dctx, yh, yd, sizeof(PetscInt) * n));
51*1f02d56fSJunchao Zhang   PetscCall(PetscDeviceContextSynchronize(dctx)); // finish the async memcpy
52*1f02d56fSJunchao Zhang   PetscCall(PetscIntView(n, yh, PETSC_VIEWER_STDOUT_WORLD));
53*1f02d56fSJunchao Zhang 
54*1f02d56fSJunchao Zhang   PetscCall(PetscSFBcastWithMemTypeBegin(vscat, MPIU_2INT, PETSC_MEMTYPE_DEVICE, yd, PETSC_MEMTYPE_DEVICE, xd, MPI_MINLOC));
55*1f02d56fSJunchao Zhang   PetscCall(PetscSFBcastEnd(vscat, MPIU_2INT, yd, xd, MPI_MINLOC));
56*1f02d56fSJunchao Zhang   PetscCall(PetscDeviceMemcpy(dctx, xh, xd, sizeof(PetscInt) * n2));
57*1f02d56fSJunchao Zhang   PetscCall(PetscDeviceContextSynchronize(dctx)); // finish the async memcpy
58*1f02d56fSJunchao Zhang   PetscCall(PetscIntView(n2, xh, PETSC_VIEWER_STDOUT_WORLD));
59*1f02d56fSJunchao Zhang 
60*1f02d56fSJunchao Zhang   PetscCall(PetscDeviceFree(dctx, xh));
61*1f02d56fSJunchao Zhang   PetscCall(PetscDeviceFree(dctx, yh));
62*1f02d56fSJunchao Zhang   PetscCall(PetscDeviceFree(dctx, xd));
63*1f02d56fSJunchao Zhang   PetscCall(PetscDeviceFree(dctx, yd));
64*1f02d56fSJunchao Zhang   PetscCall(ISDestroy(&ix));
65*1f02d56fSJunchao Zhang   PetscCall(ISDestroy(&iy));
66*1f02d56fSJunchao Zhang   PetscCall(VecDestroy(&x));
67*1f02d56fSJunchao Zhang   PetscCall(VecDestroy(&y));
68*1f02d56fSJunchao Zhang   PetscCall(VecScatterDestroy(&vscat));
69*1f02d56fSJunchao Zhang   PetscCall(PetscFinalize());
70*1f02d56fSJunchao Zhang }
71*1f02d56fSJunchao Zhang 
72*1f02d56fSJunchao Zhang /*TEST
73*1f02d56fSJunchao Zhang   testset:
74*1f02d56fSJunchao Zhang     output_file: output/ex23.out
75*1f02d56fSJunchao Zhang     nsize: 3
76*1f02d56fSJunchao Zhang 
77*1f02d56fSJunchao Zhang     test:
78*1f02d56fSJunchao Zhang       suffix: 1
79*1f02d56fSJunchao Zhang       requires: cuda
80*1f02d56fSJunchao Zhang 
81*1f02d56fSJunchao Zhang     test:
82*1f02d56fSJunchao Zhang       suffix: 2
83*1f02d56fSJunchao Zhang       requires: hip
84*1f02d56fSJunchao Zhang 
85*1f02d56fSJunchao Zhang     test:
86*1f02d56fSJunchao Zhang       suffix: 3
87*1f02d56fSJunchao Zhang       requires: sycl
88*1f02d56fSJunchao Zhang 
89*1f02d56fSJunchao Zhang TEST*/
90