197929ea7SJunchao Zhang static char help[] = "Test VecScatterCreateToZero, VecScatterCreateToAll\n\n"; 297929ea7SJunchao Zhang 397929ea7SJunchao Zhang #include <petscvec.h> 4d71ae5a4SJacob Faibussowitsch int main(int argc, char **argv) 5d71ae5a4SJacob Faibussowitsch { 666100624SStefano Zampini PetscInt i, N = 10, n = PETSC_DECIDE, low, high, onlylocal = -1; 797929ea7SJunchao Zhang PetscMPIInt size, rank; 897929ea7SJunchao Zhang Vec x, y; 997929ea7SJunchao Zhang VecScatter vscat; 1097929ea7SJunchao Zhang 11327415f7SBarry Smith PetscFunctionBeginUser; 12*c8025a54SPierre Jolivet PetscCall(PetscInitialize(&argc, &argv, NULL, help)); 139566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size)); 149566063dSJacob Faibussowitsch PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD, &rank)); 1566100624SStefano Zampini PetscCall(PetscOptionsGetInt(NULL, NULL, "-n", &N, NULL)); 1666100624SStefano Zampini 1766100624SStefano Zampini /* Trigger special case in VecScatterCreateToAll to deal with the one-to-all pattern */ 1866100624SStefano Zampini PetscCall(PetscOptionsGetInt(NULL, NULL, "-onlylocal", &onlylocal, NULL)); 1966100624SStefano Zampini if (onlylocal >= 0 && onlylocal < size) n = (rank == onlylocal ? N : 0); 2097929ea7SJunchao Zhang 219566063dSJacob Faibussowitsch PetscCall(VecCreate(PETSC_COMM_WORLD, &x)); 229566063dSJacob Faibussowitsch PetscCall(VecSetFromOptions(x)); 2366100624SStefano Zampini PetscCall(VecSetSizes(x, n, N)); 249566063dSJacob Faibussowitsch PetscCall(VecGetOwnershipRange(x, &low, &high)); 259566063dSJacob Faibussowitsch PetscCall(PetscObjectSetName((PetscObject)x, "x")); 2697929ea7SJunchao Zhang 2797929ea7SJunchao Zhang /*-------------------------------------*/ 2897929ea7SJunchao Zhang /* VecScatterCreateToZero */ 2997929ea7SJunchao Zhang /*-------------------------------------*/ 3097929ea7SJunchao Zhang 3197929ea7SJunchao Zhang /* MPI vec x = [0, 1, 2, .., N-1] */ 329566063dSJacob Faibussowitsch for (i = low; i < high; i++) PetscCall(VecSetValue(x, i, (PetscScalar)i, INSERT_VALUES)); 339566063dSJacob Faibussowitsch PetscCall(VecAssemblyBegin(x)); 349566063dSJacob Faibussowitsch PetscCall(VecAssemblyEnd(x)); 3597929ea7SJunchao Zhang 369566063dSJacob Faibussowitsch PetscCall(PetscPrintf(PETSC_COMM_WORLD, "\nTesting VecScatterCreateToZero\n")); 379566063dSJacob Faibussowitsch PetscCall(VecScatterCreateToZero(x, &vscat, &y)); 389566063dSJacob Faibussowitsch PetscCall(PetscObjectSetName((PetscObject)y, "y")); 3997929ea7SJunchao Zhang 4097929ea7SJunchao Zhang /* Test PetscSFBcastAndOp with op = MPI_REPLACE, which does y = x on rank 0 */ 419566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 429566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 439566063dSJacob Faibussowitsch if (rank == 0) PetscCall(VecView(y, PETSC_VIEWER_STDOUT_SELF)); 4497929ea7SJunchao Zhang 4597929ea7SJunchao Zhang /* Test PetscSFBcastAndOp with op = MPI_SUM, which does y += x */ 469566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vscat, x, y, ADD_VALUES, SCATTER_FORWARD)); 479566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vscat, x, y, ADD_VALUES, SCATTER_FORWARD)); 489566063dSJacob Faibussowitsch if (rank == 0) PetscCall(VecView(y, PETSC_VIEWER_STDOUT_SELF)); 4997929ea7SJunchao Zhang 5097929ea7SJunchao Zhang /* Test PetscSFReduce with op = MPI_REPLACE, which does x = y */ 519566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vscat, y, x, INSERT_VALUES, SCATTER_REVERSE)); 529566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vscat, y, x, INSERT_VALUES, SCATTER_REVERSE)); 539566063dSJacob Faibussowitsch PetscCall(VecView(x, PETSC_VIEWER_STDOUT_WORLD)); 5497929ea7SJunchao Zhang 5597929ea7SJunchao Zhang /* Test PetscSFReduce with op = MPI_SUM, which does x += y on x's local part on rank 0*/ 569566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vscat, y, x, ADD_VALUES, SCATTER_REVERSE_LOCAL)); 579566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vscat, y, x, ADD_VALUES, SCATTER_REVERSE_LOCAL)); 589566063dSJacob Faibussowitsch PetscCall(VecView(x, PETSC_VIEWER_STDOUT_WORLD)); 5997929ea7SJunchao Zhang 609566063dSJacob Faibussowitsch PetscCall(VecDestroy(&y)); 619566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vscat)); 6297929ea7SJunchao Zhang 6397929ea7SJunchao Zhang /*-------------------------------------*/ 6497929ea7SJunchao Zhang /* VecScatterCreateToAll */ 6597929ea7SJunchao Zhang /*-------------------------------------*/ 669566063dSJacob Faibussowitsch for (i = low; i < high; i++) PetscCall(VecSetValue(x, i, (PetscScalar)i, INSERT_VALUES)); 679566063dSJacob Faibussowitsch PetscCall(VecAssemblyBegin(x)); 689566063dSJacob Faibussowitsch PetscCall(VecAssemblyEnd(x)); 6997929ea7SJunchao Zhang 709566063dSJacob Faibussowitsch PetscCall(PetscPrintf(PETSC_COMM_WORLD, "\nTesting VecScatterCreateToAll\n")); 7197929ea7SJunchao Zhang 729566063dSJacob Faibussowitsch PetscCall(VecScatterCreateToAll(x, &vscat, &y)); 739566063dSJacob Faibussowitsch PetscCall(PetscObjectSetName((PetscObject)y, "y")); 7497929ea7SJunchao Zhang 7597929ea7SJunchao Zhang /* Test PetscSFBcastAndOp with op = MPI_REPLACE, which does y = x on all ranks */ 769566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 779566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vscat, x, y, INSERT_VALUES, SCATTER_FORWARD)); 789566063dSJacob Faibussowitsch if (rank == 0) PetscCall(VecView(y, PETSC_VIEWER_STDOUT_SELF)); 7997929ea7SJunchao Zhang 8097929ea7SJunchao Zhang /* Test PetscSFBcastAndOp with op = MPI_SUM, which does y += x */ 819566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vscat, x, y, ADD_VALUES, SCATTER_FORWARD)); 829566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vscat, x, y, ADD_VALUES, SCATTER_FORWARD)); 839566063dSJacob Faibussowitsch if (rank == 0) PetscCall(VecView(y, PETSC_VIEWER_STDOUT_SELF)); 8497929ea7SJunchao Zhang 8597929ea7SJunchao Zhang /* Test PetscSFReduce with op = MPI_REPLACE, which does x = y */ 869566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vscat, y, x, INSERT_VALUES, SCATTER_REVERSE)); 879566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vscat, y, x, INSERT_VALUES, SCATTER_REVERSE)); 889566063dSJacob Faibussowitsch PetscCall(VecView(x, PETSC_VIEWER_STDOUT_WORLD)); 8997929ea7SJunchao Zhang 9097929ea7SJunchao Zhang /* Test PetscSFReduce with op = MPI_SUM, which does x += size*y */ 919566063dSJacob Faibussowitsch PetscCall(VecScatterBegin(vscat, y, x, ADD_VALUES, SCATTER_REVERSE)); 929566063dSJacob Faibussowitsch PetscCall(VecScatterEnd(vscat, y, x, ADD_VALUES, SCATTER_REVERSE)); 939566063dSJacob Faibussowitsch PetscCall(VecView(x, PETSC_VIEWER_STDOUT_WORLD)); 949566063dSJacob Faibussowitsch PetscCall(VecDestroy(&x)); 959566063dSJacob Faibussowitsch PetscCall(VecDestroy(&y)); 969566063dSJacob Faibussowitsch PetscCall(VecScatterDestroy(&vscat)); 9797929ea7SJunchao Zhang 989566063dSJacob Faibussowitsch PetscCall(PetscFinalize()); 99b122ec5aSJacob Faibussowitsch return 0; 10097929ea7SJunchao Zhang } 10197929ea7SJunchao Zhang 10297929ea7SJunchao Zhang /*TEST 10397929ea7SJunchao Zhang 10497929ea7SJunchao Zhang testset: 10597929ea7SJunchao Zhang # N=10 is divisible by nsize, to trigger Allgather/Gather in SF 10697929ea7SJunchao Zhang nsize: 2 10726e8e884SScott Kruger # Exact numbers really matter here 10826e8e884SScott Kruger diff_args: -j 10997929ea7SJunchao Zhang filter: grep -v "type" 11097929ea7SJunchao Zhang output_file: output/ex8_1.out 11197929ea7SJunchao Zhang 11297929ea7SJunchao Zhang test: 11397929ea7SJunchao Zhang suffix: 1_standard 11497929ea7SJunchao Zhang 11597929ea7SJunchao Zhang test: 11697929ea7SJunchao Zhang suffix: 1_cuda 11726e8e884SScott Kruger # sf_backend cuda is not needed if compiling only with cuda 118bd46da1dSJunchao Zhang args: -vec_type cuda -sf_backend cuda 11997929ea7SJunchao Zhang requires: cuda 12097929ea7SJunchao Zhang 12197929ea7SJunchao Zhang test: 12226e8e884SScott Kruger suffix: 1_hip 123bd46da1dSJunchao Zhang args: -vec_type hip -sf_backend hip 12426e8e884SScott Kruger requires: hip 12526e8e884SScott Kruger 12626e8e884SScott Kruger test: 12797929ea7SJunchao Zhang suffix: 1_cuda_aware_mpi 12826e8e884SScott Kruger # sf_backend cuda is not needed if compiling only with cuda 129bd46da1dSJunchao Zhang args: -vec_type cuda -sf_backend cuda 130dfd57a17SPierre Jolivet requires: cuda defined(PETSC_HAVE_MPI_GPU_AWARE) 13197929ea7SJunchao Zhang 13297929ea7SJunchao Zhang testset: 13397929ea7SJunchao Zhang # N=10 is not divisible by nsize, to trigger Allgatherv/Gatherv in SF 13497929ea7SJunchao Zhang nsize: 3 13526e8e884SScott Kruger # Exact numbers really matter here 13626e8e884SScott Kruger diff_args: -j 13766100624SStefano Zampini filter: grep -v "type" | grep -v "Process " 13897929ea7SJunchao Zhang output_file: output/ex8_2.out 13997929ea7SJunchao Zhang 14097929ea7SJunchao Zhang test: 14197929ea7SJunchao Zhang suffix: 2_standard 14297929ea7SJunchao Zhang 14397929ea7SJunchao Zhang test: 14497929ea7SJunchao Zhang suffix: 2_cuda 14526e8e884SScott Kruger # sf_backend cuda is not needed if compiling only with cuda 146bd46da1dSJunchao Zhang args: -vec_type cuda -sf_backend cuda 14797929ea7SJunchao Zhang requires: cuda 14897929ea7SJunchao Zhang 14997929ea7SJunchao Zhang test: 15026e8e884SScott Kruger suffix: 2_hip 15126e8e884SScott Kruger # sf_backend hip is not needed if compiling only with hip 152bd46da1dSJunchao Zhang args: -vec_type hip -sf_backend hip 15326e8e884SScott Kruger requires: hip 15426e8e884SScott Kruger 15526e8e884SScott Kruger test: 15697929ea7SJunchao Zhang suffix: 2_cuda_aware_mpi 157bd46da1dSJunchao Zhang args: -vec_type cuda 158dfd57a17SPierre Jolivet requires: cuda defined(PETSC_HAVE_MPI_GPU_AWARE) 15997929ea7SJunchao Zhang 16066100624SStefano Zampini testset: 16166100624SStefano Zampini # trigger one-to-all pattern in Allgatherv 16266100624SStefano Zampini nsize: 3 16366100624SStefano Zampini diff_args: -j 16466100624SStefano Zampini filter: grep -v "type" | grep -v "Process " 16566100624SStefano Zampini output_file: output/ex8_3.out 16666100624SStefano Zampini args: -onlylocal 1 16766100624SStefano Zampini 16866100624SStefano Zampini test: 16966100624SStefano Zampini suffix: 2_standard_onetoall 17066100624SStefano Zampini 17166100624SStefano Zampini test: 17266100624SStefano Zampini suffix: 2_cuda_onetoall 17366100624SStefano Zampini # sf_backend cuda is not needed if compiling only with cuda 17466100624SStefano Zampini args: -vec_type cuda -sf_backend cuda 17566100624SStefano Zampini requires: cuda 17666100624SStefano Zampini 17766100624SStefano Zampini test: 17866100624SStefano Zampini suffix: 2_hip_onetoall 17966100624SStefano Zampini # sf_backend hip is not needed if compiling only with hip 18066100624SStefano Zampini args: -vec_type hip -sf_backend hip 18166100624SStefano Zampini requires: hip 18266100624SStefano Zampini 18366100624SStefano Zampini test: 18466100624SStefano Zampini suffix: 2_cuda_aware_mpi_onetoall 18566100624SStefano Zampini args: -vec_type cuda 18666100624SStefano Zampini requires: cuda defined(PETSC_HAVE_MPI_GPU_AWARE) 18766100624SStefano Zampini 18897929ea7SJunchao Zhang TEST*/ 189