xref: /libCEED/rust/libceed-sys/c-src/backends/magma/tuning/generate_tuning.py (revision acc0bb127f9d52b89fa0cb7f74c98dc79acc3cb0)
#!/usr/bin/env python3

# Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC.
# Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707.
# All Rights reserved. See files LICENSE and NOTICE for details.
#
# This file is part of CEED, a collection of benchmarks, miniapps, software
# libraries and APIs for efficient high-order finite element and spectral
# element discretizations for exascale applications. For more information and
# source code availability see http://github.com/ceed
#
# The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
# a collaborative effort of two U.S. Department of Energy organizations (Office
# of Science and the National Nuclear Security Administration) responsible for
# the planning and preparation of a capable exascale ecosystem, including
# software, applications, hardware, advanced system engineering and early
# testbed platforms, in support of the nation's exascale computing imperative.
1826bdecf3SSebastian Grimberg
import argparse
import glob
import os
import re
import shlex
import shutil
import subprocess
import time

import pandas as pd
2726bdecf3SSebastian Grimberg
# Directory containing this script (symlinks resolved); every path used by the
# tuning workflow is built relative to it.
script_dir = os.path.split(os.path.realpath(__file__))[0]
2926bdecf3SSebastian Grimberg
3026bdecf3SSebastian Grimberg
def _rewrite_in_place(path, pattern, replacement):
    """Apply ``re.sub(pattern, replacement, ...)`` to the whole text of ``path``."""
    with open(path, "r") as f:
        text = f.read()
    with open(path, "w") as f:
        f.write(re.sub(pattern, replacement, text))


def benchmark(nb, build_cmd, backend, log):
    """Rebuild with block size ``nb``, run the tuning executable, parse its log.

    Two backend sources next to this script's parent directory are patched
    temporarily: the ``ceed_magma_queue_sync(...)`` macro in ``ceed-magma.h``
    is redefined to a device-wide synchronize call, and
    ``CEED_AUTOTUNE_RTC_NB`` in ``ceed-magma-gemm-selector.cpp`` is set to
    ``nb``. Both files are restored from their ``.backup`` copies once the
    build has finished, including when the build fails or is interrupted.

    Args:
        nb: Value substituted for ``CEED_AUTOTUNE_RTC_NB``.
        build_cmd: Command run from ``{script_dir}/../../..`` to build the
            library. A string is tokenized with ``shlex.split`` so that
            commands carrying arguments (e.g. ``"make -j8"``) work; a list is
            passed through unchanged.
        backend: Resource string handed to the ``tuning`` executable; when it
            contains ``"hip"`` the HIP synchronize call is used, otherwise
            the CUDA one.
        log: Path of the file that receives the executable's stdout/stderr.

    Returns:
        ``pandas.DataFrame`` parsed from ``log`` with columns ``P``, ``Q``,
        ``N``, ``Q_COMP``, ``TRANS`` and ``MFLOPS``.

    Raises:
        subprocess.CalledProcessError: If either build step exits non-zero.
            Benchmarking a stale binary would silently attribute old results
            to the new ``nb``.
    """
    ceed_magma_h = f"{script_dir}/../ceed-magma.h"
    ceed_magma_gemm_selector_cpp = f"{script_dir}/../ceed-magma-gemm-selector.cpp"
    patched_files = (ceed_magma_h, ceed_magma_gemm_selector_cpp)

    if "hip" in backend:
        sync_call = "hipDeviceSynchronize()"
    else:
        sync_call = "cudaDeviceSynchronize()"

    # Without a shell, a plain string is taken as the program name, so a
    # command with arguments has to be split into an argv list first.
    if isinstance(build_cmd, str):
        build_cmd = shlex.split(build_cmd)

    # Build for new NB
    try:
        for path in patched_files:
            shutil.copyfile(path, path + ".backup")
        _rewrite_in_place(
            ceed_magma_h,
            r".*(#define ceed_magma_queue_sync\(\.\.\.\)).*",
            r"\1 " + sync_call)
        _rewrite_in_place(
            ceed_magma_gemm_selector_cpp,
            r".*(#define CEED_AUTOTUNE_RTC_NB).*",
            r"\1 " + f"{nb}")

        subprocess.run(build_cmd, cwd=f"{script_dir}/../../..", check=True)
        subprocess.run(["make", "tuning", "OPT=-O0"],
                       cwd=f"{script_dir}", check=True)
    finally:
        # Always put the pristine sources back, even on failure or Ctrl-C.
        for path in patched_files:
            if os.path.exists(path + ".backup"):
                shutil.move(path + ".backup", path)

    # Run the benchmark
    with open(log, "w") as f:
        subprocess.run(
            [f"{script_dir}/tuning", f"{backend}"], stdout=f, stderr=f)

    # sep=r"\s+" is the documented equivalent of the deprecated
    # delim_whitespace=True.
    return pd.read_csv(
        log,
        header=None,
        sep=r"\s+",
        names=["P", "Q", "N", "Q_COMP", "TRANS", "MFLOPS"])
8126bdecf3SSebastian Grimberg
8226bdecf3SSebastian Grimberg
_RTC_BANNER = "////////////////////////////////////////////////////////////////////////////////\n"


def _format_rtc_table(data, trans, name):
    """Return the C++ text of one tuning table.

    Selects the rows of ``data`` whose ``TRANS`` column equals ``trans`` and
    renders their ``P``, ``Q``, ``N``, ``Q_COMP`` and ``NB`` columns as a
    brace-initialized ``std::vector`` named ``name``, preceded by a banner
    comment line.
    """
    rows = data.loc[data["TRANS"] == trans].to_string(
        header=False,
        index=False,
        justify="right",
        columns=["P", "Q", "N", "Q_COMP", "NB"]).split("\n")
    # Insert a comma after every number that is followed by whitespace, so
    # the whitespace-aligned columns become a C++ initializer list.
    entries = ",\n".join(
        "    {" + re.sub(r"([0-9])(\s+)", r"\1,\2", row) + "}" for row in rows)
    return (_RTC_BANNER +
            f"std::vector<std::array<int, RECORD_LENGTH_RTC> > {name}" +
            " = {\n" + entries + "\n};\n")


def main():
    """Benchmark NB = 1..max-nb and write the best NB per case to a header."""
    # Command line arguments
    parser = argparse.ArgumentParser("MAGMA RTC autotuning")
    parser.add_argument(
        "-arch",
        help="Device architecture name for tuning data",
        required=True)
    parser.add_argument(
        "-max-nb",
        help="Maximum block size NB to consider for autotuning",
        default=32,
        type=int)
    parser.add_argument(
        "-build-cmd",
        help="Command used to build libCEED from the source root directory",
        default="make")
    parser.add_argument(
        "-ceed",
        help="Ceed resource specifier",
        default="/cpu/self")
    args = parser.parse_args()

    # With no NB to try there is no data to write out; fail up front instead
    # of crashing later on an undefined table.
    if args.max_nb < 1:
        parser.error("-max-nb must be at least 1")

    data = None
    for nb in range(1, args.max_nb + 1):
        # Run the benchmarks
        start = time.perf_counter()
        data_nb = benchmark(nb, args.build_cmd, args.ceed,
                            f"{script_dir}/output-nb-{nb}.txt")
        print(
            f"Finished benchmarks for NB = {nb}, backend = {args.ceed} ({time.perf_counter() - start} s)")

        # Save the data for the highest performing NB
        if data is None:
            data = pd.DataFrame(data_nb)
            data["NB"] = nb
        else:
            # A larger NB only replaces the current best when it is more
            # than 5% faster.
            idx = data_nb["MFLOPS"] > 1.05 * data["MFLOPS"]
            data.loc[idx, "NB"] = nb
            data.loc[idx, "MFLOPS"] = data_nb.loc[idx, "MFLOPS"]

    # Print the results
    with open(f"{script_dir}/{args.arch}_rtc.h", "w") as f:
        f.write(_RTC_BANNER)
        f.write(f"// auto-generated from data on {args.arch}\n\n")
        f.write(_format_rtc_table(data, 1, f"drtc_t_{args.arch}"))
        f.write("\n")
        f.write(_format_rtc_table(data, 0, f"drtc_n_{args.arch}"))


if __name__ == "__main__":
    main()
155