126bdecf3SSebastian Grimberg#!/usr/bin/env python3 226bdecf3SSebastian Grimberg 326bdecf3SSebastian Grimberg# Copyright (c) 2017-2018, Lawrence Livermore National Security, LLC. 426bdecf3SSebastian Grimberg# Produced at the Lawrence Livermore National Laboratory. LLNL-CODE-734707. 526bdecf3SSebastian Grimberg# All Rights reserved. See files LICENSE and NOTICE for details. 626bdecf3SSebastian Grimberg# 726bdecf3SSebastian Grimberg# This file is part of CEED, a collection of benchmarks, miniapps, software 826bdecf3SSebastian Grimberg# libraries and APIs for efficient high-order finite element and spectral 926bdecf3SSebastian Grimberg# element discretizations for exascale applications. For more information and 1026bdecf3SSebastian Grimberg# source code availability see http://github.com/ceed 1126bdecf3SSebastian Grimberg# 1226bdecf3SSebastian Grimberg# The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, 1326bdecf3SSebastian Grimberg# a collaborative effort of two U.S. Department of Energy organizations (Office 1426bdecf3SSebastian Grimberg# of Science and the National Nuclear Security Administration) responsible for 1526bdecf3SSebastian Grimberg# the planning and preparation of a capable exascale ecosystem, including 1626bdecf3SSebastian Grimberg# software, applications, hardware, advanced system engineering and early 1726bdecf3SSebastian Grimberg# testbed platforms, in support of the nation's exascale computing imperative. 1826bdecf3SSebastian Grimberg 1926bdecf3SSebastian Grimbergimport argparse 2026bdecf3SSebastian Grimbergimport os 21*acc0bb12SSebastian Grimbergimport glob 2226bdecf3SSebastian Grimbergimport re 23*acc0bb12SSebastian Grimbergimport shutil 2426bdecf3SSebastian Grimbergimport subprocess 2526bdecf3SSebastian Grimbergimport pandas as pd 2626bdecf3SSebastian Grimbergimport time 2726bdecf3SSebastian Grimberg 2826bdecf3SSebastian Grimbergscript_dir = os.path.dirname(os.path.realpath(__file__)) 2926bdecf3SSebastian Grimberg 3026bdecf3SSebastian Grimberg 31*acc0bb12SSebastian Grimbergdef benchmark(nb, build_cmd, backend, log): 32*acc0bb12SSebastian Grimberg # Build for new NB 33*acc0bb12SSebastian Grimberg ceed_magma_h = f"{script_dir}/../ceed-magma.h" 34*acc0bb12SSebastian Grimberg shutil.copyfile(ceed_magma_h, ceed_magma_h + ".backup") 35*acc0bb12SSebastian Grimberg with open(ceed_magma_h, "r") as f: 3626bdecf3SSebastian Grimberg data = f.read() 3726bdecf3SSebastian Grimberg data = re.sub( 38*acc0bb12SSebastian Grimberg r".*(#define ceed_magma_queue_sync\(\.\.\.\)).*", 39*acc0bb12SSebastian Grimberg r"\1 " + 40*acc0bb12SSebastian Grimberg ("hipDeviceSynchronize()" if "hip" in backend else "cudaDeviceSynchronize()"), 4126bdecf3SSebastian Grimberg data) 42*acc0bb12SSebastian Grimberg with open(ceed_magma_h, "w") as f: 4326bdecf3SSebastian Grimberg f.write(data) 44*acc0bb12SSebastian Grimberg 45*acc0bb12SSebastian Grimberg ceed_magma_gemm_selector_cpp = f"{script_dir}/../ceed-magma-gemm-selector.cpp" 46*acc0bb12SSebastian Grimberg shutil.copyfile( 47*acc0bb12SSebastian Grimberg ceed_magma_gemm_selector_cpp, 48*acc0bb12SSebastian Grimberg ceed_magma_gemm_selector_cpp + 49*acc0bb12SSebastian Grimberg ".backup") 50*acc0bb12SSebastian Grimberg with open(ceed_magma_gemm_selector_cpp, "r") as f: 51*acc0bb12SSebastian Grimberg data = f.read() 52*acc0bb12SSebastian Grimberg data = re.sub( 53*acc0bb12SSebastian Grimberg ".*(#define CEED_AUTOTUNE_RTC_NB).*", 54*acc0bb12SSebastian Grimberg r"\1 " + f"{nb}", 55*acc0bb12SSebastian Grimberg data) 56*acc0bb12SSebastian Grimberg with open(ceed_magma_gemm_selector_cpp, "w") as f: 57*acc0bb12SSebastian Grimberg f.write(data) 58*acc0bb12SSebastian Grimberg 5926bdecf3SSebastian Grimberg subprocess.run(build_cmd, cwd=f"{script_dir}/../../..") 60*acc0bb12SSebastian Grimberg subprocess.run(["make", "tuning", "OPT=-O0"], cwd=f"{script_dir}") 61*acc0bb12SSebastian Grimberg shutil.move(ceed_magma_h + ".backup", ceed_magma_h) 62*acc0bb12SSebastian Grimberg shutil.move(ceed_magma_gemm_selector_cpp + 63*acc0bb12SSebastian Grimberg ".backup", ceed_magma_gemm_selector_cpp) 6426bdecf3SSebastian Grimberg 65*acc0bb12SSebastian Grimberg # Run the benchmark 66*acc0bb12SSebastian Grimberg with open(log, "w") as f: 67*acc0bb12SSebastian Grimberg process = subprocess.run( 68*acc0bb12SSebastian Grimberg [f"{script_dir}/tuning", f"{backend}"], stdout=f, stderr=f) 69*acc0bb12SSebastian Grimberg csv = pd.read_csv( 70*acc0bb12SSebastian Grimberg log, 71*acc0bb12SSebastian Grimberg header=None, 72*acc0bb12SSebastian Grimberg delim_whitespace=True, 73*acc0bb12SSebastian Grimberg names=[ 74*acc0bb12SSebastian Grimberg "P", 75*acc0bb12SSebastian Grimberg "Q", 76*acc0bb12SSebastian Grimberg "N", 77*acc0bb12SSebastian Grimberg "Q_COMP", 78*acc0bb12SSebastian Grimberg "TRANS", 79*acc0bb12SSebastian Grimberg "MFLOPS"]) 80*acc0bb12SSebastian Grimberg return csv 8126bdecf3SSebastian Grimberg 8226bdecf3SSebastian Grimberg 8326bdecf3SSebastian Grimbergif __name__ == "__main__": 8426bdecf3SSebastian Grimberg # Command line arguments 8526bdecf3SSebastian Grimberg parser = argparse.ArgumentParser("MAGMA RTC autotuning") 8626bdecf3SSebastian Grimberg parser.add_argument( 8726bdecf3SSebastian Grimberg "-arch", 8826bdecf3SSebastian Grimberg help="Device architecture name for tuning data", 8926bdecf3SSebastian Grimberg required=True) 9026bdecf3SSebastian Grimberg parser.add_argument( 9126bdecf3SSebastian Grimberg "-max-nb", 9226bdecf3SSebastian Grimberg help="Maximum block size NB to consider for autotuning", 9326bdecf3SSebastian Grimberg default=32, 9426bdecf3SSebastian Grimberg type=int) 9526bdecf3SSebastian Grimberg parser.add_argument( 9626bdecf3SSebastian Grimberg "-build-cmd", 9726bdecf3SSebastian Grimberg help="Command used to build libCEED from the source root directory", 9826bdecf3SSebastian Grimberg default="make") 99*acc0bb12SSebastian Grimberg parser.add_argument( 100*acc0bb12SSebastian Grimberg "-ceed", 101*acc0bb12SSebastian Grimberg help="Ceed resource specifier", 102*acc0bb12SSebastian Grimberg default="/cpu/self") 10326bdecf3SSebastian Grimberg args = parser.parse_args() 10426bdecf3SSebastian Grimberg 10526bdecf3SSebastian Grimberg for nb in range(1, args.max_nb + 1): 10626bdecf3SSebastian Grimberg # Run the benchmarks 10726bdecf3SSebastian Grimberg start = time.perf_counter() 108*acc0bb12SSebastian Grimberg data_nb = benchmark(nb, args.build_cmd, args.ceed, 109*acc0bb12SSebastian Grimberg f"{script_dir}/output-nb-{nb}.txt") 11026bdecf3SSebastian Grimberg print( 11126bdecf3SSebastian Grimberg f"Finished benchmarks for NB = {nb}, backend = {args.ceed} ({time.perf_counter() - start} s)") 11226bdecf3SSebastian Grimberg 11326bdecf3SSebastian Grimberg # Save the data for the highest performing NB 11426bdecf3SSebastian Grimberg if nb == 1: 11526bdecf3SSebastian Grimberg data = pd.DataFrame(data_nb) 116*acc0bb12SSebastian Grimberg data["NB"] = nb 11726bdecf3SSebastian Grimberg else: 118*acc0bb12SSebastian Grimberg idx = data_nb["MFLOPS"] > 1.05 * data["MFLOPS"] 119*acc0bb12SSebastian Grimberg data.loc[idx, "NB"] = nb 120*acc0bb12SSebastian Grimberg data.loc[idx, "MFLOPS"] = data_nb.loc[idx, "MFLOPS"] 12126bdecf3SSebastian Grimberg 12226bdecf3SSebastian Grimberg # Print the results 123*acc0bb12SSebastian Grimberg with open(f"{script_dir}/{args.arch}_rtc.h", "w") as f: 12426bdecf3SSebastian Grimberg f.write( 12526bdecf3SSebastian Grimberg "////////////////////////////////////////////////////////////////////////////////\n") 12626bdecf3SSebastian Grimberg f.write(f"// auto-generated from data on {args.arch}\n\n") 12726bdecf3SSebastian Grimberg 128*acc0bb12SSebastian Grimberg rows = data.loc[data["TRANS"] == 1].to_string(header=False, index=False, justify="right", columns=[ 129*acc0bb12SSebastian Grimberg "P", "Q", "N", "Q_COMP", "NB"]).split("\n") 13026bdecf3SSebastian Grimberg f.write( 13126bdecf3SSebastian Grimberg "////////////////////////////////////////////////////////////////////////////////\n") 13226bdecf3SSebastian Grimberg f.write( 13326bdecf3SSebastian Grimberg f"std::vector<std::array<int, RECORD_LENGTH_RTC> > drtc_t_{args.arch}" + 13426bdecf3SSebastian Grimberg " = {\n") 13526bdecf3SSebastian Grimberg count = 0 13626bdecf3SSebastian Grimberg for row in rows: 137*acc0bb12SSebastian Grimberg f.write(" {" + re.sub(r"([0-9])(\s+)", r"\1,\2", row) + 13826bdecf3SSebastian Grimberg ("},\n" if count < len(rows) - 1 else "}\n")) 13926bdecf3SSebastian Grimberg count += 1 14026bdecf3SSebastian Grimberg f.write("};\n\n") 14126bdecf3SSebastian Grimberg 142*acc0bb12SSebastian Grimberg rows = data.loc[data["TRANS"] == 0].to_string(header=False, index=False, justify="right", columns=[ 143*acc0bb12SSebastian Grimberg "P", "Q", "N", "Q_COMP", "NB"]).split("\n") 14426bdecf3SSebastian Grimberg f.write( 14526bdecf3SSebastian Grimberg "////////////////////////////////////////////////////////////////////////////////\n") 14626bdecf3SSebastian Grimberg f.write( 14726bdecf3SSebastian Grimberg f"std::vector<std::array<int, RECORD_LENGTH_RTC> > drtc_n_{args.arch}" + 14826bdecf3SSebastian Grimberg " = {\n") 14926bdecf3SSebastian Grimberg count = 0 15026bdecf3SSebastian Grimberg for row in rows: 151*acc0bb12SSebastian Grimberg f.write(" {" + re.sub(r"([0-9])(\s+)", r"\1,\2", row) + 15226bdecf3SSebastian Grimberg ("},\n" if count < len(rows) - 1 else "}\n")) 15326bdecf3SSebastian Grimberg count += 1 15426bdecf3SSebastian Grimberg f.write("};\n") 155