From ff5535b51730974b7933dd93e140579f3232a275 Mon Sep 17 00:00:00 2001 From: "Claudius \"keldu\" Holeksa" Date: Thu, 4 Jul 2024 11:45:02 +0200 Subject: Ammending kernel work --- modules/codec/c++/data.hpp | 10 +- modules/remote-sycl/benchmarks/SConscript | 3 +- .../benchmarks/kernel_mixed_precision.cpp | 29 ++++- modules/remote-sycl/benchmarks/mixed_precision.cpp | 139 +++++++++++++++------ modules/remote-sycl/benchmarks/mixed_precision.hpp | 2 +- 5 files changed, 137 insertions(+), 46 deletions(-) (limited to 'modules') diff --git a/modules/codec/c++/data.hpp b/modules/codec/c++/data.hpp index dd70cd9..05bd8e2 100644 --- a/modules/codec/c++/data.hpp +++ b/modules/codec/c++/data.hpp @@ -147,7 +147,7 @@ public: * Casts */ template - data cast_to(){ + data cast_to() const { auto raw_to = static_cast::type>(value_); return {raw_to}; } @@ -170,9 +170,13 @@ public: typename saw::native_data_type::type get() const { return value_.template cast_to().get(); } + + data(const saw::data& val){ + value_ = val.template cast_to(); + } void set(typename saw::native_data_type::type val){ - value_.set(val.template cast_to().get()); + value_.set(static_cast(val)); } data operator*(const data& rhs) const { @@ -182,7 +186,7 @@ public: return {left * right}; } - data operator*(const data& rhs) const { + data operator*(const data& rhs) const { using CalcType = typename native_data_type::type; CalcType left = static_cast(value_.get()); CalcType right = rhs.get(); diff --git a/modules/remote-sycl/benchmarks/SConscript b/modules/remote-sycl/benchmarks/SConscript index 9976e0e..434993c 100644 --- a/modules/remote-sycl/benchmarks/SConscript +++ b/modules/remote-sycl/benchmarks/SConscript @@ -12,10 +12,9 @@ dir_path = Dir('.').abspath # Environment for base library benchmarks_env = env.Clone(); - benchmarks_sycl_env = benchmarks_env.Clone(); benchmarks_sycl_env['CXX'] = 'acpp'; -benchmarks_sycl_env['CXXFLAGS'] += ['-O2']; +benchmarks_sycl_env['CXXFLAGS'] += ['-O3']; benchmarks_env.sources = sorted(glob.glob(dir_path + "/*.cpp")) benchmarks_env.headers = sorted(glob.glob(dir_path + "/*.hpp")) diff --git a/modules/remote-sycl/benchmarks/kernel_mixed_precision.cpp b/modules/remote-sycl/benchmarks/kernel_mixed_precision.cpp index 1c82361..591ded2 100644 --- a/modules/remote-sycl/benchmarks/kernel_mixed_precision.cpp +++ b/modules/remote-sycl/benchmarks/kernel_mixed_precision.cpp @@ -1,6 +1,6 @@ #include "mixed_precision.hpp" -saw::interface listen_mixed_precision(cl::sycl::event& mixed_ev, cl::sycl::event& float64_ev, cl::sycl::event& float32_ev){ +saw::interface listen_mixed_precision(cl::sycl::event& mixed_ev, cl::sycl::event& float64_ev, cl::sycl::event& float32_ev, uint64_t& arithmetic_intensity){ return { /** * Mixed @@ -12,7 +12,14 @@ saw::interface(h); h.parallel_for(cl::sycl::range<1>(in_size), [=] (cl::sycl::id<1> it){ - acc_buff[0u].at(it[0u]) = acc_buff[0u].at(it[0u]) * saw::data{1.7342345}; + saw::data foo = {acc_buff[0u].at(it[0u]).get()}; + for(uint64_t i = 0; i < arithmetic_intensity; ++i){ + if( foo.get() == 1.1e12 ){ + acc_buff[0u].at(it[0u]) = 0.f; + } + foo = foo + foo * saw::data{1.7342345}; + } + acc_buff[0u].at(it[0u]) = foo; }); }); return saw::void_t{}; @@ -23,7 +30,14 @@ saw::interface(h); h.parallel_for(cl::sycl::range<1>(in_size), [=] (cl::sycl::id<1> it){ - acc_buff[0u].at(it[0u]) = acc_buff[0u].at(it[0u]) * saw::data{1.7342345}; + saw::data foo = {acc_buff[0u].at(it[0u]).get()}; + for(uint64_t i = 0; i < arithmetic_intensity; ++i){ + if( foo == saw::data{1.1e12} ){ + acc_buff[0u].at(it[0u]) = 0.f; + } + foo = foo +foo * saw::data{1.7342345}; + } + acc_buff[0u].at(it[0u]) = foo; }); }); return saw::void_t{}; @@ -34,7 +48,14 @@ saw::interface(h); h.parallel_for(cl::sycl::range<1>(in_size), [=] (cl::sycl::id<1> it){ - acc_buff[0u].at(it[0u]) = acc_buff[0u].at(it[0u]) * saw::data{1.7342345f}; + saw::data foo = {acc_buff[0u].at(it[0u]).get()}; + for(uint64_t i = 0; i < arithmetic_intensity; ++i){ + if( foo == saw::data{1.1e12f} ){ + acc_buff[0u].at(it[0u]) = 0.f; + } + foo = foo + foo * saw::data{1.7342345f}; + } + acc_buff[0u].at(it[0u]) = foo; }); }); return saw::void_t{}; diff --git a/modules/remote-sycl/benchmarks/mixed_precision.cpp b/modules/remote-sycl/benchmarks/mixed_precision.cpp index b554a1c..e804f4e 100644 --- a/modules/remote-sycl/benchmarks/mixed_precision.cpp +++ b/modules/remote-sycl/benchmarks/mixed_precision.cpp @@ -3,15 +3,69 @@ #include -int main(){ +int main(int argc, char** argv){ using namespace saw; uint64_t start_test_size = 1024ul * 1024ul; + + if(argc <= 0 || argc >= 256){ + std::cerr<<"Argument size being weird. Got "< args; + args.resize(static_cast(argc)); + for(uint64_t i = 0; i < args.size(); ++i){ + args.at(i) = {argv[i]}; + } + if(args.size() > 1){ + auto& str = args.at(1); + auto ec = std::from_chars(str.data(), str.data() + str.size(), start_test_size); + if(ec.ec != std::errc{}){ + std::cerr<<"Start size is not an int."< 2){ + auto& str = args.at(2); + auto ec = std::from_chars(str.data(), str.data() + str.size(), max_test_size); + if(ec.ec != std::errc{}){ + std::cerr<<"Stop size is not an int."< max_test_size){ + std::cerr<<"Invalid arguments. Stop size is smaller than Start size."< 3){ + auto& str = args.at(3); + auto ec = std::from_chars(str.data(), str.data() + str.size(), runs); + if(ec.ec != std::errc{}){ + std::cerr<<"Run size is not an int."< 4){ + auto& str = args.at(4); + auto ec = std::from_chars(str.data(), str.data() + str.size(), arithmetic_intensity); + if(ec.ec != std::errc{}){ + std::cerr<<"Arithmetic intensity is not an int."< dis{-1.0,1.0}; + std::uniform_real_distribution<> dis{-3.0,-1.0}; saw::event_loop loop; @@ -34,17 +88,18 @@ int main(){ cl::sycl::event float32_ev; cl::sycl::event float64_ev; - auto sycl_iface = listen_mixed_precision(mixed_ev, float64_ev, float32_ev); + auto sycl_iface = listen_mixed_precision(mixed_ev, float64_ev, float32_ev, arithmetic_intensity); data mixed_host_data; data float64_host_data; data float32_host_data; - auto time_eval = [](std::stringstream& sstr, cl::sycl::event& ev){ + auto time_eval = [](uint64_t & current_min_time, cl::sycl::event& ev){ auto end = ev.get_profiling_info(); auto start = ev.get_profiling_info(); - sstr<<(end-start) / 1.0e9; + uint64_t curr_time = (end-start); + current_min_time = std::min(curr_time, current_min_time); }; auto& device = rmt_addr->get_device(); @@ -79,41 +134,53 @@ int main(){ * Benchmark */ std::stringstream sstr; - for(uint64_t test_size = start_test_size; test_size < max_test_size; test_size *= 2ul){ - - (std::cout<<'.').flush(); - - data mixed_host_data; - data float64_host_data; - data float32_host_data; - mixed_host_data = {test_size}; - float64_host_data = {test_size}; - float32_host_data = {test_size}; - for(uint64_t i = 0; i < test_size; ++i){ - double gen_num = dis(e1); - mixed_host_data.at(i) = static_cast(gen_num); - float64_host_data.at(i) = static_cast(gen_num); - float32_host_data.at(i) = static_cast(gen_num); + for(uint64_t test_size = start_test_size; test_size <= max_test_size; test_size *= 2ul){ + uint64_t time_mixed = std::numeric_limits::max(); + uint64_t time_float64 = std::numeric_limits::max(); + uint64_t time_float32 = std::numeric_limits::max(); + for(uint64_t runs_i = 0u; runs_i < runs; ++runs_i){ + + (std::cout<<'.').flush(); + + data mixed_host_data; + data float64_host_data; + data float32_host_data; + + mixed_host_data = {test_size}; + float64_host_data = {test_size}; + float32_host_data = {test_size}; + + for(uint64_t i = 0; i < test_size; ++i){ + double gen_num = dis(e1); + mixed_host_data.at(i) = static_cast(gen_num); + float64_host_data.at(i) = static_cast(gen_num); + float32_host_data.at(i) = static_cast(gen_num); + } + + data mixed_device_data{mixed_host_data}; + data float64_device_data{float64_host_data}; + data float32_device_data{float32_host_data}; + + sycl_iface.template call<"float64_32">(mixed_device_data, &(device.get_handle())); + device.get_handle().wait(); + time_eval(time_mixed, mixed_ev); + sycl_iface.template call<"float64">(float64_device_data, &(device.get_handle())); + device.get_handle().wait(); + time_eval(time_float64, float64_ev); + sycl_iface.template call<"float32">(float32_device_data, &(device.get_handle())); + device.get_handle().wait(); + time_eval(time_float32, float32_ev); } - data mixed_device_data{mixed_host_data}; - data float64_device_data{float64_host_data}; - data float32_device_data{float32_host_data}; - - sstr<(mixed_device_data, &(device.get_handle())); - device.get_handle().wait(); - time_eval(sstr, mixed_ev); + sstr<(float64_device_data, &(device.get_handle())); - device.get_handle().wait(); - time_eval(sstr, float64_ev); + sstr<(float32_device_data, &(device.get_handle())); - device.get_handle().wait(); - time_eval(sstr, float32_ev); - sstr<<'\n'; + sstr<; } -saw::interface listen_mixed_precision(cl::sycl::event& mixed_ev, cl::sycl::event& float64_ev, cl::sycl::event& float32_ev); +saw::interface listen_mixed_precision(cl::sycl::event& mixed_ev, cl::sycl::event& float64_ev, cl::sycl::event& float32_ev, uint64_t& arithmetic_intensity); -- cgit v1.2.3