diff options
author | Claudius "keldu" Holeksa <mail@keldu.de> | 2024-07-04 11:45:02 +0200 |
---|---|---|
committer | Claudius "keldu" Holeksa <mail@keldu.de> | 2024-07-04 11:45:02 +0200 |
commit | ff5535b51730974b7933dd93e140579f3232a275 (patch) | |
tree | 967521f8aa7c208475dae63fc82eaff7e52bfb7b /modules/remote-sycl | |
parent | fe49d9fcac2f0e45d998abc1909c1a3e35ec83ce (diff) |
Amending kernel work
Diffstat (limited to 'modules/remote-sycl')
-rw-r--r-- | modules/remote-sycl/benchmarks/SConscript | 3 | ||||
-rw-r--r-- | modules/remote-sycl/benchmarks/kernel_mixed_precision.cpp | 29 | ||||
-rw-r--r-- | modules/remote-sycl/benchmarks/mixed_precision.cpp | 139 | ||||
-rw-r--r-- | modules/remote-sycl/benchmarks/mixed_precision.hpp | 2 |
4 files changed, 130 insertions, 43 deletions
diff --git a/modules/remote-sycl/benchmarks/SConscript b/modules/remote-sycl/benchmarks/SConscript index 9976e0e..434993c 100644 --- a/modules/remote-sycl/benchmarks/SConscript +++ b/modules/remote-sycl/benchmarks/SConscript @@ -12,10 +12,9 @@ dir_path = Dir('.').abspath # Environment for base library benchmarks_env = env.Clone(); - benchmarks_sycl_env = benchmarks_env.Clone(); benchmarks_sycl_env['CXX'] = 'acpp'; -benchmarks_sycl_env['CXXFLAGS'] += ['-O2']; +benchmarks_sycl_env['CXXFLAGS'] += ['-O3']; benchmarks_env.sources = sorted(glob.glob(dir_path + "/*.cpp")) benchmarks_env.headers = sorted(glob.glob(dir_path + "/*.hpp")) diff --git a/modules/remote-sycl/benchmarks/kernel_mixed_precision.cpp b/modules/remote-sycl/benchmarks/kernel_mixed_precision.cpp index 1c82361..591ded2 100644 --- a/modules/remote-sycl/benchmarks/kernel_mixed_precision.cpp +++ b/modules/remote-sycl/benchmarks/kernel_mixed_precision.cpp @@ -1,6 +1,6 @@ #include "mixed_precision.hpp" -saw::interface<sch::MixedPrecisionBenchmarkInterface, saw::encode::Native, saw::rmt::Sycl, cl::sycl::queue*> listen_mixed_precision(cl::sycl::event& mixed_ev, cl::sycl::event& float64_ev, cl::sycl::event& float32_ev){ +saw::interface<sch::MixedPrecisionBenchmarkInterface, saw::encode::Native, saw::rmt::Sycl, cl::sycl::queue*> listen_mixed_precision(cl::sycl::event& mixed_ev, cl::sycl::event& float64_ev, cl::sycl::event& float32_ev, uint64_t& arithmetic_intensity){ return { /** * Mixed @@ -12,7 +12,14 @@ saw::interface<sch::MixedPrecisionBenchmarkInterface, saw::encode::Native, saw:: auto acc_buff = in.template access<cl::sycl::access::mode::read_write>(h); h.parallel_for(cl::sycl::range<1>(in_size), [=] (cl::sycl::id<1> it){ - acc_buff[0u].at(it[0u]) = acc_buff[0u].at(it[0u]) * saw::data<sch::Float64>{1.7342345}; + saw::data<sch::Float64> foo = {acc_buff[0u].at(it[0u]).get()}; + for(uint64_t i = 0; i < arithmetic_intensity; ++i){ + if( foo.get() == 1.1e12 ){ + acc_buff[0u].at(it[0u]) = 0.f; + } + foo = foo + 
foo * saw::data<sch::Float64>{1.7342345}; + } + acc_buff[0u].at(it[0u]) = foo; }); }); return saw::void_t{}; @@ -23,7 +30,14 @@ saw::interface<sch::MixedPrecisionBenchmarkInterface, saw::encode::Native, saw:: auto acc_buff = in.template access<cl::sycl::access::mode::read_write>(h); h.parallel_for(cl::sycl::range<1>(in_size), [=] (cl::sycl::id<1> it){ - acc_buff[0u].at(it[0u]) = acc_buff[0u].at(it[0u]) * saw::data<sch::Float64>{1.7342345}; + saw::data<sch::Float64> foo = {acc_buff[0u].at(it[0u]).get()}; + for(uint64_t i = 0; i < arithmetic_intensity; ++i){ + if( foo == saw::data<sch::Float64>{1.1e12} ){ + acc_buff[0u].at(it[0u]) = 0.f; + } + foo = foo +foo * saw::data<sch::Float64>{1.7342345}; + } + acc_buff[0u].at(it[0u]) = foo; }); }); return saw::void_t{}; @@ -34,7 +48,14 @@ saw::interface<sch::MixedPrecisionBenchmarkInterface, saw::encode::Native, saw:: auto acc_buff = in.template access<cl::sycl::access::mode::read_write>(h); h.parallel_for(cl::sycl::range<1>(in_size), [=] (cl::sycl::id<1> it){ - acc_buff[0u].at(it[0u]) = acc_buff[0u].at(it[0u]) * saw::data<sch::Float32>{1.7342345f}; + saw::data<sch::Float32> foo = {acc_buff[0u].at(it[0u]).get()}; + for(uint64_t i = 0; i < arithmetic_intensity; ++i){ + if( foo == saw::data<sch::Float32>{1.1e12f} ){ + acc_buff[0u].at(it[0u]) = 0.f; + } + foo = foo + foo * saw::data<sch::Float32>{1.7342345f}; + } + acc_buff[0u].at(it[0u]) = foo; }); }); return saw::void_t{}; diff --git a/modules/remote-sycl/benchmarks/mixed_precision.cpp b/modules/remote-sycl/benchmarks/mixed_precision.cpp index b554a1c..e804f4e 100644 --- a/modules/remote-sycl/benchmarks/mixed_precision.cpp +++ b/modules/remote-sycl/benchmarks/mixed_precision.cpp @@ -3,15 +3,69 @@ #include <sstream> -int main(){ +int main(int argc, char** argv){ using namespace saw; uint64_t start_test_size = 1024ul * 1024ul; + + if(argc <= 0 || argc >= 256){ + std::cerr<<"Argument size being weird. 
Got "<<argc<<" args"<<std::endl; + return -1; + } + + std::vector<std::string_view> args; + args.resize(static_cast<uint64_t>(argc)); + for(uint64_t i = 0; i < args.size(); ++i){ + args.at(i) = {argv[i]}; + } + if(args.size() > 1){ + auto& str = args.at(1); + auto ec = std::from_chars(str.data(), str.data() + str.size(), start_test_size); + if(ec.ec != std::errc{}){ + std::cerr<<"Start size is not an int."<<std::endl; + return -1; + } + } uint64_t max_test_size = start_test_size * 1024ul; + + if(args.size() > 2){ + auto& str = args.at(2); + auto ec = std::from_chars(str.data(), str.data() + str.size(), max_test_size); + if(ec.ec != std::errc{}){ + std::cerr<<"Stop size is not an int."<<std::endl; + return -1; + } + } + + if(start_test_size > max_test_size){ + std::cerr<<"Invalid arguments. Stop size is smaller than Start size."<<std::endl; + return -1; + } + + uint64_t runs = 128ul; + + if(args.size() > 3){ + auto& str = args.at(3); + auto ec = std::from_chars(str.data(), str.data() + str.size(), runs); + if(ec.ec != std::errc{}){ + std::cerr<<"Run size is not an int."<<std::endl; + return -1; + } + } + uint64_t arithmetic_intensity = 1u; + if(args.size() > 4){ + auto& str = args.at(4); + auto ec = std::from_chars(str.data(), str.data() + str.size(), arithmetic_intensity); + if(ec.ec != std::errc{}){ + std::cerr<<"Arithmetic intensity is not an int."<<std::endl; + return -1; + } + } + std::random_device r; std::default_random_engine e1{r()}; - std::uniform_real_distribution<> dis{-1.0,1.0}; + std::uniform_real_distribution<> dis{-3.0,-1.0}; saw::event_loop loop; @@ -34,17 +88,18 @@ int main(){ cl::sycl::event float32_ev; cl::sycl::event float64_ev; - auto sycl_iface = listen_mixed_precision(mixed_ev, float64_ev, float32_ev); + auto sycl_iface = listen_mixed_precision(mixed_ev, float64_ev, float32_ev, arithmetic_intensity); data<sch::MixedArray> mixed_host_data; data<sch::Float64Array> float64_host_data; data<sch::Float32Array> float32_host_data; - auto time_eval = 
[](std::stringstream& sstr, cl::sycl::event& ev){ + auto time_eval = [](uint64_t & current_min_time, cl::sycl::event& ev){ auto end = ev.get_profiling_info<cl::sycl::info::event_profiling::command_end>(); auto start = ev.get_profiling_info<cl::sycl::info::event_profiling::command_start>(); - sstr<<(end-start) / 1.0e9; + uint64_t curr_time = (end-start); + current_min_time = std::min(curr_time, current_min_time); }; auto& device = rmt_addr->get_device(); @@ -79,41 +134,53 @@ int main(){ * Benchmark */ std::stringstream sstr; - for(uint64_t test_size = start_test_size; test_size < max_test_size; test_size *= 2ul){ - - (std::cout<<'.').flush(); - - data<sch::MixedArray> mixed_host_data; - data<sch::Float64Array> float64_host_data; - data<sch::Float32Array> float32_host_data; - mixed_host_data = {test_size}; - float64_host_data = {test_size}; - float32_host_data = {test_size}; - for(uint64_t i = 0; i < test_size; ++i){ - double gen_num = dis(e1); - mixed_host_data.at(i) = static_cast<double>(gen_num); - float64_host_data.at(i) = static_cast<double>(gen_num); - float32_host_data.at(i) = static_cast<float>(gen_num); + for(uint64_t test_size = start_test_size; test_size <= max_test_size; test_size *= 2ul){ + uint64_t time_mixed = std::numeric_limits<uint64_t>::max(); + uint64_t time_float64 = std::numeric_limits<uint64_t>::max(); + uint64_t time_float32 = std::numeric_limits<uint64_t>::max(); + for(uint64_t runs_i = 0u; runs_i < runs; ++runs_i){ + + (std::cout<<'.').flush(); + + data<sch::MixedArray> mixed_host_data; + data<sch::Float64Array> float64_host_data; + data<sch::Float32Array> float32_host_data; + + mixed_host_data = {test_size}; + float64_host_data = {test_size}; + float32_host_data = {test_size}; + + for(uint64_t i = 0; i < test_size; ++i){ + double gen_num = dis(e1); + mixed_host_data.at(i) = static_cast<double>(gen_num); + float64_host_data.at(i) = static_cast<double>(gen_num); + float32_host_data.at(i) = static_cast<float>(gen_num); + } + + 
data<sch::MixedArray, encode::Native, rmt::Sycl> mixed_device_data{mixed_host_data}; + data<sch::Float64Array, encode::Native, rmt::Sycl> float64_device_data{float64_host_data}; + data<sch::Float32Array, encode::Native, rmt::Sycl> float32_device_data{float32_host_data}; + + sycl_iface.template call<"float64_32">(mixed_device_data, &(device.get_handle())); + device.get_handle().wait(); + time_eval(time_mixed, mixed_ev); + sycl_iface.template call<"float64">(float64_device_data, &(device.get_handle())); + device.get_handle().wait(); + time_eval(time_float64, float64_ev); + sycl_iface.template call<"float32">(float32_device_data, &(device.get_handle())); + device.get_handle().wait(); + time_eval(time_float32, float32_ev); } - data<sch::MixedArray, encode::Native, rmt::Sycl> mixed_device_data{mixed_host_data}; - data<sch::Float64Array, encode::Native, rmt::Sycl> float64_device_data{float64_host_data}; - data<sch::Float32Array, encode::Native, rmt::Sycl> float32_device_data{float32_host_data}; - - sstr<<test_size<<",\t"; - sycl_iface.template call<"float64_32">(mixed_device_data, &(device.get_handle())); - device.get_handle().wait(); - time_eval(sstr, mixed_ev); + sstr<<test_size; sstr<<",\t"; - sycl_iface.template call<"float64">(float64_device_data, &(device.get_handle())); - device.get_handle().wait(); - time_eval(sstr, float64_ev); + sstr<<time_mixed / 1.0e9; sstr<<",\t"; - sycl_iface.template call<"float32">(float32_device_data, &(device.get_handle())); - device.get_handle().wait(); - time_eval(sstr, float32_ev); - sstr<<'\n'; + sstr<<time_float64 / 1.0e9; + sstr<<",\t"; + sstr<<time_float32 / 1.0e9; + sstr<<"\n"; } - std::cout<<sstr.str()<<std::endl; + std::cout<<'\n'<<'\n'<<sstr.str()<<std::endl; return 0; } diff --git a/modules/remote-sycl/benchmarks/mixed_precision.hpp b/modules/remote-sycl/benchmarks/mixed_precision.hpp index 3462bcd..784b9b5 100644 --- a/modules/remote-sycl/benchmarks/mixed_precision.hpp +++ 
b/modules/remote-sycl/benchmarks/mixed_precision.hpp @@ -24,4 +24,4 @@ using MixedPrecisionBenchmarkInterface = Interface< >; } -saw::interface<sch::MixedPrecisionBenchmarkInterface, saw::encode::Native, saw::rmt::Sycl, cl::sycl::queue*> listen_mixed_precision(cl::sycl::event& mixed_ev, cl::sycl::event& float64_ev, cl::sycl::event& float32_ev); +saw::interface<sch::MixedPrecisionBenchmarkInterface, saw::encode::Native, saw::rmt::Sycl, cl::sycl::queue*> listen_mixed_precision(cl::sycl::event& mixed_ev, cl::sycl::event& float64_ev, cl::sycl::event& float32_ev, uint64_t& arithmetic_intensity); |