diff options
Diffstat (limited to 'modules/remote-sycl/benchmarks')
-rw-r--r-- | modules/remote-sycl/benchmarks/SConscript | 39 | ||||
-rw-r--r-- | modules/remote-sycl/benchmarks/kernel_mixed_precision.cpp | 42 | ||||
-rw-r--r-- | modules/remote-sycl/benchmarks/mixed_precision.cpp | 110 | ||||
-rw-r--r-- | modules/remote-sycl/benchmarks/mixed_precision.hpp | 27 |
4 files changed, 218 insertions, 0 deletions
diff --git a/modules/remote-sycl/benchmarks/SConscript b/modules/remote-sycl/benchmarks/SConscript
new file mode 100644
index 0000000..9976e0e
--- /dev/null
+++ b/modules/remote-sycl/benchmarks/SConscript
@@ -0,0 +1,39 @@
+#!/bin/false
+
+import os
+import os.path
+import glob
+
+
+Import('env')
+
+dir_path = Dir('.').abspath
+
+# Environment for base library
+benchmarks_env = env.Clone();
+
+# Environment for the SYCL kernel translation unit: built with acpp at -O2
+benchmarks_sycl_env = benchmarks_env.Clone();
+benchmarks_sycl_env['CXX'] = 'acpp';
+benchmarks_sycl_env['CXXFLAGS'] += ['-O2'];
+
+benchmarks_env.sources = sorted(glob.glob(dir_path + "/*.cpp"))
+benchmarks_env.headers = sorted(glob.glob(dir_path + "/*.hpp"))
+
+env.sources += benchmarks_env.sources;
+env.headers += benchmarks_env.headers;
+
+sycl_objects = [];
+benchmarks_sycl_env.add_source_files(sycl_objects, ['kernel_mixed_precision.cpp'], shared=True);
+
+# Link the host driver with env.library_static and the SYCL kernel objects
+benchmarks_env.sycl_basic = benchmarks_env.Program('#bin/benchmark_mixed_precision', ['mixed_precision.cpp', env.library_static, sycl_objects]);
+
+# Set Alias
+env.benchmarks = [benchmarks_env.sycl_basic];
+env.Alias('benchmarks', env.benchmarks);
+
+if env["build_benchmarks"]:
+    env.targets += ['benchmarks'];
+    env.Install('$prefix/bin/', env.benchmarks);
+#endif
diff --git a/modules/remote-sycl/benchmarks/kernel_mixed_precision.cpp b/modules/remote-sycl/benchmarks/kernel_mixed_precision.cpp
new file mode 100644
index 0000000..0ac9756
--- /dev/null
+++ b/modules/remote-sycl/benchmarks/kernel_mixed_precision.cpp
@@ -0,0 +1,42 @@
+#include "mixed_precision.hpp"
+
+/**
+ * Builds the benchmark interface. Each handler submits one kernel to the queue
+ * it is given, multiplying every indexed element by 2, and stores the submit
+ * event in the matching reference. Handlers return without waiting and capture
+ * the events by reference, so the events must outlive the returned interface.
+ */
+saw::interface<sch::MixedPrecisionBenchmarkInterface, saw::encode::Native, saw::rmt::Sycl, cl::sycl::queue*> listen_mixed_precision(cl::sycl::event& mixed_ev, cl::sycl::event& float64_ev, cl::sycl::event& float32_ev){
+	return {
+		// "float64_32": MixedPrecision<Float64, Float32> elements times a Float64 factor
+		[&](saw::data<sch::MixedArray, saw::encode::Native, saw::rmt::Sycl>& in, cl::sycl::queue* cmd) -> saw::error_or<void> {
+			mixed_ev = cmd->submit([&](cl::sycl::handler& h){
+				auto acc_buff = in.template access<cl::sycl::access::mode::read_write>(h);
+				h.parallel_for(cl::sycl::range<1>(in.size()), [=] (cl::sycl::id<1> it){
+					acc_buff[0u].at(it[0u]) = acc_buff[0u].at(it[0u]) * saw::data<sch::Float64>{2.0};
+				});
+			});
+			return saw::void_t{};
+		},
+		// "float64": Float64 elements times a Float64 factor
+		[&](saw::data<sch::Float64Array, saw::encode::Native, saw::rmt::Sycl>& in, cl::sycl::queue* cmd) -> saw::error_or<void> {
+			float64_ev = cmd->submit([&](cl::sycl::handler& h){
+				auto acc_buff = in.template access<cl::sycl::access::mode::read_write>(h);
+				h.parallel_for(cl::sycl::range<1>(in.size()), [=] (cl::sycl::id<1> it){
+					acc_buff[0u].at(it[0u]) = acc_buff[0u].at(it[0u]) * saw::data<sch::Float64>{2.0};
+				});
+			});
+			return saw::void_t{};
+		},
+		// "float32": Float32 elements times a Float32 factor
+		[&](saw::data<sch::Float32Array, saw::encode::Native, saw::rmt::Sycl>& in, cl::sycl::queue* cmd) -> saw::error_or<void> {
+			float32_ev = cmd->submit([&](cl::sycl::handler& h){
+				auto acc_buff = in.template access<cl::sycl::access::mode::read_write>(h);
+				h.parallel_for(cl::sycl::range<1>(in.size()), [=] (cl::sycl::id<1> it){
+					acc_buff[0u].at(it[0u]) = acc_buff[0u].at(it[0u]) * saw::data<sch::Float32>{2.0f};
+				});
+			});
+			return saw::void_t{};
+		}
+	};
+}
diff --git a/modules/remote-sycl/benchmarks/mixed_precision.cpp b/modules/remote-sycl/benchmarks/mixed_precision.cpp
new file mode 100644
index 0000000..e63a814
--- /dev/null
+++ b/modules/remote-sycl/benchmarks/mixed_precision.cpp
@@ -0,0 +1,110 @@
+#include "./mixed_precision.hpp"
+#include <forstio/codec/schema.hpp>
+
+// Benchmark driver: runs the mixed, float64 and float32 kernels once at the
+// maximum size as warmup, then for every power-of-two size below it, and
+// prints the kernel time taken from the profiling info of each SYCL event.
+int main(){
+	using namespace saw;
+
+	constexpr uint64_t max_test_size = 1024ul * 1024ul * 512ul;
+
+	std::random_device r;
+	std::default_random_engine e1{r()};
+	std::uniform_real_distribution<> dis{-1.0,1.0};
+
+	saw::event_loop loop;
+	saw::wait_scope wait{loop};
+
+	remote<rmt::Sycl> rmt;
+
+	own<remote_address<rmt::Sycl>> rmt_addr{};
+
+	rmt.resolve_address().then([&](auto addr){
+		rmt_addr = std::move(addr);
+	}).detach();
+
+	wait.poll();
+	if(!rmt_addr){
+		return -1;
+	}
+
+	data<sch::MixedArray> mixed_host_data;
+	data<sch::Float64Array> float64_host_data;
+	data<sch::Float32Array> float32_host_data;
+
+	cl::sycl::event mixed_ev;
+	cl::sycl::event float32_ev;
+	cl::sycl::event float64_ev;
+
+	auto sycl_iface = listen_mixed_precision(mixed_ev, float64_ev, float32_ev);
+
+	auto time_eval = [](std::string_view tag, cl::sycl::event& ev){
+		auto end = ev.get_profiling_info<cl::sycl::info::event_profiling::command_end>();
+		auto start = ev.get_profiling_info<cl::sycl::info::event_profiling::command_start>();
+		// Profiling timestamps are nanoseconds; scale the difference to seconds
+		std::cout<<"Elapsed "<<tag<<" kernel time: "<< (end-start) / 1.0e9 << " seconds\n";
+	};
+
+	auto& device = rmt_addr->get_device();
+
+	/**
+	 * Warmup: one untimed run of every kernel at max_test_size
+	 */
+	{
+		uint64_t test_size = max_test_size;
+		mixed_host_data = {test_size};
+		float64_host_data = {test_size};
+		float32_host_data = {test_size};
+		for(uint64_t i = 0; i < test_size; ++i){
+			double gen_num = dis(e1);
+			mixed_host_data.at(i) = static_cast<double>(gen_num);
+			float64_host_data.at(i) = static_cast<double>(gen_num);
+			float32_host_data.at(i) = static_cast<float>(gen_num);
+		}
+		data<sch::MixedArray, encode::Native, rmt::Sycl> mixed_device_data{mixed_host_data};
+		data<sch::Float64Array, encode::Native, rmt::Sycl> float64_device_data{float64_host_data};
+		data<sch::Float32Array, encode::Native, rmt::Sycl> float32_device_data{float32_host_data};
+
+		sycl_iface.template call<"float64_32">(mixed_device_data, &(device.get_handle()));
+		device.get_handle().wait();
+		sycl_iface.template call<"float64">(float64_device_data, &(device.get_handle()));
+		device.get_handle().wait();
+		sycl_iface.template call<"float32">(float32_device_data, &(device.get_handle()));
+		device.get_handle().wait();
+	}
+
+	/**
+	 * Benchmark: sizes 1, 2, 4, ... below max_test_size; queue passed as in the warmup
+	 */
+	for(uint64_t test_size = 1ul; test_size < max_test_size; test_size *= 2ul){
+		device.get_handle().wait();
+		mixed_host_data = {test_size};
+		float64_host_data = {test_size};
+		float32_host_data = {test_size};
+		for(uint64_t i = 0; i < test_size; ++i){
+			double gen_num = dis(e1);
+			mixed_host_data.at(i) = static_cast<double>(gen_num);
+			float64_host_data.at(i) = static_cast<double>(gen_num);
+			float32_host_data.at(i) = static_cast<float>(gen_num);
+		}
+		data<sch::MixedArray, encode::Native, rmt::Sycl> mixed_device_data{mixed_host_data};
+		data<sch::Float64Array, encode::Native, rmt::Sycl> float64_device_data{float64_host_data};
+		data<sch::Float32Array, encode::Native, rmt::Sycl> float32_device_data{float32_host_data};
+
+		sycl_iface.template call<"float64_32">(mixed_device_data, &(device.get_handle()));
+		device.get_handle().wait();
+		sycl_iface.template call<"float64">(float64_device_data, &(device.get_handle()));
+		device.get_handle().wait();
+		sycl_iface.template call<"float32">(float32_device_data, &(device.get_handle()));
+		device.get_handle().wait();
+
+		std::cout<<"\nSize: "<<test_size<<'\n';
+		time_eval("Mixed", mixed_ev);
+		time_eval("Float32", float32_ev);
+		time_eval("Float64", float64_ev);
+	}
+	std::cout<<std::endl;
+
+	return 0;
+}
diff --git a/modules/remote-sycl/benchmarks/mixed_precision.hpp b/modules/remote-sycl/benchmarks/mixed_precision.hpp
new file mode 100644
index 0000000..3462bcd
--- /dev/null
+++ b/modules/remote-sycl/benchmarks/mixed_precision.hpp
@@ -0,0 +1,27 @@
+#pragma once
+
+#include "../c++/remote.hpp"
+
+namespace sch {
+using namespace saw::schema;
+
+using MixedArray = Array<
+	MixedPrecision<Float64, Float32>
+>;
+
+using Float64Array = Array<
+	Float64
+>;
+
+using Float32Array = Array<
+	Float32
+>;
+// One Void-returning function per array type, named after its precision
+using MixedPrecisionBenchmarkInterface = Interface<
+	Member<Function<MixedArray,Void>, "float64_32">,
+	Member<Function<Float64Array,Void>, "float64">,
+	Member<Function<Float32Array,Void>, "float32">
+>;
+}
+// Builds the SYCL handlers; each stores its submit event in the given reference
+saw::interface<sch::MixedPrecisionBenchmarkInterface, saw::encode::Native, saw::rmt::Sycl, cl::sycl::queue*> listen_mixed_precision(cl::sycl::event& mixed_ev, cl::sycl::event& float64_ev, cl::sycl::event& float32_ev);