summary refs log tree commit diff
path: root/modules/remote-sycl/benchmarks/mixed_precision.cpp
diff options
context:
space:
mode:
author Claudius "keldu" Holeksa <mail@keldu.de> 2024-07-02 19:46:02 +0200
committer Claudius "keldu" Holeksa <mail@keldu.de> 2024-07-02 19:46:02 +0200
commit 25e05907f0292310eaae27a032db0ee274413874 (patch)
tree 283de0ebb6b61add2221436a77bb09e2ff101080 /modules/remote-sycl/benchmarks/mixed_precision.cpp
parent e51d2b1c0493dfd30d1622c8a0628ecf98c92f1c (diff)
Preparing benchmark work
Diffstat (limited to 'modules/remote-sycl/benchmarks/mixed_precision.cpp')
-rw-r--r-- modules/remote-sycl/benchmarks/mixed_precision.cpp 110
1 files changed, 110 insertions, 0 deletions
diff --git a/modules/remote-sycl/benchmarks/mixed_precision.cpp b/modules/remote-sycl/benchmarks/mixed_precision.cpp
new file mode 100644
index 0000000..e63a814
--- /dev/null
+++ b/modules/remote-sycl/benchmarks/mixed_precision.cpp
@@ -0,0 +1,110 @@
+#include "./mixed_precision.hpp"
+#include <forstio/codec/schema.hpp>
+
+
+int main(){ // Benchmark driver: runs the "float64_32", "float64" and "float32" calls on arrays of growing size and prints the kernel time of each.
+ using namespace saw;
+ // Upper bound for the array length: 1024 * 1024 * 512 = 2^29 elements.
+ constexpr uint64_t max_test_size = 1024ul * 1024ul * 512ul;
+ // Random source for the host data: doubles drawn uniformly from [-1.0, 1.0).
+ std::random_device r;
+ std::default_random_engine e1{r()};
+ std::uniform_real_distribution<> dis{-1.0,1.0};
+
+ // Event loop plus wait scope; wait.poll() below is presumably what lets the resolve_address() continuation run (confirm against saw::wait_scope).
+ saw::event_loop loop;
+ saw::wait_scope wait{loop};
+
+ remote<rmt::Sycl> rmt;
+ // Stays null until the continuation below stores the resolved address.
+ own<remote_address<rmt::Sycl>> rmt_addr{};
+
+ rmt.resolve_address().then([&](auto addr){
+ rmt_addr = std::move(addr);
+ }).detach();
+ // NOTE(review): a single poll is assumed to be enough for the resolution to finish; if it is not, main exits with -1.
+ wait.poll();
+ if(!rmt_addr){
+ return -1; // no address was stored by the continuation
+ }
+ // Host-side buffers; they are re-created for every test size below.
+ data<sch::MixedArray> mixed_host_data;
+ data<sch::Float64Array> float64_host_data;
+ data<sch::Float32Array> float32_host_data;
+ // Events handed to listen_mixed_precision(); time_eval reads their profiling info after each run.
+ cl::sycl::event mixed_ev;
+ cl::sycl::event float32_ev;
+ cl::sycl::event float64_ev;
+ // NOTE(review): arguments are passed as (mixed, float64, float32) while the declarations above are ordered (mixed, float32, float64) — confirm against the listen_mixed_precision signature.
+ auto sycl_iface = listen_mixed_precision(mixed_ev, float64_ev, float32_ev);
+ // Prints the difference between the event's command_end and command_start profiling values, labelled with `tag`.
+ auto time_eval = [](std::string_view tag, cl::sycl::event& ev){
+ auto end = ev.get_profiling_info<cl::sycl::info::event_profiling::command_end>();
+ auto start = ev.get_profiling_info<cl::sycl::info::event_profiling::command_start>();
+
+ std::cout<<"Elapsed "<<tag<<" kernel time: "<< (end-start) / 1.0e9 << " seconds\n"; // SYCL reports these timestamps in nanoseconds, hence the division by 1e9
+ };
+ // NOTE(review): SYCL only provides profiling info for queues created with profiling enabled — the queue setup is not visible here, confirm.
+ auto& device = rmt_addr->get_device();
+
+ /**
+ * Warmup: runs each of the three calls once at max_test_size; no timings are printed.
+ */
+ {
+ uint64_t test_size = max_test_size;
+ mixed_host_data = {test_size};
+ float64_host_data = {test_size};
+ float32_host_data = {test_size};
+ for(uint64_t i = 0; i < test_size; ++i){ // the same random value goes into all three buffers (narrowed to float for the float32 one)
+ double gen_num = dis(e1);
+ mixed_host_data.at(i) = static_cast<double>(gen_num);
+ float64_host_data.at(i) = static_cast<double>(gen_num);
+ float32_host_data.at(i) = static_cast<float>(gen_num);
+ }
+ data<sch::MixedArray, encode::Native, rmt::Sycl> mixed_device_data{mixed_host_data}; // rmt::Sycl counterparts built from the host buffers (presumably uploads the data — confirm)
+ data<sch::Float64Array, encode::Native, rmt::Sycl> float64_device_data{float64_host_data};
+ data<sch::Float32Array, encode::Native, rmt::Sycl> float32_device_data{float32_host_data};
+ // Issue the calls one at a time, waiting on the device handle after each.
+ sycl_iface.template call<"float64_32">(mixed_device_data, &(device.get_handle()));
+ device.get_handle().wait();
+ sycl_iface.template call<"float64">(float64_device_data, &(device.get_handle()));
+ device.get_handle().wait();
+ sycl_iface.template call<"float32">(float32_device_data, &(device.get_handle()));
+ device.get_handle().wait();
+
+ }
+
+ /**
+ * Benchmark: test_size starts at 1 and doubles while it is below max_test_size, so max_test_size itself is only exercised by the warmup.
+ */
+ for(uint64_t test_size = 1ul; test_size < max_test_size; test_size *= 2ul){
+ device.get_handle().wait(); // wait on the device handle before preparing this iteration's data
+ mixed_host_data = {test_size};
+ float64_host_data = {test_size};
+ float32_host_data = {test_size};
+ for(uint64_t i = 0; i < test_size; ++i){ // same fill scheme as in the warmup
+ double gen_num = dis(e1);
+ mixed_host_data.at(i) = static_cast<double>(gen_num);
+ float64_host_data.at(i) = static_cast<double>(gen_num);
+ float32_host_data.at(i) = static_cast<float>(gen_num);
+ }
+ data<sch::MixedArray, encode::Native, rmt::Sycl> mixed_device_data{mixed_host_data};
+ data<sch::Float64Array, encode::Native, rmt::Sycl> float64_device_data{float64_host_data};
+ data<sch::Float32Array, encode::Native, rmt::Sycl> float32_device_data{float32_host_data};
+ // NOTE(review): unlike the warmup, these calls do not pass &(device.get_handle()) — confirm this is intended.
+ sycl_iface.template call<"float64_32">(mixed_device_data);
+ device.get_handle().wait();
+ sycl_iface.template call<"float64">(float64_device_data);
+ device.get_handle().wait();
+ sycl_iface.template call<"float32">(float32_device_data);
+ device.get_handle().wait();
+ // Report the times read from the three events for this size (presumably the events are updated by the calls above — that wiring lives in listen_mixed_precision).
+ std::cout<<"\nSize: "<<test_size<<'\n';
+ time_eval("Mixed", mixed_ev);
+ time_eval("Float32", float32_ev);
+ time_eval("Float64", float64_ev);
+ }
+ std::cout<<std::endl;
+
+ return 0;
+}