From 51fd65c247ff6d603c1fe27d697e46288ac73022 Mon Sep 17 00:00:00 2001 From: "Claudius \"keldu\" Holeksa" Date: Thu, 30 Oct 2025 10:53:14 +0100 Subject: Reworking Sycl. Semi broken state --- modules/remote-sycl/.nix/derivation.nix | 8 +-- modules/remote-sycl/benchmarks/mixed_precision.cpp | 68 ++++++++++------------ modules/remote-sycl/benchmarks/mixed_precision.hpp | 8 +-- modules/remote-sycl/c++/common.hpp | 2 +- modules/remote-sycl/c++/data.hpp | 26 ++++----- modules/remote-sycl/c++/device.hpp | 8 +-- 6 files changed, 57 insertions(+), 63 deletions(-) (limited to 'modules') diff --git a/modules/remote-sycl/.nix/derivation.nix b/modules/remote-sycl/.nix/derivation.nix index 45d2e8b..688af18 100644 --- a/modules/remote-sycl/.nix/derivation.nix +++ b/modules/remote-sycl/.nix/derivation.nix @@ -5,14 +5,14 @@ , version , forstio , openmp -, keldu , ocl-icd , lld_17 , python3 , bash +, adaptive-cpp , build_examples ? "false" -, build_benchmarks ? "true" +, build_benchmarks ? "false" }: let @@ -34,7 +34,7 @@ in stdenv.mkDerivation { forstio.codec forstio.async forstio.remote - keldu.adaptivecpp-dev + adaptive-cpp ocl-icd openmp lld_17 @@ -48,7 +48,7 @@ in stdenv.mkDerivation { scons prefix=$out build_benchmarks=${build_benchmarks} build_examples=${build_examples} install ''; - doCheck = true; + doCheck = false; checkPhase = '' export ACPP_APPDB_DIR=. scons test diff --git a/modules/remote-sycl/benchmarks/mixed_precision.cpp b/modules/remote-sycl/benchmarks/mixed_precision.cpp index d4b119b..aabe949 100644 --- a/modules/remote-sycl/benchmarks/mixed_precision.cpp +++ b/modules/remote-sycl/benchmarks/mixed_precision.cpp @@ -82,20 +82,20 @@ int main(int argc, char** argv){ if(!rmt_addr){ return -1; } - - cl::sycl::event mixed_ev; - cl::sycl::event float32_ev; - cl::sycl::event float64_ev; + // acpp::sycl::queue cmd_queue; + acpp::sycl::event mixed_ev; + acpp::sycl::event float32_ev; + acpp::sycl::event float64_ev; auto sycl_iface = listen_mixed_precision(mixed_ev, float64_ev, float32_ev, arithmetic_intensity); - data mixed_host_data; - data float64_host_data; - data float32_host_data; + data> mixed_data; + data> float64_data; + data> float32_data; - auto time_eval = [](uint64_t & current_min_time, cl::sycl::event& ev){ - auto end = ev.get_profiling_info(); - auto start = ev.get_profiling_info(); + auto time_eval = [](uint64_t & current_min_time, acpp::sycl::event& ev){ + auto end = ev.get_profiling_info(); + auto start = ev.get_profiling_info(); uint64_t curr_time = (end-start); current_min_time = std::min(curr_time, current_min_time); @@ -110,22 +110,19 @@ int main(int argc, char** argv){ std::cout<<"Warming up ..."<(gen_num); - float64_host_data.at(i) = static_cast(gen_num); - float32_host_data.at(i) = static_cast(gen_num); + mixed_data.at({{i}}) = {static_cast(gen_num)}; + float64_data.at({{i}}) = {static_cast(gen_num)}; + float32_data.at({{i}}) = {static_cast(gen_num)}; } - data> mixed_device_data{mixed_host_data}; - data> float64_device_data{float64_host_data}; - data> float32_device_data{float32_host_data}; - sycl_iface.template call<"float64_32">(mixed_device_data, &(device.get_handle())); - sycl_iface.template call<"float64">(float64_device_data, &(device.get_handle())); - sycl_iface.template call<"float32">(float32_device_data, &(device.get_handle())); + sycl_iface.template call<"float64_32">(mixed_data, &(device.get_handle())); + sycl_iface.template call<"float64">(float64_data, &(device.get_handle())); + sycl_iface.template call<"float32">(float32_data, &(device.get_handle())); device.get_handle().wait(); } @@ -142,32 +139,29 @@ int main(int argc, char** argv){ (std::cout<<'.').flush(); - data mixed_host_data; - data float64_host_data; - data float32_host_data; + data> mixed_data; + data> float64_data; + data> float32_data; - mixed_host_data = {test_size}; - float64_host_data = {test_size}; - float32_host_data = {test_size}; + mixed_data = {{{test_size}}}; + float64_data = {{{test_size}}}; + float32_data = {{{test_size}}}; for(uint64_t i = 0; i < test_size; ++i){ double gen_num = dis(e1); - mixed_host_data.at(i) = static_cast(gen_num); - float64_host_data.at(i) = static_cast(gen_num); - float32_host_data.at(i) = static_cast(gen_num); + mixed_data.at({{i}}) = {static_cast(gen_num)}; + float64_data.at({{i}}) = {static_cast(gen_num)}; + float32_data.at({{i}}) = {static_cast(gen_num)}; } - data> mixed_device_data{mixed_host_data}; - data> float64_device_data{float64_host_data}; - data> float32_device_data{float32_host_data}; - sycl_iface.template call<"float64_32">(mixed_device_data, &(device.get_handle())); + sycl_iface.template call<"float64_32">(mixed_data, &(device.get_handle())); device.get_handle().wait(); time_eval(time_mixed, mixed_ev); - sycl_iface.template call<"float64">(float64_device_data, &(device.get_handle())); + sycl_iface.template call<"float64">(float64_data, &(device.get_handle())); device.get_handle().wait(); time_eval(time_float64, float64_ev); - sycl_iface.template call<"float32">(float32_device_data, &(device.get_handle())); + sycl_iface.template call<"float32">(float32_data, &(device.get_handle())); device.get_handle().wait(); time_eval(time_float32, float32_ev); } diff --git a/modules/remote-sycl/benchmarks/mixed_precision.hpp b/modules/remote-sycl/benchmarks/mixed_precision.hpp index e21a2e6..f673538 100644 --- a/modules/remote-sycl/benchmarks/mixed_precision.hpp +++ b/modules/remote-sycl/benchmarks/mixed_precision.hpp @@ -34,8 +34,8 @@ using FloatPrecisionBenchmarkInterface = Interface< >; } -saw::interface, cl::sycl::queue*> listen_mixed_precision(cl::sycl::event& mixed_ev, cl::sycl::event& float64_ev, cl::sycl::event& float32_ev, uint64_t& arithmetic_intensity); +saw::interface, acpp::sycl::queue*> listen_mixed_precision(acpp::sycl::event& mixed_ev, acpp::sycl::event& float64_ev, acpp::sycl::event& float32_ev, uint64_t& arithmetic_intensity); -saw::interface, saw::encode::Sycl, cl::sycl::queue*> listen_mixed_float_precision(cl::sycl::event& ev, uint64_t& arithmetic_intensity); -saw::interface, saw::encode::Sycl, cl::sycl::queue*> listen_half_float_precision(cl::sycl::event& ev, uint64_t& arithmetic_intensity); -saw::interface, saw::encode::Sycl, cl::sycl::queue*> listen_full_precision(cl::sycl::event& ev, uint64_t& arithmetic_intensity); +saw::interface, saw::encode::Sycl, acpp::sycl::queue*> listen_mixed_float_precision(acpp::sycl::event& ev, uint64_t& arithmetic_intensity); +saw::interface, saw::encode::Sycl, acpp::sycl::queue*> listen_half_float_precision(acpp::sycl::event& ev, uint64_t& arithmetic_intensity); +saw::interface, saw::encode::Sycl, acpp::sycl::queue*> listen_full_precision(acpp::sycl::event& ev, uint64_t& arithmetic_intensity); diff --git a/modules/remote-sycl/c++/common.hpp b/modules/remote-sycl/c++/common.hpp index 822dae5..287075f 100644 --- a/modules/remote-sycl/c++/common.hpp +++ b/modules/remote-sycl/c++/common.hpp @@ -5,7 +5,7 @@ #include #include -#include +#include namespace saw { namespace rmt { diff --git a/modules/remote-sycl/c++/data.hpp b/modules/remote-sycl/c++/data.hpp index 2d5893f..c2ee098 100644 --- a/modules/remote-sycl/c++/data.hpp +++ b/modules/remote-sycl/c++/data.hpp @@ -15,8 +15,8 @@ public: using Schema = schema::Array; private: // cl::sycl::buffer> data_; - using sycl_usm_allocator = acpp::sycl::usm_allocator, sycl::usm::alloc::shared>; - data, Encode> dims_; + using sycl_usm_allocator = acpp::sycl::usm_allocator, acpp::sycl::usm::alloc::shared>; + data, Encode> dims_; data size_; std::vector, sycl_usm_allocator> data_; @@ -31,20 +31,20 @@ private: return s; } public: - data(): + data(acpp::sycl::queue& q__): dims_{}, size_{0u}, - data_{} + data_{0u,q__} { for(uint64_t iter = 0; iter < Dim; ++iter){ dims_.at({iter}) = 0u; } } - data(const data, Encode>& dims__): + data(const data, Encode>& dims__, acpp::sycl::queue& q__): dims_{dims__}, size_{get_full_size()}, - data_{size_} + data_{size_,q__} {} auto* get_internal_data() { @@ -62,19 +62,19 @@ public: return size_; } - data, Encode> dims() const { + data, Encode> dims() const { return dims_; } - constexpr data& at(const data, Encode>& i){ - return value_.at(this->get_flat_index(i)); + constexpr data& at(const data, Encode>& i){ + return data_.at(this->get_flat_index(i)); } - constexpr const data& at(const data, Encode>& i)const{ - return value_.at(this->get_flat_index(i)); + constexpr const data& at(const data, Encode>& i)const{ + return data_.at(this->get_flat_index(i)); } - data internal_flat_index(const data, Encode>& i) const { + data internal_flat_index(const data, Encode>& i) const { return {this->get_flat_index(i)}; } private: @@ -85,7 +85,7 @@ private: std::is_same_v>, "Unsupported type" ); - assert(value_.size() == get_full_size()); + assert(data_.size() == get_full_size()); uint64_t s = 0; uint64_t stride = 1; diff --git a/modules/remote-sycl/c++/device.hpp b/modules/remote-sycl/c++/device.hpp index a050078..05bb17a 100644 --- a/modules/remote-sycl/c++/device.hpp +++ b/modules/remote-sycl/c++/device.hpp @@ -9,10 +9,10 @@ namespace saw { template<> class device final { private: - cl::sycl::queue cmd_queue_; + acpp::sycl::queue cmd_queue_; public: device(): - cmd_queue_{cl::sycl::default_selector_v, cl::sycl::property_list{cl::sycl::property::queue::enable_profiling()}} + cmd_queue_{acpp::sycl::default_selector_v, acpp::sycl::property_list{acpp::sycl::property::queue::enable_profiling()}} {} SAW_FORBID_COPY(device); @@ -44,14 +44,14 @@ public: }); cmd_queue_.wait(); */ - cl::sycl::host_accessor result{dev_data.get_handle()}; + acpp::sycl::host_accessor result{dev_data.get_handle()}; return result[0]; } /** * Get a reference to the handle */ - cl::sycl::queue& get_handle(){ + acpp::sycl::queue& get_handle(){ return cmd_queue_; } }; -- cgit v1.2.3