diff options
| -rw-r--r-- | default.nix | 4 | ||||
| -rw-r--r-- | modules/remote-sycl/.nix/derivation.nix | 8 | ||||
| -rw-r--r-- | modules/remote-sycl/benchmarks/mixed_precision.cpp | 68 | ||||
| -rw-r--r-- | modules/remote-sycl/benchmarks/mixed_precision.hpp | 8 | ||||
| -rw-r--r-- | modules/remote-sycl/c++/common.hpp | 2 | ||||
| -rw-r--r-- | modules/remote-sycl/c++/data.hpp | 26 | ||||
| -rw-r--r-- | modules/remote-sycl/c++/device.hpp | 8 |
7 files changed, 59 insertions, 65 deletions
diff --git a/default.nix b/default.nix index ef0bc3d..24b2d77 100644 --- a/default.nix +++ b/default.nix @@ -6,7 +6,7 @@ ref = "master"; }).outPath + "/default.nix"){ }).gasp -, adaptive-cpp ? (pkgs.callPackage ./.nix/adaptivecpp.nix { +, adaptive-cpp ? (pkgs.callPackage ./.nix/adaptive-cpp.nix { inherit stdenv; }) }: @@ -133,7 +133,7 @@ in rec { openmp = pkgs.llvmPackages_17.openmp; build_examples = "false"; - build_benchmarks = "true"; + build_benchmarks = "false"; }; remote-hip = pkgs.callPackage modules/remote-hip/.nix/derivation.nix { diff --git a/modules/remote-sycl/.nix/derivation.nix b/modules/remote-sycl/.nix/derivation.nix index 45d2e8b..688af18 100644 --- a/modules/remote-sycl/.nix/derivation.nix +++ b/modules/remote-sycl/.nix/derivation.nix @@ -5,14 +5,14 @@ , version , forstio , openmp -, keldu , ocl-icd , lld_17 , python3 , bash +, adaptive-cpp , build_examples ? "false" -, build_benchmarks ? "true" +, build_benchmarks ? "false" }: let @@ -34,7 +34,7 @@ in stdenv.mkDerivation { forstio.codec forstio.async forstio.remote - keldu.adaptivecpp-dev + adaptive-cpp ocl-icd openmp lld_17 @@ -48,7 +48,7 @@ in stdenv.mkDerivation { scons prefix=$out build_benchmarks=${build_benchmarks} build_examples=${build_examples} install ''; - doCheck = true; + doCheck = false; checkPhase = '' export ACPP_APPDB_DIR=. scons test diff --git a/modules/remote-sycl/benchmarks/mixed_precision.cpp b/modules/remote-sycl/benchmarks/mixed_precision.cpp index d4b119b..aabe949 100644 --- a/modules/remote-sycl/benchmarks/mixed_precision.cpp +++ b/modules/remote-sycl/benchmarks/mixed_precision.cpp @@ -82,20 +82,20 @@ int main(int argc, char** argv){ if(!rmt_addr){ return -1; } - - cl::sycl::event mixed_ev; - cl::sycl::event float32_ev; - cl::sycl::event float64_ev; + // acpp::sycl::queue cmd_queue; + acpp::sycl::event mixed_ev; + acpp::sycl::event float32_ev; + acpp::sycl::event float64_ev; auto sycl_iface = listen_mixed_precision(mixed_ev, float64_ev, float32_ev, arithmetic_intensity); - data<sch::MixedArray> mixed_host_data; - data<sch::Float64Array> float64_host_data; - data<sch::Float32Array> float32_host_data; + data<sch::MixedArray, encode::Sycl<encode::Native>> mixed_data; + data<sch::Float64Array, encode::Sycl<encode::Native>> float64_data; + data<sch::Float32Array, encode::Sycl<encode::Native>> float32_data; - auto time_eval = [](uint64_t & current_min_time, cl::sycl::event& ev){ - auto end = ev.get_profiling_info<cl::sycl::info::event_profiling::command_end>(); - auto start = ev.get_profiling_info<cl::sycl::info::event_profiling::command_start>(); + auto time_eval = [](uint64_t & current_min_time, acpp::sycl::event& ev){ + auto end = ev.get_profiling_info<acpp::sycl::info::event_profiling::command_end>(); + auto start = ev.get_profiling_info<acpp::sycl::info::event_profiling::command_start>(); uint64_t curr_time = (end-start); current_min_time = std::min(curr_time, current_min_time); @@ -110,22 +110,19 @@ int main(int argc, char** argv){ std::cout<<"Warming up ..."<<std::endl; for(uint64_t test_size = 1ul; test_size < max_test_size; test_size *= 2ul){ - mixed_host_data = {test_size}; - float64_host_data = {test_size}; - float32_host_data = {test_size}; + mixed_data = {{{test_size,}}}; + float64_data = {{{test_size}}}; + float32_data = {{{test_size}}}; for(uint64_t i = 0; i < test_size; ++i){ double gen_num = dis(e1); - mixed_host_data.at(i) = static_cast<double>(gen_num); - float64_host_data.at(i) = static_cast<double>(gen_num); - float32_host_data.at(i) = static_cast<float>(gen_num); + mixed_data.at({{i}}) = {static_cast<double>(gen_num)}; + float64_data.at({{i}}) = {static_cast<double>(gen_num)}; + float32_data.at({{i}}) = {static_cast<float>(gen_num)}; } - data<sch::MixedArray, encode::Sycl<encode::Native>> mixed_device_data{mixed_host_data}; - data<sch::Float64Array, encode::Sycl<encode::Native>> float64_device_data{float64_host_data}; - data<sch::Float32Array, encode::Sycl<encode::Native>> float32_device_data{float32_host_data}; - sycl_iface.template call<"float64_32">(mixed_device_data, &(device.get_handle())); - sycl_iface.template call<"float64">(float64_device_data, &(device.get_handle())); - sycl_iface.template call<"float32">(float32_device_data, &(device.get_handle())); + sycl_iface.template call<"float64_32">(mixed_data, &(device.get_handle())); + sycl_iface.template call<"float64">(float64_data, &(device.get_handle())); + sycl_iface.template call<"float32">(float32_data, &(device.get_handle())); device.get_handle().wait(); } @@ -142,32 +139,29 @@ int main(int argc, char** argv){ (std::cout<<'.').flush(); - data<sch::MixedArray> mixed_host_data; - data<sch::Float64Array> float64_host_data; - data<sch::Float32Array> float32_host_data; + data<sch::MixedArray, encode::Sycl<encode::Native>> mixed_data; + data<sch::Float64Array, encode::Sycl<encode::Native>> float64_data; + data<sch::Float32Array, encode::Sycl<encode::Native>> float32_data; - mixed_host_data = {test_size}; - float64_host_data = {test_size}; - float32_host_data = {test_size}; + mixed_data = {{{test_size}}}; + float64_data = {{{test_size}}}; + float32_data = {{{test_size}}}; for(uint64_t i = 0; i < test_size; ++i){ double gen_num = dis(e1); - mixed_host_data.at(i) = static_cast<double>(gen_num); - float64_host_data.at(i) = static_cast<double>(gen_num); - float32_host_data.at(i) = static_cast<float>(gen_num); + mixed_data.at({{i}}) = {static_cast<double>(gen_num)}; + float64_data.at({{i}}) = {static_cast<double>(gen_num)}; + float32_data.at({{i}}) = {static_cast<float>(gen_num)}; } - data<sch::MixedArray, encode::Sycl<encode::Native>> mixed_device_data{mixed_host_data}; - data<sch::Float64Array, encode::Sycl<encode::Native>> float64_device_data{float64_host_data}; - data<sch::Float32Array, encode::Sycl<encode::Native>> float32_device_data{float32_host_data}; - sycl_iface.template call<"float64_32">(mixed_device_data, &(device.get_handle())); + sycl_iface.template call<"float64_32">(mixed_data, &(device.get_handle())); device.get_handle().wait(); time_eval(time_mixed, mixed_ev); - sycl_iface.template call<"float64">(float64_device_data, &(device.get_handle())); + sycl_iface.template call<"float64">(float64_data, &(device.get_handle())); device.get_handle().wait(); time_eval(time_float64, float64_ev); - sycl_iface.template call<"float32">(float32_device_data, &(device.get_handle())); + sycl_iface.template call<"float32">(float32_data, &(device.get_handle())); device.get_handle().wait(); time_eval(time_float32, float32_ev); } diff --git a/modules/remote-sycl/benchmarks/mixed_precision.hpp b/modules/remote-sycl/benchmarks/mixed_precision.hpp index e21a2e6..f673538 100644 --- a/modules/remote-sycl/benchmarks/mixed_precision.hpp +++ b/modules/remote-sycl/benchmarks/mixed_precision.hpp @@ -34,8 +34,8 @@ using FloatPrecisionBenchmarkInterface = Interface< >; } -saw::interface<sch::MixedPrecisionBenchmarkInterface, saw::encode::Sycl<saw::encode::Native>, cl::sycl::queue*> listen_mixed_precision(cl::sycl::event& mixed_ev, cl::sycl::event& float64_ev, cl::sycl::event& float32_ev, uint64_t& arithmetic_intensity); +saw::interface<sch::MixedPrecisionBenchmarkInterface, saw::encode::Sycl<saw::encode::Native>, acpp::sycl::queue*> listen_mixed_precision(acpp::sycl::event& mixed_ev, acpp::sycl::event& float64_ev, acpp::sycl::event& float32_ev, uint64_t& arithmetic_intensity); -saw::interface<sch::FloatPrecisionBenchmarkInterface<sch::MixedArray>, saw::encode::Sycl<saw::encode::Native>, cl::sycl::queue*> listen_mixed_float_precision(cl::sycl::event& ev, uint64_t& arithmetic_intensity); -saw::interface<sch::FloatPrecisionBenchmarkInterface<sch::Float32Array>, saw::encode::Sycl<saw::encode::Native>, cl::sycl::queue*> listen_half_float_precision(cl::sycl::event& ev, uint64_t& arithmetic_intensity); -saw::interface<sch::FloatPrecisionBenchmarkInterface<sch::Float64Array>, saw::encode::Sycl<saw::encode::Native>, cl::sycl::queue*> listen_full_precision(cl::sycl::event& ev, uint64_t& arithmetic_intensity); +saw::interface<sch::FloatPrecisionBenchmarkInterface<sch::MixedArray>, saw::encode::Sycl<saw::encode::Native>, acpp::sycl::queue*> listen_mixed_float_precision(acpp::sycl::event& ev, uint64_t& arithmetic_intensity); +saw::interface<sch::FloatPrecisionBenchmarkInterface<sch::Float32Array>, saw::encode::Sycl<saw::encode::Native>, acpp::sycl::queue*> listen_half_float_precision(acpp::sycl::event& ev, uint64_t& arithmetic_intensity); +saw::interface<sch::FloatPrecisionBenchmarkInterface<sch::Float64Array>, saw::encode::Sycl<saw::encode::Native>, acpp::sycl::queue*> listen_full_precision(acpp::sycl::event& ev, uint64_t& arithmetic_intensity); diff --git a/modules/remote-sycl/c++/common.hpp b/modules/remote-sycl/c++/common.hpp index 822dae5..287075f 100644 --- a/modules/remote-sycl/c++/common.hpp +++ b/modules/remote-sycl/c++/common.hpp @@ -5,7 +5,7 @@ #include <forstio/codec/schema_hash.hpp> #include <forstio/codec/id_map.hpp> -#include <AdaptiveCpp/CL/sycl.hpp> +#include <AdaptiveCpp/sycl/sycl.hpp> namespace saw { namespace rmt { diff --git a/modules/remote-sycl/c++/data.hpp b/modules/remote-sycl/c++/data.hpp index 2d5893f..c2ee098 100644 --- a/modules/remote-sycl/c++/data.hpp +++ b/modules/remote-sycl/c++/data.hpp @@ -15,8 +15,8 @@ public: using Schema = schema::Array<Sch,Dim>; private: // cl::sycl::buffer<data<Sch, encode::Native>> data_; - using sycl_usm_allocator = acpp::sycl::usm_allocator<data<Sch,Encode>, sycl::usm::alloc::shared>; - data<schema::FixedArray<schema::UInt64, sizeof...(D)>, Encode> dims_; + using sycl_usm_allocator = acpp::sycl::usm_allocator<data<Sch,Encode>, acpp::sycl::usm::alloc::shared>; + data<schema::FixedArray<schema::UInt64, Dim>, Encode> dims_; data<schema::UInt64, Encode> size_; std::vector<data<Sch,Encode>, sycl_usm_allocator> data_; @@ -31,20 +31,20 @@ private: return s; } public: - data(): + data(acpp::sycl::queue& q__): dims_{}, size_{0u}, - data_{} + data_{0u,q__} { for(uint64_t iter = 0; iter < Dim; ++iter){ dims_.at({iter}) = 0u; } } - data(const data<schema::FixedArray<schema::UInt64, sizeof...(D)>, Encode>& dims__): + data(const data<schema::FixedArray<schema::UInt64, Dim>, Encode>& dims__, acpp::sycl::queue& q__): dims_{dims__}, size_{get_full_size()}, - data_{size_} + data_{size_,q__} {} auto* get_internal_data() { @@ -62,19 +62,19 @@ public: return size_; } - data<schema::FixedArray<schema::UInt64, sizeof...(D)>, Encode> dims() const { + data<schema::FixedArray<schema::UInt64, Dim>, Encode> dims() const { return dims_; } - constexpr data<T, Encode>& at(const data<schema::FixedArray<schema::UInt64, sizeof...(D)>, Encode>& i){ - return value_.at(this->get_flat_index(i)); + constexpr data<Sch, Encode>& at(const data<schema::FixedArray<schema::UInt64, Dim>, Encode>& i){ + return data_.at(this->get_flat_index(i)); } - constexpr const data<T, Encode>& at(const data<schema::FixedArray<schema::UInt64, sizeof...(D)>, Encode>& i)const{ - return value_.at(this->get_flat_index(i)); + constexpr const data<Sch, Encode>& at(const data<schema::FixedArray<schema::UInt64, Dim>, Encode>& i)const{ + return data_.at(this->get_flat_index(i)); } - data<schema::UInt64,Encode> internal_flat_index(const data<schema::FixedArray<schema::UInt64, sizeof...(D)>, Encode>& i) const { + data<schema::UInt64,Encode> internal_flat_index(const data<schema::FixedArray<schema::UInt64, Dim>, Encode>& i) const { return {this->get_flat_index(i)}; } private: @@ -85,7 +85,7 @@ private: std::is_same_v<U,std::array<uint64_t,Dim>>, "Unsupported type" ); - assert(value_.size() == get_full_size()); + assert(data_.size() == get_full_size()); uint64_t s = 0; uint64_t stride = 1; diff --git a/modules/remote-sycl/c++/device.hpp b/modules/remote-sycl/c++/device.hpp index a050078..05bb17a 100644 --- a/modules/remote-sycl/c++/device.hpp +++ b/modules/remote-sycl/c++/device.hpp @@ -9,10 +9,10 @@ namespace saw { template<> class device<rmt::Sycl> final { private: - cl::sycl::queue cmd_queue_; + acpp::sycl::queue cmd_queue_; public: device(): - cmd_queue_{cl::sycl::default_selector_v, cl::sycl::property_list{cl::sycl::property::queue::enable_profiling()}} + cmd_queue_{acpp::sycl::default_selector_v, acpp::sycl::property_list{acpp::sycl::property::queue::enable_profiling()}} {} SAW_FORBID_COPY(device); @@ -44,14 +44,14 @@ public: }); cmd_queue_.wait(); */ - cl::sycl::host_accessor result{dev_data.get_handle()}; + acpp::sycl::host_accessor result{dev_data.get_handle()}; return result[0]; } /** * Get a reference to the handle */ - cl::sycl::queue& get_handle(){ + acpp::sycl::queue& get_handle(){ return cmd_queue_; } }; |
