diff options
author | Claudius "keldu" Holeksa <mail@keldu.de> | 2024-07-02 19:46:02 +0200 |
---|---|---|
committer | Claudius "keldu" Holeksa <mail@keldu.de> | 2024-07-02 19:46:02 +0200 |
commit | 25e05907f0292310eaae27a032db0ee274413874 (patch) | |
tree | 283de0ebb6b61add2221436a77bb09e2ff101080 | |
parent | e51d2b1c0493dfd30d1622c8a0628ecf98c92f1c (diff) |
Preparing benchmark work
-rw-r--r-- | default.nix | 1 | ||||
-rw-r--r-- | modules/codec/c++/data.hpp | 47 | ||||
-rw-r--r-- | modules/codec/c++/rpc.hpp | 5 | ||||
-rw-r--r-- | modules/codec/c++/transfer.hpp | 8 | ||||
-rw-r--r-- | modules/remote-sycl/.nix/derivation.nix | 7 | ||||
-rw-r--r-- | modules/remote-sycl/SConstruct | 10 | ||||
-rw-r--r-- | modules/remote-sycl/benchmarks/SConscript | 39 | ||||
-rw-r--r-- | modules/remote-sycl/benchmarks/kernel_mixed_precision.cpp | 42 | ||||
-rw-r--r-- | modules/remote-sycl/benchmarks/mixed_precision.cpp | 110 | ||||
-rw-r--r-- | modules/remote-sycl/benchmarks/mixed_precision.hpp | 27 | ||||
-rw-r--r-- | modules/remote-sycl/c++/common.hpp | 7 | ||||
-rw-r--r-- | modules/remote-sycl/c++/data.hpp | 8 | ||||
-rw-r--r-- | modules/remote-sycl/c++/device.hpp | 4 | ||||
-rw-r--r-- | modules/remote-sycl/c++/transfer.hpp | 3 | ||||
-rw-r--r-- | modules/remote-sycl/tests/mixed_precision.cpp | 212 | ||||
-rw-r--r-- | modules/remote-sycl/tests/sycl_basics.cpp | 9 |
16 files changed, 489 insertions, 50 deletions
diff --git a/default.nix b/default.nix index 001125f..f5e33ee 100644 --- a/default.nix +++ b/default.nix @@ -87,6 +87,7 @@ in rec { openmp = pkgs.llvmPackages_15.openmp; build_examples = "false"; + build_benchmarks = "true"; }; tools = pkgs.callPackage modules/tools/.nix/derivation.nix { diff --git a/modules/codec/c++/data.hpp b/modules/codec/c++/data.hpp index 5ebe579..dd70cd9 100644 --- a/modules/codec/c++/data.hpp +++ b/modules/codec/c++/data.hpp @@ -86,8 +86,8 @@ private: static_assert(always_false<T>, "Type not supported"); }; -template<typename T, size_t N> -class data<schema::Primitive<T,N>, encode::Native, storage::Default> { +template<typename T, size_t N, typename Storage> +class data<schema::Primitive<T,N>, encode::Native, Storage> { public: using Schema = schema::Primitive<T,N>; private: @@ -107,29 +107,29 @@ public: typename native_data_type<Schema>::type get() const {return value_;} - data<Schema, encode::Native, storage::Default> operator*(const data<Schema, encode::Native, storage::Default>& rhs)const{ + data<Schema, encode::Native, Storage> operator*(const data<Schema, encode::Native, Storage>& rhs)const{ return {get() * rhs.get()}; } - data<Schema, encode::Native, storage::Default> operator/(const data<Schema, encode::Native, storage::Default>& rhs)const{ + data<Schema, encode::Native, Storage> operator/(const data<Schema, encode::Native, Storage>& rhs)const{ return {get() / rhs.get()}; } - data<Schema, encode::Native, storage::Default> operator+(const data<Schema, encode::Native, storage::Default>& rhs)const{ + data<Schema, encode::Native, Storage> operator+(const data<Schema, encode::Native, Storage>& rhs)const{ return {get() + rhs.get()}; } - data<Schema, encode::Native, storage::Default>& operator+=(const data<Schema, encode::Native, storage::Default>& rhs)const{ + data<Schema, encode::Native, Storage>& operator+=(const data<Schema, encode::Native, Storage>& rhs)const{ value_ += rhs.get(); return *this; } - data<Schema, encode::Native, storage::Default>& operator-=(const data<Schema, encode::Native, storage::Default>& rhs)const{ + data<Schema, encode::Native, Storage>& operator-=(const data<Schema, encode::Native, Storage>& rhs)const{ value_ -= rhs.get(); return *this; } - data<Schema, encode::Native, storage::Default> operator-(const data<Schema, encode::Native, storage::Default>& rhs)const{ + data<Schema, encode::Native, Storage> operator-(const data<Schema, encode::Native, Storage>& rhs)const{ return {get() - rhs.get()}; } @@ -147,7 +147,7 @@ public: * Casts */ template<typename Target> - data<Target, encode::Native, storage::Default> cast_to(){ + data<Target, encode::Native, Storage> cast_to(){ auto raw_to = static_cast<typename saw::native_data_type<Target>::type>(value_); return {raw_to}; } @@ -156,12 +156,12 @@ public: /** * Mixed precision class for native formats */ -template<typename TA, uint64_t NA, typename TB, uint64_t NB> -class data<schema::MixedPrecision<schema::Primitive<TA,NA>, schema::Primitive<TB,NB>>, encode::Native, storage::Default>{ +template<typename TA, uint64_t NA, typename TB, uint64_t NB, typename Storage> +class data<schema::MixedPrecision<schema::Primitive<TA,NA>, schema::Primitive<TB,NB>>, encode::Native, Storage>{ public: using Schema = schema::MixedPrecision<schema::Primitive<TA,NA>, schema::Primitive<TB,NB>>; private: - data<typename Schema::StorageSchema, encode::Native, storage::Default> value_; + data<typename Schema::StorageSchema, encode::Native, Storage> value_; public: data():value_{}{} @@ -175,51 +175,58 @@ public: value_.set(val.template cast_to<typename Schema::StorageSchema>().get()); } - data<Schema, encode::Native, storage::Default> operator*(const data<Schema, encode::Native, storage::Default>& rhs) const { + data<Schema, encode::Native, Storage> operator*(const data<Schema, encode::Native, Storage>& rhs) const { using CalcType = typename native_data_type<typename Schema::InterfaceSchema>::type; CalcType left = static_cast<CalcType>(value_.get()); CalcType right = static_cast<CalcType>(rhs.get()); return {left * right}; } - data<Schema, encode::Native, storage::Default> operator/(const data<Schema, encode::Native, storage::Default>& rhs)const{ + data<Schema, encode::Native, Storage> operator*(const data<typename Schema::InterfaceSchema, encode::Native, Storage>& rhs) const { + using CalcType = typename native_data_type<typename Schema::InterfaceSchema>::type; + CalcType left = static_cast<CalcType>(value_.get()); + CalcType right = rhs.get(); + return {left * right}; + } + + data<Schema, encode::Native, Storage> operator/(const data<Schema, encode::Native, Storage>& rhs)const{ using CalcType = typename native_data_type<typename Schema::InterfaceSchema>::type; CalcType left = static_cast<CalcType>(value_.get()); CalcType right = static_cast<CalcType>(rhs.get()); return {left / right}; } - data<Schema, encode::Native, storage::Default> operator+(const data<Schema, encode::Native, storage::Default>& rhs)const{ + data<Schema, encode::Native, Storage> operator+(const data<Schema, encode::Native, Storage>& rhs)const{ using CalcType = typename native_data_type<typename Schema::InterfaceSchema>::type; CalcType left = static_cast<CalcType>(value_.get()); CalcType right = static_cast<CalcType>(rhs.get()); return {left + right}; } - data<Schema, encode::Native, storage::Default>& operator+=(const data<Schema, encode::Native, storage::Default>& rhs)const{ + data<Schema, encode::Native, Storage>& operator+=(const data<Schema, encode::Native, Storage>& rhs)const{ *this = *this + rhs.get(); return *this; } - data<Schema, encode::Native, storage::Default> operator-(const data<Schema, encode::Native, storage::Default>& rhs)const{ + data<Schema, encode::Native, Storage> operator-(const data<Schema, encode::Native, Storage>& rhs)const{ using CalcType = typename native_data_type<typename Schema::InterfaceSchema>::type; CalcType left = static_cast<CalcType>(value_.get()); CalcType right = static_cast<CalcType>(rhs.get()); return {left - right}; } - data<Schema, encode::Native, storage::Default>& operator-=(const data<Schema, encode::Native, storage::Default>& rhs) const { + data<Schema, encode::Native, Storage>& operator-=(const data<Schema, encode::Native, Storage>& rhs) const { *this = *this - rhs.get(); return *this; } template<typename Enc> - bool operator==(const data<Schema, Enc>& rhs)const{ + bool operator==(const data<Schema, Enc, Storage>& rhs)const{ return get() == rhs.get(); } template<typename Enc> - bool operator<(const data<Schema, Enc>& rhs) const { + bool operator<(const data<Schema, Enc, Storage>& rhs) const { return get() < rhs.get(); } }; diff --git a/modules/codec/c++/rpc.hpp b/modules/codec/c++/rpc.hpp index 2c97d6b..d172ec4 100644 --- a/modules/codec/c++/rpc.hpp +++ b/modules/codec/c++/rpc.hpp @@ -9,7 +9,6 @@ #include <variant> namespace saw { - /** * This class acts as a helper for rpc calls and representing data on the remote. */ @@ -139,7 +138,9 @@ template<typename Remote> class remote_address { static_assert(always_false<Remote>, "Type of remote not supported"); - + /** + * + */ }; /** diff --git a/modules/codec/c++/transfer.hpp b/modules/codec/c++/transfer.hpp index b6aa977..1fb297e 100644 --- a/modules/codec/c++/transfer.hpp +++ b/modules/codec/c++/transfer.hpp @@ -1,9 +1,9 @@ #pragma once namespace saw { -template<typename T> -class data_client; - -template<typename T> +template<typename Schema, typename Encoding, typename Remote> class data_server; + +template<typename Schema, typename Encoding, typename Remote> +class data_client; } diff --git a/modules/remote-sycl/.nix/derivation.nix b/modules/remote-sycl/.nix/derivation.nix index 71c5dff..ba6d1ec 100644 --- a/modules/remote-sycl/.nix/derivation.nix +++ b/modules/remote-sycl/.nix/derivation.nix @@ -12,6 +12,7 @@ , bash , build_examples ? "false" +, build_benchmarks ? "false" }: let @@ -33,8 +34,6 @@ in stdenv.mkDerivation { forstio.core forstio.codec forstio.async - forstio.io - forstio.io_codec keldu.adaptivecpp-dev ocl-icd openmp @@ -42,11 +41,11 @@ in stdenv.mkDerivation { ]; buildPhase = '' - scons build_examples=${build_examples} + scons build_benchmarks=${build_benchmarks} build_examples=${build_examples} ''; installPhase = '' - scons prefix=$out build_examples=${build_examples} install + scons prefix=$out build_benchmarks=${build_benchmarks} build_examples=${build_examples} install ''; doCheck = true; diff --git a/modules/remote-sycl/SConstruct b/modules/remote-sycl/SConstruct index 96e9df9..40344e7 100644 --- a/modules/remote-sycl/SConstruct +++ b/modules/remote-sycl/SConstruct @@ -50,14 +50,19 @@ env_vars.Add( ) ); +env_vars.Add( + BoolVariable('build_benchmarks', + help='Build benchmarks', + default=False + ) +); + env=Environment(ENV=os.environ, variables=env_vars, CPPPATH=[], CPPDEFINES=['SAW_UNIX'], CXXFLAGS=['-std=c++20','-g','-Wall','-Wextra'], LIBS=['forstio-core' ,'forstio-codec' ,'forstio-async' - ,'forstio-io' - ,'forstio-io_codec' ,'acpp-rt' ,'OpenCL' ,'omp' @@ -76,6 +81,7 @@ Export('env') SConscript('c++/SConscript') SConscript('tests/SConscript') SConscript('examples/SConscript') +SConscript('benchmarks/SConscript') env.Alias('cdb', env.cdb); env.Alias('all', [env.targets]); diff --git a/modules/remote-sycl/benchmarks/SConscript b/modules/remote-sycl/benchmarks/SConscript new file mode 100644 index 0000000..9976e0e --- /dev/null +++ b/modules/remote-sycl/benchmarks/SConscript @@ -0,0 +1,39 @@ +#!/bin/false + +import os +import os.path +import glob + + +Import('env') + +dir_path = Dir('.').abspath + +# Environment for base library +benchmarks_env = env.Clone(); + + +benchmarks_sycl_env = benchmarks_env.Clone(); +benchmarks_sycl_env['CXX'] = 'acpp'; +benchmarks_sycl_env['CXXFLAGS'] += ['-O2']; + +benchmarks_env.sources = sorted(glob.glob(dir_path + "/*.cpp")) +benchmarks_env.headers = sorted(glob.glob(dir_path + "/*.hpp")) + +env.sources += benchmarks_env.sources; +env.headers += benchmarks_env.headers; + +sycl_objects = []; +benchmarks_sycl_env.add_source_files(sycl_objects, ['kernel_mixed_precision.cpp'], shared=True); + +objects_static = [] +benchmarks_env.sycl_basic = benchmarks_env.Program('#bin/benchmark_mixed_precision', ['mixed_precision.cpp', env.library_static, sycl_objects]); + +# Set Alias +env.benchmarks = [benchmarks_env.sycl_basic]; +env.Alias('benchmarks', env.benchmarks); + +if env["build_benchmarks"]: + env.targets += ['benchmarks']; + env.Install('$prefix/bin/', env.benchmarks); +#endif diff --git a/modules/remote-sycl/benchmarks/kernel_mixed_precision.cpp b/modules/remote-sycl/benchmarks/kernel_mixed_precision.cpp new file mode 100644 index 0000000..0ac9756 --- /dev/null +++ b/modules/remote-sycl/benchmarks/kernel_mixed_precision.cpp @@ -0,0 +1,42 @@ +#include "mixed_precision.hpp" + +saw::interface<sch::MixedPrecisionBenchmarkInterface, saw::encode::Native, saw::rmt::Sycl, cl::sycl::queue*> listen_mixed_precision(cl::sycl::event& mixed_ev, cl::sycl::event& float64_ev, cl::sycl::event& float32_ev){ + return { + /** + * Mixed + */ + [&](saw::data<sch::MixedArray, saw::encode::Native, saw::rmt::Sycl>& in, cl::sycl::queue* cmd) -> saw::error_or<void> { + + mixed_ev = cmd->submit([&](cl::sycl::handler& h){ + auto acc_buff = in.template access<cl::sycl::access::mode::read_write>(h); + + h.parallel_for(cl::sycl::range<1>(in.size()), [=] (cl::sycl::id<1> it){ + acc_buff[0u].at(it[0u]) = acc_buff[0u].at(it[0u]) * saw::data<sch::Float64>{2.0}; + }); + }); + return saw::void_t{}; + }, + [&](saw::data<sch::Float64Array, saw::encode::Native, saw::rmt::Sycl>& in, cl::sycl::queue* cmd) -> saw::error_or<void> { + + float64_ev = cmd->submit([&](cl::sycl::handler& h){ + auto acc_buff = in.template access<cl::sycl::access::mode::read_write>(h); + + h.parallel_for(cl::sycl::range<1>(in.size()), [=] (cl::sycl::id<1> it){ + acc_buff[0u].at(it[0u]) = acc_buff[0u].at(it[0u]) * saw::data<sch::Float64>{2.0}; + }); + }); + return saw::void_t{}; + }, + [&](saw::data<sch::Float32Array, saw::encode::Native, saw::rmt::Sycl>& in, cl::sycl::queue* cmd) -> saw::error_or<void> { + + float32_ev = cmd->submit([&](cl::sycl::handler& h){ + auto acc_buff = in.template access<cl::sycl::access::mode::read_write>(h); + + h.parallel_for(cl::sycl::range<1>(in.size()), [=] (cl::sycl::id<1> it){ + acc_buff[0u].at(it[0u]) = acc_buff[0u].at(it[0u]) * saw::data<sch::Float32>{2.0f}; + }); + }); + return saw::void_t{}; + } + }; +} diff --git a/modules/remote-sycl/benchmarks/mixed_precision.cpp b/modules/remote-sycl/benchmarks/mixed_precision.cpp new file mode 100644 index 0000000..e63a814 --- /dev/null +++ b/modules/remote-sycl/benchmarks/mixed_precision.cpp @@ -0,0 +1,110 @@ +#include "./mixed_precision.hpp" +#include <forstio/codec/schema.hpp> + + +int main(){ + using namespace saw; + + constexpr uint64_t max_test_size = 1024ul * 1024ul * 512ul; + + std::random_device r; + std::default_random_engine e1{r()}; + std::uniform_real_distribution<> dis{-1.0,1.0}; + + + saw::event_loop loop; + saw::wait_scope wait{loop}; + + remote<rmt::Sycl> rmt; + + own<remote_address<rmt::Sycl>> rmt_addr{}; + + rmt.resolve_address().then([&](auto addr){ + rmt_addr = std::move(addr); + }).detach(); + + wait.poll(); + if(!rmt_addr){ + return -1; + } + + data<sch::MixedArray> mixed_host_data; + data<sch::Float64Array> float64_host_data; + data<sch::Float32Array> float32_host_data; + + cl::sycl::event mixed_ev; + cl::sycl::event float32_ev; + cl::sycl::event float64_ev; + + auto sycl_iface = listen_mixed_precision(mixed_ev, float64_ev, float32_ev); + + auto time_eval = [](std::string_view tag, cl::sycl::event& ev){ + auto end = ev.get_profiling_info<cl::sycl::info::event_profiling::command_end>(); + auto start = ev.get_profiling_info<cl::sycl::info::event_profiling::command_start>(); + + std::cout<<"Elapsed "<<tag<<" kernel time: "<< (end-start) / 1.0e9 << " seconds\n"; + }; + + auto& device = rmt_addr->get_device(); + + /** + * Warmup + */ + { + uint64_t test_size = max_test_size; + mixed_host_data = {test_size}; + float64_host_data = {test_size}; + float32_host_data = {test_size}; + for(uint64_t i = 0; i < test_size; ++i){ + double gen_num = dis(e1); + mixed_host_data.at(i) = static_cast<double>(gen_num); + float64_host_data.at(i) = static_cast<double>(gen_num); + float32_host_data.at(i) = static_cast<float>(gen_num); + } + data<sch::MixedArray, encode::Native, rmt::Sycl> mixed_device_data{mixed_host_data}; + data<sch::Float64Array, encode::Native, rmt::Sycl> float64_device_data{float64_host_data}; + data<sch::Float32Array, encode::Native, rmt::Sycl> float32_device_data{float32_host_data}; + + sycl_iface.template call<"float64_32">(mixed_device_data, &(device.get_handle())); + device.get_handle().wait(); + sycl_iface.template call<"float64">(float64_device_data, &(device.get_handle())); + device.get_handle().wait(); + sycl_iface.template call<"float32">(float32_device_data, &(device.get_handle())); + device.get_handle().wait(); + + } + + /** + * Benchmark + */ + for(uint64_t test_size = 1ul; test_size < max_test_size; test_size *= 2ul){ + device.get_handle().wait(); + mixed_host_data = {test_size}; + float64_host_data = {test_size}; + float32_host_data = {test_size}; + for(uint64_t i = 0; i < test_size; ++i){ + double gen_num = dis(e1); + mixed_host_data.at(i) = static_cast<double>(gen_num); + float64_host_data.at(i) = static_cast<double>(gen_num); + float32_host_data.at(i) = static_cast<float>(gen_num); + } + data<sch::MixedArray, encode::Native, rmt::Sycl> mixed_device_data{mixed_host_data}; + data<sch::Float64Array, encode::Native, rmt::Sycl> float64_device_data{float64_host_data}; + data<sch::Float32Array, encode::Native, rmt::Sycl> float32_device_data{float32_host_data}; + + sycl_iface.template call<"float64_32">(mixed_device_data); + device.get_handle().wait(); + sycl_iface.template call<"float64">(float64_device_data); + device.get_handle().wait(); + sycl_iface.template call<"float32">(float32_device_data); + device.get_handle().wait(); + + std::cout<<"\nSize: "<<test_size<<'\n'; + time_eval("Mixed", mixed_ev); + time_eval("Float32", float32_ev); + time_eval("Float64", float64_ev); + } + std::cout<<std::endl; + + return 0; +} diff --git a/modules/remote-sycl/benchmarks/mixed_precision.hpp b/modules/remote-sycl/benchmarks/mixed_precision.hpp new file mode 100644 index 0000000..3462bcd --- /dev/null +++ b/modules/remote-sycl/benchmarks/mixed_precision.hpp @@ -0,0 +1,27 @@ +#pragma once + +#include "../c++/remote.hpp" + +namespace sch { +using namespace saw::schema; + +using MixedArray = Array< + MixedPrecision<Float64, Float32> +>; + +using Float64Array = Array< + Float64 +>; + +using Float32Array = Array< + Float32 +>; + +using MixedPrecisionBenchmarkInterface = Interface< + Member<Function<MixedArray,Void>, "float64_32">, + Member<Function<Float64Array,Void>, "float64">, + Member<Function<Float32Array,Void>, "float32"> +>; +} + +saw::interface<sch::MixedPrecisionBenchmarkInterface, saw::encode::Native, saw::rmt::Sycl, cl::sycl::queue*> listen_mixed_precision(cl::sycl::event& mixed_ev, cl::sycl::event& float64_ev, cl::sycl::event& float32_ev); diff --git a/modules/remote-sycl/c++/common.hpp b/modules/remote-sycl/c++/common.hpp index 9bc734d..078879f 100644 --- a/modules/remote-sycl/c++/common.hpp +++ b/modules/remote-sycl/c++/common.hpp @@ -1,6 +1,6 @@ #pragma once -#include <forstio/io_codec/rpc.hpp> +#include <forstio/codec/rpc.hpp> #include <forstio/codec/data.hpp> #include <forstio/codec/id_map.hpp> @@ -17,9 +17,4 @@ class remote<rmt::Sycl>; template<typename T> class device; -template<typename Schema, typename Encoding, typename Remote> -class data_server; - -template<typename Schema, typename Encoding, typename Remote> -class data_client; } diff --git a/modules/remote-sycl/c++/data.hpp b/modules/remote-sycl/c++/data.hpp index e436e72..d939a53 100644 --- a/modules/remote-sycl/c++/data.hpp +++ b/modules/remote-sycl/c++/data.hpp @@ -12,9 +12,11 @@ template<typename Schema> class data<Schema, encode::Native, rmt::Sycl> { private: cl::sycl::buffer<data<Schema, encode::Native, storage::Default>> data_; + uint64_t size_; public: data(const data<Schema, encode::Native, storage::Default>& data__): - data_{&data__, 1u} + data_{&data__, 1u}, + size_{data__.size()} {} auto& get_handle() { @@ -25,6 +27,10 @@ public: return data_; } + uint64_t size() const { + return size_; + } + template<cl::sycl::access::mode AccessMode> auto access(cl::sycl::handler& h){ return data_.template get_access<AccessMode>(h); diff --git a/modules/remote-sycl/c++/device.hpp b/modules/remote-sycl/c++/device.hpp index e6f3fe3..ae19524 100644 --- a/modules/remote-sycl/c++/device.hpp +++ b/modules/remote-sycl/c++/device.hpp @@ -11,7 +11,9 @@ class device<rmt::Sycl> final { private: cl::sycl::queue cmd_queue_; public: - device() = default; + device(): + cmd_queue_{cl::sycl::default_selector_v, cl::sycl::property_list{cl::sycl::property::queue::enable_profiling()}} + {} SAW_FORBID_COPY(device); SAW_FORBID_MOVE(device); diff --git a/modules/remote-sycl/c++/transfer.hpp b/modules/remote-sycl/c++/transfer.hpp index f535751..72b111f 100644 --- a/modules/remote-sycl/c++/transfer.hpp +++ b/modules/remote-sycl/c++/transfer.hpp @@ -6,6 +6,7 @@ #include <forstio/error.hpp> #include <forstio/reduce_templates.hpp> +#include <forstio/codec/transfer.hpp> namespace saw { namespace impl { @@ -31,7 +32,7 @@ private: /** * Store for the data the server manages. */ - impl::data_server_redux<Encoding, typename tmpl_reduce<tmpl_group<Schema...>>::type >::type values_; + typename impl::data_server_redux<Encoding, typename tmpl_reduce<tmpl_group<Schema...>>::type >::type values_; public: /** * Main constructor diff --git a/modules/remote-sycl/tests/mixed_precision.cpp b/modules/remote-sycl/tests/mixed_precision.cpp new file mode 100644 index 0000000..4a62569 --- /dev/null +++ b/modules/remote-sycl/tests/mixed_precision.cpp @@ -0,0 +1,212 @@ +#include <forstio/test/suite.hpp> + +#include "../c++/remote.hpp" + +#include <random> + +namespace { +namespace schema { +using namespace saw::schema; + +using TestMixedArray = Array< + MixedPrecision<Float64,Float32> +>; + +using MixedFoo = Interface< + Member<Function<TestMixedArray, Void>, "foo"> +>; + +using TestDoubleArray = Array< + Float64 +>; + +using DoubleFoo = Interface< + Member<Function<TestDoubleArray, Void>, "foo"> +>; + +using TestFloatArray = Array< + Float32 +>; + +using FloatFoo = Interface< + Member<Function<TestFloatArray, Void>, "foo"> +>; +} + +constexpr uint64_t test_size = 1024ul; + +SAW_TEST("SYCL Mixed Test"){ + using namespace saw; + + std::random_device r; + std::default_random_engine e1{r()}; + std::uniform_real_distribution<> dis{-1.0,1.0}; + + data<schema::TestMixedArray> host_data; + host_data = {test_size}; + for(uint64_t i = 0; i < test_size; ++i){ + host_data.at(i) = static_cast<double>(dis(e1)); + } + + saw::event_loop loop; + saw::wait_scope wait{loop}; + + remote<rmt::Sycl> rmt; + + own<remote_address<rmt::Sycl>> rmt_addr{}; + + rmt.resolve_address().then([&](auto addr){ + rmt_addr = std::move(addr); + }).detach(); + + wait.poll(); + SAW_EXPECT(rmt_addr, "Remote address hasn't been filled"); + + data<schema::TestMixedArray, encode::Native, rmt::Sycl> device_data{host_data}; + + cl::sycl::event ev; + + interface<schema::MixedFoo, encode::Native,rmt::Sycl, cl::sycl::queue*> cl_iface { +[&](data<schema::TestMixedArray, encode::Native, rmt::Sycl>& in, cl::sycl::queue* cmd) -> error_or<void> { + + ev = cmd->submit([&](cl::sycl::handler& h){ + + auto acc_buff = in.template access<cl::sycl::access::mode::read_write>(h); + + h.parallel_for(cl::sycl::range<1>(test_size), [=] (cl::sycl::id<1> it){ + acc_buff[0u].at(it[0u]) = acc_buff[0u].at(it[0u]) * data<schema::Float64>{2.0}; + }); + }); + return saw::void_t{}; + } + }; + auto& device = rmt_addr->get_device(); + + cl_iface.template call <"foo">(device_data, &(device.get_handle())); + device.get_handle().wait(); + + { + auto end = ev.get_profiling_info<cl::sycl::info::event_profiling::command_end>(); + auto start = ev.get_profiling_info<cl::sycl::info::event_profiling::command_start>(); + + std::cout<<"Elapsed kernel time: "<< (end-start) / 1.0e9 << " seconds"<<std::endl; + } +} + +SAW_TEST("SYCL Float Test"){ + using namespace saw; + + std::random_device r; + std::default_random_engine e1{r()}; + std::uniform_real_distribution<> dis{-1.0,1.0}; + + data<schema::TestFloatArray> host_data; + host_data = {test_size}; + for(uint64_t i = 0; i < test_size; ++i){ + host_data.at(i) = static_cast<double>(dis(e1)); + } + + saw::event_loop loop; + saw::wait_scope wait{loop}; + + remote<rmt::Sycl> rmt; + + own<remote_address<rmt::Sycl>> rmt_addr{}; + + rmt.resolve_address().then([&](auto addr){ + rmt_addr = std::move(addr); + }).detach(); + + wait.poll(); + SAW_EXPECT(rmt_addr, "Remote address hasn't been filled"); + + data<schema::TestFloatArray, encode::Native, rmt::Sycl> device_data{host_data}; + + cl::sycl::event ev; + + interface<schema::FloatFoo, encode::Native,rmt::Sycl, cl::sycl::queue*> cl_iface { +[&](data<schema::TestFloatArray, encode::Native, rmt::Sycl>& in, cl::sycl::queue* cmd) -> error_or<void> { + + ev = cmd->submit([&](cl::sycl::handler& h){ + + auto acc_buff = in.template access<cl::sycl::access::mode::read_write>(h); + + h.parallel_for(cl::sycl::range<1>(test_size), [=] (cl::sycl::id<1> it){ + acc_buff[0u].at(it[0u]) = acc_buff[0u].at(it[0u]) * data<schema::Float32>{2.0}; + }); + }); + return saw::void_t{}; + } + }; + auto& device = rmt_addr->get_device(); + + cl_iface.template call <"foo">(device_data, &(device.get_handle())); + device.get_handle().wait(); + + { + auto end = ev.get_profiling_info<cl::sycl::info::event_profiling::command_end>(); + auto start = ev.get_profiling_info<cl::sycl::info::event_profiling::command_start>(); + + std::cout<<"Elapsed kernel time: "<< (end-start) / 1.0e9 << " seconds"<<std::endl; + } +} + +SAW_TEST("SYCL Double Test"){ + using namespace saw; + + std::random_device r; + std::default_random_engine e1{r()}; + std::uniform_real_distribution<> dis{-1.0,1.0}; + + data<schema::TestDoubleArray> host_data; + host_data = {test_size}; + for(uint64_t i = 0; i < test_size; ++i){ + host_data.at(i) = static_cast<double>(dis(e1)); + } + + saw::event_loop loop; + saw::wait_scope wait{loop}; + + remote<rmt::Sycl> rmt; + + own<remote_address<rmt::Sycl>> rmt_addr{}; + + rmt.resolve_address().then([&](auto addr){ + rmt_addr = std::move(addr); + }).detach(); + + wait.poll(); + SAW_EXPECT(rmt_addr, "Remote address hasn't been filled"); + + data<schema::TestDoubleArray, encode::Native, rmt::Sycl> device_data{host_data}; + + cl::sycl::event ev; + + interface<schema::DoubleFoo, encode::Native,rmt::Sycl, cl::sycl::queue*> cl_iface { +[&](data<schema::TestDoubleArray, encode::Native, rmt::Sycl>& in, cl::sycl::queue* cmd) -> error_or<void> { + + ev = cmd->submit([&](cl::sycl::handler& h){ + + auto acc_buff = in.template access<cl::sycl::access::mode::read_write>(h); + + h.parallel_for(cl::sycl::range<1>(test_size), [=] (cl::sycl::id<1> it){ + acc_buff[0u].at(it[0u]) = acc_buff[0u].at(it[0u]) * data<schema::Float64>{2.0}; + }); + }); + return saw::void_t{}; + } + }; + auto& device = rmt_addr->get_device(); + + cl_iface.template call <"foo">(device_data, &(device.get_handle())); + device.get_handle().wait(); + + { + auto end = ev.get_profiling_info<cl::sycl::info::event_profiling::command_end>(); + auto start = ev.get_profiling_info<cl::sycl::info::event_profiling::command_start>(); + + std::cout<<"Elapsed kernel time: "<< (end-start) / 1.0e9 << " seconds"<<std::endl; + } +} + +} diff --git a/modules/remote-sycl/tests/sycl_basics.cpp b/modules/remote-sycl/tests/sycl_basics.cpp index 61e0d87..7212a77 100644 --- a/modules/remote-sycl/tests/sycl_basics.cpp +++ b/modules/remote-sycl/tests/sycl_basics.cpp @@ -15,15 +15,6 @@ using TestStruct = Struct< using Foo = Interface< Member<Function<TestStruct, Void>, "foo"> >; - -using Calculator = Interface< - Member< - Function<Tuple<Int64, Int64>, Int64>, "add" - > -, Member< - Function<Tuple<Int64, Int64>, Int64>, "multiply" - > ->; } SAW_TEST("SYCL Test Setup"){ using namespace saw; |