summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorClaudius "keldu" Holeksa <mail@keldu.de>2024-07-02 19:46:02 +0200
committerClaudius "keldu" Holeksa <mail@keldu.de>2024-07-02 19:46:02 +0200
commit25e05907f0292310eaae27a032db0ee274413874 (patch)
tree283de0ebb6b61add2221436a77bb09e2ff101080
parente51d2b1c0493dfd30d1622c8a0628ecf98c92f1c (diff)
Preparing benchmark work
-rw-r--r--default.nix1
-rw-r--r--modules/codec/c++/data.hpp47
-rw-r--r--modules/codec/c++/rpc.hpp5
-rw-r--r--modules/codec/c++/transfer.hpp8
-rw-r--r--modules/remote-sycl/.nix/derivation.nix7
-rw-r--r--modules/remote-sycl/SConstruct10
-rw-r--r--modules/remote-sycl/benchmarks/SConscript39
-rw-r--r--modules/remote-sycl/benchmarks/kernel_mixed_precision.cpp42
-rw-r--r--modules/remote-sycl/benchmarks/mixed_precision.cpp110
-rw-r--r--modules/remote-sycl/benchmarks/mixed_precision.hpp27
-rw-r--r--modules/remote-sycl/c++/common.hpp7
-rw-r--r--modules/remote-sycl/c++/data.hpp8
-rw-r--r--modules/remote-sycl/c++/device.hpp4
-rw-r--r--modules/remote-sycl/c++/transfer.hpp3
-rw-r--r--modules/remote-sycl/tests/mixed_precision.cpp212
-rw-r--r--modules/remote-sycl/tests/sycl_basics.cpp9
16 files changed, 489 insertions, 50 deletions
diff --git a/default.nix b/default.nix
index 001125f..f5e33ee 100644
--- a/default.nix
+++ b/default.nix
@@ -87,6 +87,7 @@ in rec {
openmp = pkgs.llvmPackages_15.openmp;
build_examples = "false";
+ build_benchmarks = "true";
};
tools = pkgs.callPackage modules/tools/.nix/derivation.nix {
diff --git a/modules/codec/c++/data.hpp b/modules/codec/c++/data.hpp
index 5ebe579..dd70cd9 100644
--- a/modules/codec/c++/data.hpp
+++ b/modules/codec/c++/data.hpp
@@ -86,8 +86,8 @@ private:
static_assert(always_false<T>, "Type not supported");
};
-template<typename T, size_t N>
-class data<schema::Primitive<T,N>, encode::Native, storage::Default> {
+template<typename T, size_t N, typename Storage>
+class data<schema::Primitive<T,N>, encode::Native, Storage> {
public:
using Schema = schema::Primitive<T,N>;
private:
@@ -107,29 +107,29 @@ public:
typename native_data_type<Schema>::type get() const {return value_;}
- data<Schema, encode::Native, storage::Default> operator*(const data<Schema, encode::Native, storage::Default>& rhs)const{
+ data<Schema, encode::Native, Storage> operator*(const data<Schema, encode::Native, Storage>& rhs)const{
return {get() * rhs.get()};
}
- data<Schema, encode::Native, storage::Default> operator/(const data<Schema, encode::Native, storage::Default>& rhs)const{
+ data<Schema, encode::Native, Storage> operator/(const data<Schema, encode::Native, Storage>& rhs)const{
return {get() / rhs.get()};
}
- data<Schema, encode::Native, storage::Default> operator+(const data<Schema, encode::Native, storage::Default>& rhs)const{
+ data<Schema, encode::Native, Storage> operator+(const data<Schema, encode::Native, Storage>& rhs)const{
return {get() + rhs.get()};
}
- data<Schema, encode::Native, storage::Default>& operator+=(const data<Schema, encode::Native, storage::Default>& rhs)const{
+ data<Schema, encode::Native, Storage>& operator+=(const data<Schema, encode::Native, Storage>& rhs)const{
value_ += rhs.get();
return *this;
}
- data<Schema, encode::Native, storage::Default>& operator-=(const data<Schema, encode::Native, storage::Default>& rhs)const{
+ data<Schema, encode::Native, Storage>& operator-=(const data<Schema, encode::Native, Storage>& rhs)const{
value_ -= rhs.get();
return *this;
}
- data<Schema, encode::Native, storage::Default> operator-(const data<Schema, encode::Native, storage::Default>& rhs)const{
+ data<Schema, encode::Native, Storage> operator-(const data<Schema, encode::Native, Storage>& rhs)const{
return {get() - rhs.get()};
}
@@ -147,7 +147,7 @@ public:
* Casts
*/
template<typename Target>
- data<Target, encode::Native, storage::Default> cast_to(){
+ data<Target, encode::Native, Storage> cast_to(){
auto raw_to = static_cast<typename saw::native_data_type<Target>::type>(value_);
return {raw_to};
}
@@ -156,12 +156,12 @@ public:
/**
* Mixed precision class for native formats
*/
-template<typename TA, uint64_t NA, typename TB, uint64_t NB>
-class data<schema::MixedPrecision<schema::Primitive<TA,NA>, schema::Primitive<TB,NB>>, encode::Native, storage::Default>{
+template<typename TA, uint64_t NA, typename TB, uint64_t NB, typename Storage>
+class data<schema::MixedPrecision<schema::Primitive<TA,NA>, schema::Primitive<TB,NB>>, encode::Native, Storage>{
public:
using Schema = schema::MixedPrecision<schema::Primitive<TA,NA>, schema::Primitive<TB,NB>>;
private:
- data<typename Schema::StorageSchema, encode::Native, storage::Default> value_;
+ data<typename Schema::StorageSchema, encode::Native, Storage> value_;
public:
data():value_{}{}
@@ -175,51 +175,58 @@ public:
value_.set(val.template cast_to<typename Schema::StorageSchema>().get());
}
- data<Schema, encode::Native, storage::Default> operator*(const data<Schema, encode::Native, storage::Default>& rhs) const {
+ data<Schema, encode::Native, Storage> operator*(const data<Schema, encode::Native, Storage>& rhs) const {
using CalcType = typename native_data_type<typename Schema::InterfaceSchema>::type;
CalcType left = static_cast<CalcType>(value_.get());
CalcType right = static_cast<CalcType>(rhs.get());
return {left * right};
}
- data<Schema, encode::Native, storage::Default> operator/(const data<Schema, encode::Native, storage::Default>& rhs)const{
+ data<Schema, encode::Native, Storage> operator*(const data<typename Schema::InterfaceSchema, encode::Native, Storage>& rhs) const {
+ using CalcType = typename native_data_type<typename Schema::InterfaceSchema>::type;
+ CalcType left = static_cast<CalcType>(value_.get());
+ CalcType right = rhs.get();
+ return {left * right};
+ }
+
+ data<Schema, encode::Native, Storage> operator/(const data<Schema, encode::Native, Storage>& rhs)const{
using CalcType = typename native_data_type<typename Schema::InterfaceSchema>::type;
CalcType left = static_cast<CalcType>(value_.get());
CalcType right = static_cast<CalcType>(rhs.get());
return {left / right};
}
- data<Schema, encode::Native, storage::Default> operator+(const data<Schema, encode::Native, storage::Default>& rhs)const{
+ data<Schema, encode::Native, Storage> operator+(const data<Schema, encode::Native, Storage>& rhs)const{
using CalcType = typename native_data_type<typename Schema::InterfaceSchema>::type;
CalcType left = static_cast<CalcType>(value_.get());
CalcType right = static_cast<CalcType>(rhs.get());
return {left + right};
}
- data<Schema, encode::Native, storage::Default>& operator+=(const data<Schema, encode::Native, storage::Default>& rhs)const{
+ data<Schema, encode::Native, Storage>& operator+=(const data<Schema, encode::Native, Storage>& rhs)const{
*this = *this + rhs.get();
return *this;
}
- data<Schema, encode::Native, storage::Default> operator-(const data<Schema, encode::Native, storage::Default>& rhs)const{
+ data<Schema, encode::Native, Storage> operator-(const data<Schema, encode::Native, Storage>& rhs)const{
using CalcType = typename native_data_type<typename Schema::InterfaceSchema>::type;
CalcType left = static_cast<CalcType>(value_.get());
CalcType right = static_cast<CalcType>(rhs.get());
return {left - right};
}
- data<Schema, encode::Native, storage::Default>& operator-=(const data<Schema, encode::Native, storage::Default>& rhs) const {
+ data<Schema, encode::Native, Storage>& operator-=(const data<Schema, encode::Native, Storage>& rhs) const {
*this = *this - rhs.get();
return *this;
}
template<typename Enc>
- bool operator==(const data<Schema, Enc>& rhs)const{
+ bool operator==(const data<Schema, Enc, Storage>& rhs)const{
return get() == rhs.get();
}
template<typename Enc>
- bool operator<(const data<Schema, Enc>& rhs) const {
+ bool operator<(const data<Schema, Enc, Storage>& rhs) const {
return get() < rhs.get();
}
};
diff --git a/modules/codec/c++/rpc.hpp b/modules/codec/c++/rpc.hpp
index 2c97d6b..d172ec4 100644
--- a/modules/codec/c++/rpc.hpp
+++ b/modules/codec/c++/rpc.hpp
@@ -9,7 +9,6 @@
#include <variant>
namespace saw {
-
/**
* This class acts as a helper for rpc calls and representing data on the remote.
*/
@@ -139,7 +138,9 @@ template<typename Remote>
class remote_address {
static_assert(always_false<Remote>, "Type of remote not supported");
-
+ /**
+ *
+ */
};
/**
diff --git a/modules/codec/c++/transfer.hpp b/modules/codec/c++/transfer.hpp
index b6aa977..1fb297e 100644
--- a/modules/codec/c++/transfer.hpp
+++ b/modules/codec/c++/transfer.hpp
@@ -1,9 +1,9 @@
#pragma once
namespace saw {
-template<typename T>
-class data_client;
-
-template<typename T>
+template<typename Schema, typename Encoding, typename Remote>
class data_server;
+
+template<typename Schema, typename Encoding, typename Remote>
+class data_client;
}
diff --git a/modules/remote-sycl/.nix/derivation.nix b/modules/remote-sycl/.nix/derivation.nix
index 71c5dff..ba6d1ec 100644
--- a/modules/remote-sycl/.nix/derivation.nix
+++ b/modules/remote-sycl/.nix/derivation.nix
@@ -12,6 +12,7 @@
, bash
, build_examples ? "false"
+, build_benchmarks ? "false"
}:
let
@@ -33,8 +34,6 @@ in stdenv.mkDerivation {
forstio.core
forstio.codec
forstio.async
- forstio.io
- forstio.io_codec
keldu.adaptivecpp-dev
ocl-icd
openmp
@@ -42,11 +41,11 @@ in stdenv.mkDerivation {
];
buildPhase = ''
- scons build_examples=${build_examples}
+ scons build_benchmarks=${build_benchmarks} build_examples=${build_examples}
'';
installPhase = ''
- scons prefix=$out build_examples=${build_examples} install
+ scons prefix=$out build_benchmarks=${build_benchmarks} build_examples=${build_examples} install
'';
doCheck = true;
diff --git a/modules/remote-sycl/SConstruct b/modules/remote-sycl/SConstruct
index 96e9df9..40344e7 100644
--- a/modules/remote-sycl/SConstruct
+++ b/modules/remote-sycl/SConstruct
@@ -50,14 +50,19 @@ env_vars.Add(
)
);
+env_vars.Add(
+ BoolVariable('build_benchmarks',
+ help='Build benchmarks',
+ default=False
+ )
+);
+
env=Environment(ENV=os.environ, variables=env_vars, CPPPATH=[],
CPPDEFINES=['SAW_UNIX'],
CXXFLAGS=['-std=c++20','-g','-Wall','-Wextra'],
LIBS=['forstio-core'
,'forstio-codec'
,'forstio-async'
- ,'forstio-io'
- ,'forstio-io_codec'
,'acpp-rt'
,'OpenCL'
,'omp'
@@ -76,6 +81,7 @@ Export('env')
SConscript('c++/SConscript')
SConscript('tests/SConscript')
SConscript('examples/SConscript')
+SConscript('benchmarks/SConscript')
env.Alias('cdb', env.cdb);
env.Alias('all', [env.targets]);
diff --git a/modules/remote-sycl/benchmarks/SConscript b/modules/remote-sycl/benchmarks/SConscript
new file mode 100644
index 0000000..9976e0e
--- /dev/null
+++ b/modules/remote-sycl/benchmarks/SConscript
@@ -0,0 +1,39 @@
+#!/bin/false
+
+import os
+import os.path
+import glob
+
+
+Import('env')
+
+dir_path = Dir('.').abspath
+
+# Environment for base library
+benchmarks_env = env.Clone();
+
+
+benchmarks_sycl_env = benchmarks_env.Clone();
+benchmarks_sycl_env['CXX'] = 'acpp';
+benchmarks_sycl_env['CXXFLAGS'] += ['-O2'];
+
+benchmarks_env.sources = sorted(glob.glob(dir_path + "/*.cpp"))
+benchmarks_env.headers = sorted(glob.glob(dir_path + "/*.hpp"))
+
+env.sources += benchmarks_env.sources;
+env.headers += benchmarks_env.headers;
+
+sycl_objects = [];
+benchmarks_sycl_env.add_source_files(sycl_objects, ['kernel_mixed_precision.cpp'], shared=True);
+
+objects_static = []
+benchmarks_env.sycl_basic = benchmarks_env.Program('#bin/benchmark_mixed_precision', ['mixed_precision.cpp', env.library_static, sycl_objects]);
+
+# Set Alias
+env.benchmarks = [benchmarks_env.sycl_basic];
+env.Alias('benchmarks', env.benchmarks);
+
+if env["build_benchmarks"]:
+ env.targets += ['benchmarks'];
+ env.Install('$prefix/bin/', env.benchmarks);
+#endif
diff --git a/modules/remote-sycl/benchmarks/kernel_mixed_precision.cpp b/modules/remote-sycl/benchmarks/kernel_mixed_precision.cpp
new file mode 100644
index 0000000..0ac9756
--- /dev/null
+++ b/modules/remote-sycl/benchmarks/kernel_mixed_precision.cpp
@@ -0,0 +1,42 @@
+#include "mixed_precision.hpp"
+
+saw::interface<sch::MixedPrecisionBenchmarkInterface, saw::encode::Native, saw::rmt::Sycl, cl::sycl::queue*> listen_mixed_precision(cl::sycl::event& mixed_ev, cl::sycl::event& float64_ev, cl::sycl::event& float32_ev){
+ return {
+ /**
+ * Mixed
+ */
+ [&](saw::data<sch::MixedArray, saw::encode::Native, saw::rmt::Sycl>& in, cl::sycl::queue* cmd) -> saw::error_or<void> {
+
+ mixed_ev = cmd->submit([&](cl::sycl::handler& h){
+ auto acc_buff = in.template access<cl::sycl::access::mode::read_write>(h);
+
+ h.parallel_for(cl::sycl::range<1>(in.size()), [=] (cl::sycl::id<1> it){
+ acc_buff[0u].at(it[0u]) = acc_buff[0u].at(it[0u]) * saw::data<sch::Float64>{2.0};
+ });
+ });
+ return saw::void_t{};
+ },
+ [&](saw::data<sch::Float64Array, saw::encode::Native, saw::rmt::Sycl>& in, cl::sycl::queue* cmd) -> saw::error_or<void> {
+
+ float64_ev = cmd->submit([&](cl::sycl::handler& h){
+ auto acc_buff = in.template access<cl::sycl::access::mode::read_write>(h);
+
+ h.parallel_for(cl::sycl::range<1>(in.size()), [=] (cl::sycl::id<1> it){
+ acc_buff[0u].at(it[0u]) = acc_buff[0u].at(it[0u]) * saw::data<sch::Float64>{2.0};
+ });
+ });
+ return saw::void_t{};
+ },
+ [&](saw::data<sch::Float32Array, saw::encode::Native, saw::rmt::Sycl>& in, cl::sycl::queue* cmd) -> saw::error_or<void> {
+
+ float32_ev = cmd->submit([&](cl::sycl::handler& h){
+ auto acc_buff = in.template access<cl::sycl::access::mode::read_write>(h);
+
+ h.parallel_for(cl::sycl::range<1>(in.size()), [=] (cl::sycl::id<1> it){
+ acc_buff[0u].at(it[0u]) = acc_buff[0u].at(it[0u]) * saw::data<sch::Float32>{2.0f};
+ });
+ });
+ return saw::void_t{};
+ }
+ };
+}
diff --git a/modules/remote-sycl/benchmarks/mixed_precision.cpp b/modules/remote-sycl/benchmarks/mixed_precision.cpp
new file mode 100644
index 0000000..e63a814
--- /dev/null
+++ b/modules/remote-sycl/benchmarks/mixed_precision.cpp
@@ -0,0 +1,110 @@
+#include "./mixed_precision.hpp"
+#include <forstio/codec/schema.hpp>
+
+
+int main(){
+ using namespace saw;
+
+ constexpr uint64_t max_test_size = 1024ul * 1024ul * 512ul;
+
+ std::random_device r;
+ std::default_random_engine e1{r()};
+ std::uniform_real_distribution<> dis{-1.0,1.0};
+
+
+ saw::event_loop loop;
+ saw::wait_scope wait{loop};
+
+ remote<rmt::Sycl> rmt;
+
+ own<remote_address<rmt::Sycl>> rmt_addr{};
+
+ rmt.resolve_address().then([&](auto addr){
+ rmt_addr = std::move(addr);
+ }).detach();
+
+ wait.poll();
+ if(!rmt_addr){
+ return -1;
+ }
+
+ data<sch::MixedArray> mixed_host_data;
+ data<sch::Float64Array> float64_host_data;
+ data<sch::Float32Array> float32_host_data;
+
+ cl::sycl::event mixed_ev;
+ cl::sycl::event float32_ev;
+ cl::sycl::event float64_ev;
+
+ auto sycl_iface = listen_mixed_precision(mixed_ev, float64_ev, float32_ev);
+
+ auto time_eval = [](std::string_view tag, cl::sycl::event& ev){
+ auto end = ev.get_profiling_info<cl::sycl::info::event_profiling::command_end>();
+ auto start = ev.get_profiling_info<cl::sycl::info::event_profiling::command_start>();
+
+ std::cout<<"Elapsed "<<tag<<" kernel time: "<< (end-start) / 1.0e9 << " seconds\n";
+ };
+
+ auto& device = rmt_addr->get_device();
+
+ /**
+ * Warmup
+ */
+ {
+ uint64_t test_size = max_test_size;
+ mixed_host_data = {test_size};
+ float64_host_data = {test_size};
+ float32_host_data = {test_size};
+ for(uint64_t i = 0; i < test_size; ++i){
+ double gen_num = dis(e1);
+ mixed_host_data.at(i) = static_cast<double>(gen_num);
+ float64_host_data.at(i) = static_cast<double>(gen_num);
+ float32_host_data.at(i) = static_cast<float>(gen_num);
+ }
+ data<sch::MixedArray, encode::Native, rmt::Sycl> mixed_device_data{mixed_host_data};
+ data<sch::Float64Array, encode::Native, rmt::Sycl> float64_device_data{float64_host_data};
+ data<sch::Float32Array, encode::Native, rmt::Sycl> float32_device_data{float32_host_data};
+
+ sycl_iface.template call<"float64_32">(mixed_device_data, &(device.get_handle()));
+ device.get_handle().wait();
+ sycl_iface.template call<"float64">(float64_device_data, &(device.get_handle()));
+ device.get_handle().wait();
+ sycl_iface.template call<"float32">(float32_device_data, &(device.get_handle()));
+ device.get_handle().wait();
+
+ }
+
+ /**
+ * Benchmark
+ */
+ for(uint64_t test_size = 1ul; test_size < max_test_size; test_size *= 2ul){
+ device.get_handle().wait();
+ mixed_host_data = {test_size};
+ float64_host_data = {test_size};
+ float32_host_data = {test_size};
+ for(uint64_t i = 0; i < test_size; ++i){
+ double gen_num = dis(e1);
+ mixed_host_data.at(i) = static_cast<double>(gen_num);
+ float64_host_data.at(i) = static_cast<double>(gen_num);
+ float32_host_data.at(i) = static_cast<float>(gen_num);
+ }
+ data<sch::MixedArray, encode::Native, rmt::Sycl> mixed_device_data{mixed_host_data};
+ data<sch::Float64Array, encode::Native, rmt::Sycl> float64_device_data{float64_host_data};
+ data<sch::Float32Array, encode::Native, rmt::Sycl> float32_device_data{float32_host_data};
+
+ sycl_iface.template call<"float64_32">(mixed_device_data);
+ device.get_handle().wait();
+ sycl_iface.template call<"float64">(float64_device_data);
+ device.get_handle().wait();
+ sycl_iface.template call<"float32">(float32_device_data);
+ device.get_handle().wait();
+
+ std::cout<<"\nSize: "<<test_size<<'\n';
+ time_eval("Mixed", mixed_ev);
+ time_eval("Float32", float32_ev);
+ time_eval("Float64", float64_ev);
+ }
+ std::cout<<std::endl;
+
+ return 0;
+}
diff --git a/modules/remote-sycl/benchmarks/mixed_precision.hpp b/modules/remote-sycl/benchmarks/mixed_precision.hpp
new file mode 100644
index 0000000..3462bcd
--- /dev/null
+++ b/modules/remote-sycl/benchmarks/mixed_precision.hpp
@@ -0,0 +1,27 @@
+#pragma once
+
+#include "../c++/remote.hpp"
+
+namespace sch {
+using namespace saw::schema;
+
+using MixedArray = Array<
+ MixedPrecision<Float64, Float32>
+>;
+
+using Float64Array = Array<
+ Float64
+>;
+
+using Float32Array = Array<
+ Float32
+>;
+
+using MixedPrecisionBenchmarkInterface = Interface<
+ Member<Function<MixedArray,Void>, "float64_32">,
+ Member<Function<Float64Array,Void>, "float64">,
+ Member<Function<Float32Array,Void>, "float32">
+>;
+}
+
+saw::interface<sch::MixedPrecisionBenchmarkInterface, saw::encode::Native, saw::rmt::Sycl, cl::sycl::queue*> listen_mixed_precision(cl::sycl::event& mixed_ev, cl::sycl::event& float64_ev, cl::sycl::event& float32_ev);
diff --git a/modules/remote-sycl/c++/common.hpp b/modules/remote-sycl/c++/common.hpp
index 9bc734d..078879f 100644
--- a/modules/remote-sycl/c++/common.hpp
+++ b/modules/remote-sycl/c++/common.hpp
@@ -1,6 +1,6 @@
#pragma once
-#include <forstio/io_codec/rpc.hpp>
+#include <forstio/codec/rpc.hpp>
#include <forstio/codec/data.hpp>
#include <forstio/codec/id_map.hpp>
@@ -17,9 +17,4 @@ class remote<rmt::Sycl>;
template<typename T>
class device;
-template<typename Schema, typename Encoding, typename Remote>
-class data_server;
-
-template<typename Schema, typename Encoding, typename Remote>
-class data_client;
}
diff --git a/modules/remote-sycl/c++/data.hpp b/modules/remote-sycl/c++/data.hpp
index e436e72..d939a53 100644
--- a/modules/remote-sycl/c++/data.hpp
+++ b/modules/remote-sycl/c++/data.hpp
@@ -12,9 +12,11 @@ template<typename Schema>
class data<Schema, encode::Native, rmt::Sycl> {
private:
cl::sycl::buffer<data<Schema, encode::Native, storage::Default>> data_;
+ uint64_t size_;
public:
data(const data<Schema, encode::Native, storage::Default>& data__):
- data_{&data__, 1u}
+ data_{&data__, 1u},
+ size_{data__.size()}
{}
auto& get_handle() {
@@ -25,6 +27,10 @@ public:
return data_;
}
+ uint64_t size() const {
+ return size_;
+ }
+
template<cl::sycl::access::mode AccessMode>
auto access(cl::sycl::handler& h){
return data_.template get_access<AccessMode>(h);
diff --git a/modules/remote-sycl/c++/device.hpp b/modules/remote-sycl/c++/device.hpp
index e6f3fe3..ae19524 100644
--- a/modules/remote-sycl/c++/device.hpp
+++ b/modules/remote-sycl/c++/device.hpp
@@ -11,7 +11,9 @@ class device<rmt::Sycl> final {
private:
cl::sycl::queue cmd_queue_;
public:
- device() = default;
+ device():
+ cmd_queue_{cl::sycl::default_selector_v, cl::sycl::property_list{cl::sycl::property::queue::enable_profiling()}}
+ {}
SAW_FORBID_COPY(device);
SAW_FORBID_MOVE(device);
diff --git a/modules/remote-sycl/c++/transfer.hpp b/modules/remote-sycl/c++/transfer.hpp
index f535751..72b111f 100644
--- a/modules/remote-sycl/c++/transfer.hpp
+++ b/modules/remote-sycl/c++/transfer.hpp
@@ -6,6 +6,7 @@
#include <forstio/error.hpp>
#include <forstio/reduce_templates.hpp>
+#include <forstio/codec/transfer.hpp>
namespace saw {
namespace impl {
@@ -31,7 +32,7 @@ private:
/**
* Store for the data the server manages.
*/
- impl::data_server_redux<Encoding, typename tmpl_reduce<tmpl_group<Schema...>>::type >::type values_;
+ typename impl::data_server_redux<Encoding, typename tmpl_reduce<tmpl_group<Schema...>>::type >::type values_;
public:
/**
* Main constructor
diff --git a/modules/remote-sycl/tests/mixed_precision.cpp b/modules/remote-sycl/tests/mixed_precision.cpp
new file mode 100644
index 0000000..4a62569
--- /dev/null
+++ b/modules/remote-sycl/tests/mixed_precision.cpp
@@ -0,0 +1,212 @@
+#include <forstio/test/suite.hpp>
+
+#include "../c++/remote.hpp"
+
+#include <random>
+
+namespace {
+namespace schema {
+using namespace saw::schema;
+
+using TestMixedArray = Array<
+ MixedPrecision<Float64,Float32>
+>;
+
+using MixedFoo = Interface<
+ Member<Function<TestMixedArray, Void>, "foo">
+>;
+
+using TestDoubleArray = Array<
+ Float64
+>;
+
+using DoubleFoo = Interface<
+ Member<Function<TestDoubleArray, Void>, "foo">
+>;
+
+using TestFloatArray = Array<
+ Float32
+>;
+
+using FloatFoo = Interface<
+ Member<Function<TestFloatArray, Void>, "foo">
+>;
+}
+
+constexpr uint64_t test_size = 1024ul;
+
+SAW_TEST("SYCL Mixed Test"){
+ using namespace saw;
+
+ std::random_device r;
+ std::default_random_engine e1{r()};
+ std::uniform_real_distribution<> dis{-1.0,1.0};
+
+ data<schema::TestMixedArray> host_data;
+ host_data = {test_size};
+ for(uint64_t i = 0; i < test_size; ++i){
+ host_data.at(i) = static_cast<double>(dis(e1));
+ }
+
+ saw::event_loop loop;
+ saw::wait_scope wait{loop};
+
+ remote<rmt::Sycl> rmt;
+
+ own<remote_address<rmt::Sycl>> rmt_addr{};
+
+ rmt.resolve_address().then([&](auto addr){
+ rmt_addr = std::move(addr);
+ }).detach();
+
+ wait.poll();
+ SAW_EXPECT(rmt_addr, "Remote address hasn't been filled");
+
+ data<schema::TestMixedArray, encode::Native, rmt::Sycl> device_data{host_data};
+
+ cl::sycl::event ev;
+
+ interface<schema::MixedFoo, encode::Native,rmt::Sycl, cl::sycl::queue*> cl_iface {
+[&](data<schema::TestMixedArray, encode::Native, rmt::Sycl>& in, cl::sycl::queue* cmd) -> error_or<void> {
+
+ ev = cmd->submit([&](cl::sycl::handler& h){
+
+ auto acc_buff = in.template access<cl::sycl::access::mode::read_write>(h);
+
+ h.parallel_for(cl::sycl::range<1>(test_size), [=] (cl::sycl::id<1> it){
+ acc_buff[0u].at(it[0u]) = acc_buff[0u].at(it[0u]) * data<schema::Float64>{2.0};
+ });
+ });
+ return saw::void_t{};
+ }
+ };
+ auto& device = rmt_addr->get_device();
+
+ cl_iface.template call <"foo">(device_data, &(device.get_handle()));
+ device.get_handle().wait();
+
+ {
+ auto end = ev.get_profiling_info<cl::sycl::info::event_profiling::command_end>();
+ auto start = ev.get_profiling_info<cl::sycl::info::event_profiling::command_start>();
+
+ std::cout<<"Elapsed kernel time: "<< (end-start) / 1.0e9 << " seconds"<<std::endl;
+ }
+}
+
+SAW_TEST("SYCL Float Test"){
+ using namespace saw;
+
+ std::random_device r;
+ std::default_random_engine e1{r()};
+ std::uniform_real_distribution<> dis{-1.0,1.0};
+
+ data<schema::TestFloatArray> host_data;
+ host_data = {test_size};
+ for(uint64_t i = 0; i < test_size; ++i){
+ host_data.at(i) = static_cast<double>(dis(e1));
+ }
+
+ saw::event_loop loop;
+ saw::wait_scope wait{loop};
+
+ remote<rmt::Sycl> rmt;
+
+ own<remote_address<rmt::Sycl>> rmt_addr{};
+
+ rmt.resolve_address().then([&](auto addr){
+ rmt_addr = std::move(addr);
+ }).detach();
+
+ wait.poll();
+ SAW_EXPECT(rmt_addr, "Remote address hasn't been filled");
+
+ data<schema::TestFloatArray, encode::Native, rmt::Sycl> device_data{host_data};
+
+ cl::sycl::event ev;
+
+ interface<schema::FloatFoo, encode::Native,rmt::Sycl, cl::sycl::queue*> cl_iface {
+[&](data<schema::TestFloatArray, encode::Native, rmt::Sycl>& in, cl::sycl::queue* cmd) -> error_or<void> {
+
+ ev = cmd->submit([&](cl::sycl::handler& h){
+
+ auto acc_buff = in.template access<cl::sycl::access::mode::read_write>(h);
+
+ h.parallel_for(cl::sycl::range<1>(test_size), [=] (cl::sycl::id<1> it){
+ acc_buff[0u].at(it[0u]) = acc_buff[0u].at(it[0u]) * data<schema::Float32>{2.0};
+ });
+ });
+ return saw::void_t{};
+ }
+ };
+ auto& device = rmt_addr->get_device();
+
+ cl_iface.template call <"foo">(device_data, &(device.get_handle()));
+ device.get_handle().wait();
+
+ {
+ auto end = ev.get_profiling_info<cl::sycl::info::event_profiling::command_end>();
+ auto start = ev.get_profiling_info<cl::sycl::info::event_profiling::command_start>();
+
+ std::cout<<"Elapsed kernel time: "<< (end-start) / 1.0e9 << " seconds"<<std::endl;
+ }
+}
+
+SAW_TEST("SYCL Double Test"){
+ using namespace saw;
+
+ std::random_device r;
+ std::default_random_engine e1{r()};
+ std::uniform_real_distribution<> dis{-1.0,1.0};
+
+ data<schema::TestDoubleArray> host_data;
+ host_data = {test_size};
+ for(uint64_t i = 0; i < test_size; ++i){
+ host_data.at(i) = static_cast<double>(dis(e1));
+ }
+
+ saw::event_loop loop;
+ saw::wait_scope wait{loop};
+
+ remote<rmt::Sycl> rmt;
+
+ own<remote_address<rmt::Sycl>> rmt_addr{};
+
+ rmt.resolve_address().then([&](auto addr){
+ rmt_addr = std::move(addr);
+ }).detach();
+
+ wait.poll();
+ SAW_EXPECT(rmt_addr, "Remote address hasn't been filled");
+
+ data<schema::TestDoubleArray, encode::Native, rmt::Sycl> device_data{host_data};
+
+ cl::sycl::event ev;
+
+ interface<schema::DoubleFoo, encode::Native,rmt::Sycl, cl::sycl::queue*> cl_iface {
+[&](data<schema::TestDoubleArray, encode::Native, rmt::Sycl>& in, cl::sycl::queue* cmd) -> error_or<void> {
+
+ ev = cmd->submit([&](cl::sycl::handler& h){
+
+ auto acc_buff = in.template access<cl::sycl::access::mode::read_write>(h);
+
+ h.parallel_for(cl::sycl::range<1>(test_size), [=] (cl::sycl::id<1> it){
+ acc_buff[0u].at(it[0u]) = acc_buff[0u].at(it[0u]) * data<schema::Float64>{2.0};
+ });
+ });
+ return saw::void_t{};
+ }
+ };
+ auto& device = rmt_addr->get_device();
+
+ cl_iface.template call <"foo">(device_data, &(device.get_handle()));
+ device.get_handle().wait();
+
+ {
+ auto end = ev.get_profiling_info<cl::sycl::info::event_profiling::command_end>();
+ auto start = ev.get_profiling_info<cl::sycl::info::event_profiling::command_start>();
+
+ std::cout<<"Elapsed kernel time: "<< (end-start) / 1.0e9 << " seconds"<<std::endl;
+ }
+}
+
+}
diff --git a/modules/remote-sycl/tests/sycl_basics.cpp b/modules/remote-sycl/tests/sycl_basics.cpp
index 61e0d87..7212a77 100644
--- a/modules/remote-sycl/tests/sycl_basics.cpp
+++ b/modules/remote-sycl/tests/sycl_basics.cpp
@@ -15,15 +15,6 @@ using TestStruct = Struct<
using Foo = Interface<
Member<Function<TestStruct, Void>, "foo">
>;
-
-using Calculator = Interface<
- Member<
- Function<Tuple<Int64, Int64>, Int64>, "add"
- >
-, Member<
- Function<Tuple<Int64, Int64>, Int64>, "multiply"
- >
->;
}
SAW_TEST("SYCL Test Setup"){
using namespace saw;