summary refs log tree commit diff
path: root/modules/remote-sycl
diff options
context:
space:
mode:
author Claudius "keldu" Holeksa <mail@keldu.de> 2024-07-02 20:29:02 +0200
committer Claudius "keldu" Holeksa <mail@keldu.de> 2024-07-02 20:29:02 +0200
commit 0290e02fb1e4d3492a166e6eff3210100251f33a (patch)
tree cc9bbf7b81a1c003a9e54b47c0fcf049aa3d3aa7 /modules/remote-sycl
parent 25e05907f0292310eaae27a032db0ee274413874 (diff)
Fixed running benchmarks
Diffstat (limited to 'modules/remote-sycl')
-rw-r--r-- modules/remote-sycl/benchmarks/kernel_mixed_precision.cpp 13
-rw-r--r-- modules/remote-sycl/benchmarks/mixed_precision.cpp 53
-rw-r--r-- modules/remote-sycl/tests/mixed_precision.cpp 2
3 files changed, 37 insertions, 31 deletions
diff --git a/modules/remote-sycl/benchmarks/kernel_mixed_precision.cpp b/modules/remote-sycl/benchmarks/kernel_mixed_precision.cpp
index 0ac9756..c17c137 100644
--- a/modules/remote-sycl/benchmarks/kernel_mixed_precision.cpp
+++ b/modules/remote-sycl/benchmarks/kernel_mixed_precision.cpp
@@ -6,33 +6,34 @@ saw::interface<sch::MixedPrecisionBenchmarkInterface, saw::encode::Native, saw::
* Mixed
*/
[&](saw::data<sch::MixedArray, saw::encode::Native, saw::rmt::Sycl>& in, cl::sycl::queue* cmd) -> saw::error_or<void> {
-
+ uint64_t in_size = in.size();
+
mixed_ev = cmd->submit([&](cl::sycl::handler& h){
auto acc_buff = in.template access<cl::sycl::access::mode::read_write>(h);
- h.parallel_for(cl::sycl::range<1>(in.size()), [=] (cl::sycl::id<1> it){
+ h.parallel_for(cl::sycl::range<1>(in_size), [=] (cl::sycl::id<1> it){
acc_buff[0u].at(it[0u]) = acc_buff[0u].at(it[0u]) * saw::data<sch::Float64>{2.0};
});
});
return saw::void_t{};
},
[&](saw::data<sch::Float64Array, saw::encode::Native, saw::rmt::Sycl>& in, cl::sycl::queue* cmd) -> saw::error_or<void> {
-
+ uint64_t in_size = in.size();
float64_ev = cmd->submit([&](cl::sycl::handler& h){
auto acc_buff = in.template access<cl::sycl::access::mode::read_write>(h);
- h.parallel_for(cl::sycl::range<1>(in.size()), [=] (cl::sycl::id<1> it){
+ h.parallel_for(cl::sycl::range<1>(in_size), [=] (cl::sycl::id<1> it){
acc_buff[0u].at(it[0u]) = acc_buff[0u].at(it[0u]) * saw::data<sch::Float64>{2.0};
});
});
return saw::void_t{};
},
[&](saw::data<sch::Float32Array, saw::encode::Native, saw::rmt::Sycl>& in, cl::sycl::queue* cmd) -> saw::error_or<void> {
-
+ uint64_t in_size = in.size();
float32_ev = cmd->submit([&](cl::sycl::handler& h){
auto acc_buff = in.template access<cl::sycl::access::mode::read_write>(h);
- h.parallel_for(cl::sycl::range<1>(in.size()), [=] (cl::sycl::id<1> it){
+ h.parallel_for(cl::sycl::range<1>(in_size), [=] (cl::sycl::id<1> it){
acc_buff[0u].at(it[0u]) = acc_buff[0u].at(it[0u]) * saw::data<sch::Float32>{2.0f};
});
});
diff --git a/modules/remote-sycl/benchmarks/mixed_precision.cpp b/modules/remote-sycl/benchmarks/mixed_precision.cpp
index e63a814..b979b0c 100644
--- a/modules/remote-sycl/benchmarks/mixed_precision.cpp
+++ b/modules/remote-sycl/benchmarks/mixed_precision.cpp
@@ -1,11 +1,12 @@
#include "./mixed_precision.hpp"
#include <forstio/codec/schema.hpp>
+#include <sstream>
int main(){
using namespace saw;
- constexpr uint64_t max_test_size = 1024ul * 1024ul * 512ul;
+ constexpr uint64_t max_test_size = 1024ul * 1024ul * 256ul;
std::random_device r;
std::default_random_engine e1{r()};
@@ -28,21 +29,21 @@ int main(){
return -1;
}
- data<sch::MixedArray> mixed_host_data;
- data<sch::Float64Array> float64_host_data;
- data<sch::Float32Array> float32_host_data;
-
cl::sycl::event mixed_ev;
cl::sycl::event float32_ev;
cl::sycl::event float64_ev;
-
- auto sycl_iface = listen_mixed_precision(mixed_ev, float64_ev, float32_ev);
- auto time_eval = [](std::string_view tag, cl::sycl::event& ev){
+ auto sycl_iface = listen_mixed_precision(mixed_ev, float64_ev, float32_ev);
+
+ data<sch::MixedArray> mixed_host_data;
+ data<sch::Float64Array> float64_host_data;
+ data<sch::Float32Array> float32_host_data;
+
+ auto time_eval = [](std::stringstream& sstr, cl::sycl::event& ev){
auto end = ev.get_profiling_info<cl::sycl::info::event_profiling::command_end>();
auto start = ev.get_profiling_info<cl::sycl::info::event_profiling::command_start>();
- std::cout<<"Elapsed "<<tag<<" kernel time: "<< (end-start) / 1.0e9 << " seconds\n";
+ sstr<<(end-start) / 1.0e9;
};
auto& device = rmt_addr->get_device();
@@ -50,8 +51,9 @@ int main(){
/**
* Warmup
*/
- {
- uint64_t test_size = max_test_size;
+ std::cout<<"Warming up ..."<<std::endl;
+ for(uint64_t test_size = 1024ul; test_size < max_test_size; test_size *= 2ul){
+
mixed_host_data = {test_size};
float64_host_data = {test_size};
float32_host_data = {test_size};
@@ -66,19 +68,20 @@ int main(){
data<sch::Float32Array, encode::Native, rmt::Sycl> float32_device_data{float32_host_data};
sycl_iface.template call<"float64_32">(mixed_device_data, &(device.get_handle()));
- device.get_handle().wait();
sycl_iface.template call<"float64">(float64_device_data, &(device.get_handle()));
- device.get_handle().wait();
sycl_iface.template call<"float32">(float32_device_data, &(device.get_handle()));
device.get_handle().wait();
-
}
+ std::cout<<"Benchmark starting ..."<<std::endl;
/**
* Benchmark
*/
+ std::stringstream sstr;
for(uint64_t test_size = 1ul; test_size < max_test_size; test_size *= 2ul){
- device.get_handle().wait();
+ data<sch::MixedArray> mixed_host_data;
+ data<sch::Float64Array> float64_host_data;
+ data<sch::Float32Array> float32_host_data;
mixed_host_data = {test_size};
float64_host_data = {test_size};
float32_host_data = {test_size};
@@ -92,19 +95,21 @@ int main(){
data<sch::Float64Array, encode::Native, rmt::Sycl> float64_device_data{float64_host_data};
data<sch::Float32Array, encode::Native, rmt::Sycl> float32_device_data{float32_host_data};
- sycl_iface.template call<"float64_32">(mixed_device_data);
+ sstr<<test_size<<",\t";
+ sycl_iface.template call<"float64_32">(mixed_device_data, &(device.get_handle()));
device.get_handle().wait();
- sycl_iface.template call<"float64">(float64_device_data);
+ time_eval(sstr, mixed_ev);
+ sstr<<",\t";
+ sycl_iface.template call<"float64">(float64_device_data, &(device.get_handle()));
device.get_handle().wait();
- sycl_iface.template call<"float32">(float32_device_data);
+ time_eval(sstr, float64_ev);
+ sstr<<",\t";
+ sycl_iface.template call<"float32">(float32_device_data, &(device.get_handle()));
device.get_handle().wait();
-
- std::cout<<"\nSize: "<<test_size<<'\n';
- time_eval("Mixed", mixed_ev);
- time_eval("Float32", float32_ev);
- time_eval("Float64", float64_ev);
+ time_eval(sstr, float32_ev);
+ sstr<<'\n';
}
- std::cout<<std::endl;
+ std::cout<<sstr.str()<<std::endl;
return 0;
}
diff --git a/modules/remote-sycl/tests/mixed_precision.cpp b/modules/remote-sycl/tests/mixed_precision.cpp
index 4a62569..5b4b86e 100644
--- a/modules/remote-sycl/tests/mixed_precision.cpp
+++ b/modules/remote-sycl/tests/mixed_precision.cpp
@@ -33,7 +33,7 @@ using FloatFoo = Interface<
>;
}
-constexpr uint64_t test_size = 1024ul;
+constexpr uint64_t test_size = 64ul;
SAW_TEST("SYCL Mixed Test"){
using namespace saw;