summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--default.nix4
-rw-r--r--modules/remote-sycl/.nix/derivation.nix8
-rw-r--r--modules/remote-sycl/benchmarks/mixed_precision.cpp68
-rw-r--r--modules/remote-sycl/benchmarks/mixed_precision.hpp8
-rw-r--r--modules/remote-sycl/c++/common.hpp2
-rw-r--r--modules/remote-sycl/c++/data.hpp26
-rw-r--r--modules/remote-sycl/c++/device.hpp8
7 files changed, 59 insertions, 65 deletions
diff --git a/default.nix b/default.nix
index ef0bc3d..24b2d77 100644
--- a/default.nix
+++ b/default.nix
@@ -6,7 +6,7 @@
ref = "master";
}).outPath + "/default.nix"){
}).gasp
-, adaptive-cpp ? (pkgs.callPackage ./.nix/adaptivecpp.nix {
+, adaptive-cpp ? (pkgs.callPackage ./.nix/adaptive-cpp.nix {
inherit stdenv;
})
}:
@@ -133,7 +133,7 @@ in rec {
openmp = pkgs.llvmPackages_17.openmp;
build_examples = "false";
- build_benchmarks = "true";
+ build_benchmarks = "false";
};
remote-hip = pkgs.callPackage modules/remote-hip/.nix/derivation.nix {
diff --git a/modules/remote-sycl/.nix/derivation.nix b/modules/remote-sycl/.nix/derivation.nix
index 45d2e8b..688af18 100644
--- a/modules/remote-sycl/.nix/derivation.nix
+++ b/modules/remote-sycl/.nix/derivation.nix
@@ -5,14 +5,14 @@
, version
, forstio
, openmp
-, keldu
, ocl-icd
, lld_17
, python3
, bash
+, adaptive-cpp
, build_examples ? "false"
-, build_benchmarks ? "true"
+, build_benchmarks ? "false"
}:
let
@@ -34,7 +34,7 @@ in stdenv.mkDerivation {
forstio.codec
forstio.async
forstio.remote
- keldu.adaptivecpp-dev
+ adaptive-cpp
ocl-icd
openmp
lld_17
@@ -48,7 +48,7 @@ in stdenv.mkDerivation {
scons prefix=$out build_benchmarks=${build_benchmarks} build_examples=${build_examples} install
'';
- doCheck = true;
+ doCheck = false;
checkPhase = ''
export ACPP_APPDB_DIR=.
scons test
diff --git a/modules/remote-sycl/benchmarks/mixed_precision.cpp b/modules/remote-sycl/benchmarks/mixed_precision.cpp
index d4b119b..aabe949 100644
--- a/modules/remote-sycl/benchmarks/mixed_precision.cpp
+++ b/modules/remote-sycl/benchmarks/mixed_precision.cpp
@@ -82,20 +82,20 @@ int main(int argc, char** argv){
if(!rmt_addr){
return -1;
}
-
- cl::sycl::event mixed_ev;
- cl::sycl::event float32_ev;
- cl::sycl::event float64_ev;
+ // acpp::sycl::queue cmd_queue;
+ acpp::sycl::event mixed_ev;
+ acpp::sycl::event float32_ev;
+ acpp::sycl::event float64_ev;
auto sycl_iface = listen_mixed_precision(mixed_ev, float64_ev, float32_ev, arithmetic_intensity);
- data<sch::MixedArray> mixed_host_data;
- data<sch::Float64Array> float64_host_data;
- data<sch::Float32Array> float32_host_data;
+ data<sch::MixedArray, encode::Sycl<encode::Native>> mixed_data;
+ data<sch::Float64Array, encode::Sycl<encode::Native>> float64_data;
+ data<sch::Float32Array, encode::Sycl<encode::Native>> float32_data;
- auto time_eval = [](uint64_t & current_min_time, cl::sycl::event& ev){
- auto end = ev.get_profiling_info<cl::sycl::info::event_profiling::command_end>();
- auto start = ev.get_profiling_info<cl::sycl::info::event_profiling::command_start>();
+ auto time_eval = [](uint64_t & current_min_time, acpp::sycl::event& ev){
+ auto end = ev.get_profiling_info<acpp::sycl::info::event_profiling::command_end>();
+ auto start = ev.get_profiling_info<acpp::sycl::info::event_profiling::command_start>();
uint64_t curr_time = (end-start);
current_min_time = std::min(curr_time, current_min_time);
@@ -110,22 +110,19 @@ int main(int argc, char** argv){
std::cout<<"Warming up ..."<<std::endl;
for(uint64_t test_size = 1ul; test_size < max_test_size; test_size *= 2ul){
- mixed_host_data = {test_size};
- float64_host_data = {test_size};
- float32_host_data = {test_size};
+ mixed_data = {{{test_size,}}};
+ float64_data = {{{test_size}}};
+ float32_data = {{{test_size}}};
for(uint64_t i = 0; i < test_size; ++i){
double gen_num = dis(e1);
- mixed_host_data.at(i) = static_cast<double>(gen_num);
- float64_host_data.at(i) = static_cast<double>(gen_num);
- float32_host_data.at(i) = static_cast<float>(gen_num);
+ mixed_data.at({{i}}) = {static_cast<double>(gen_num)};
+ float64_data.at({{i}}) = {static_cast<double>(gen_num)};
+ float32_data.at({{i}}) = {static_cast<float>(gen_num)};
}
- data<sch::MixedArray, encode::Sycl<encode::Native>> mixed_device_data{mixed_host_data};
- data<sch::Float64Array, encode::Sycl<encode::Native>> float64_device_data{float64_host_data};
- data<sch::Float32Array, encode::Sycl<encode::Native>> float32_device_data{float32_host_data};
- sycl_iface.template call<"float64_32">(mixed_device_data, &(device.get_handle()));
- sycl_iface.template call<"float64">(float64_device_data, &(device.get_handle()));
- sycl_iface.template call<"float32">(float32_device_data, &(device.get_handle()));
+ sycl_iface.template call<"float64_32">(mixed_data, &(device.get_handle()));
+ sycl_iface.template call<"float64">(float64_data, &(device.get_handle()));
+ sycl_iface.template call<"float32">(float32_data, &(device.get_handle()));
device.get_handle().wait();
}
@@ -142,32 +139,29 @@ int main(int argc, char** argv){
(std::cout<<'.').flush();
- data<sch::MixedArray> mixed_host_data;
- data<sch::Float64Array> float64_host_data;
- data<sch::Float32Array> float32_host_data;
+ data<sch::MixedArray, encode::Sycl<encode::Native>> mixed_data;
+ data<sch::Float64Array, encode::Sycl<encode::Native>> float64_data;
+ data<sch::Float32Array, encode::Sycl<encode::Native>> float32_data;
- mixed_host_data = {test_size};
- float64_host_data = {test_size};
- float32_host_data = {test_size};
+ mixed_data = {{{test_size}}};
+ float64_data = {{{test_size}}};
+ float32_data = {{{test_size}}};
for(uint64_t i = 0; i < test_size; ++i){
double gen_num = dis(e1);
- mixed_host_data.at(i) = static_cast<double>(gen_num);
- float64_host_data.at(i) = static_cast<double>(gen_num);
- float32_host_data.at(i) = static_cast<float>(gen_num);
+ mixed_data.at({{i}}) = {static_cast<double>(gen_num)};
+ float64_data.at({{i}}) = {static_cast<double>(gen_num)};
+ float32_data.at({{i}}) = {static_cast<float>(gen_num)};
}
- data<sch::MixedArray, encode::Sycl<encode::Native>> mixed_device_data{mixed_host_data};
- data<sch::Float64Array, encode::Sycl<encode::Native>> float64_device_data{float64_host_data};
- data<sch::Float32Array, encode::Sycl<encode::Native>> float32_device_data{float32_host_data};
- sycl_iface.template call<"float64_32">(mixed_device_data, &(device.get_handle()));
+ sycl_iface.template call<"float64_32">(mixed_data, &(device.get_handle()));
device.get_handle().wait();
time_eval(time_mixed, mixed_ev);
- sycl_iface.template call<"float64">(float64_device_data, &(device.get_handle()));
+ sycl_iface.template call<"float64">(float64_data, &(device.get_handle()));
device.get_handle().wait();
time_eval(time_float64, float64_ev);
- sycl_iface.template call<"float32">(float32_device_data, &(device.get_handle()));
+ sycl_iface.template call<"float32">(float32_data, &(device.get_handle()));
device.get_handle().wait();
time_eval(time_float32, float32_ev);
}
diff --git a/modules/remote-sycl/benchmarks/mixed_precision.hpp b/modules/remote-sycl/benchmarks/mixed_precision.hpp
index e21a2e6..f673538 100644
--- a/modules/remote-sycl/benchmarks/mixed_precision.hpp
+++ b/modules/remote-sycl/benchmarks/mixed_precision.hpp
@@ -34,8 +34,8 @@ using FloatPrecisionBenchmarkInterface = Interface<
>;
}
-saw::interface<sch::MixedPrecisionBenchmarkInterface, saw::encode::Sycl<saw::encode::Native>, cl::sycl::queue*> listen_mixed_precision(cl::sycl::event& mixed_ev, cl::sycl::event& float64_ev, cl::sycl::event& float32_ev, uint64_t& arithmetic_intensity);
+saw::interface<sch::MixedPrecisionBenchmarkInterface, saw::encode::Sycl<saw::encode::Native>, acpp::sycl::queue*> listen_mixed_precision(acpp::sycl::event& mixed_ev, acpp::sycl::event& float64_ev, acpp::sycl::event& float32_ev, uint64_t& arithmetic_intensity);
-saw::interface<sch::FloatPrecisionBenchmarkInterface<sch::MixedArray>, saw::encode::Sycl<saw::encode::Native>, cl::sycl::queue*> listen_mixed_float_precision(cl::sycl::event& ev, uint64_t& arithmetic_intensity);
-saw::interface<sch::FloatPrecisionBenchmarkInterface<sch::Float32Array>, saw::encode::Sycl<saw::encode::Native>, cl::sycl::queue*> listen_half_float_precision(cl::sycl::event& ev, uint64_t& arithmetic_intensity);
-saw::interface<sch::FloatPrecisionBenchmarkInterface<sch::Float64Array>, saw::encode::Sycl<saw::encode::Native>, cl::sycl::queue*> listen_full_precision(cl::sycl::event& ev, uint64_t& arithmetic_intensity);
+saw::interface<sch::FloatPrecisionBenchmarkInterface<sch::MixedArray>, saw::encode::Sycl<saw::encode::Native>, acpp::sycl::queue*> listen_mixed_float_precision(acpp::sycl::event& ev, uint64_t& arithmetic_intensity);
+saw::interface<sch::FloatPrecisionBenchmarkInterface<sch::Float32Array>, saw::encode::Sycl<saw::encode::Native>, acpp::sycl::queue*> listen_half_float_precision(acpp::sycl::event& ev, uint64_t& arithmetic_intensity);
+saw::interface<sch::FloatPrecisionBenchmarkInterface<sch::Float64Array>, saw::encode::Sycl<saw::encode::Native>, acpp::sycl::queue*> listen_full_precision(acpp::sycl::event& ev, uint64_t& arithmetic_intensity);
diff --git a/modules/remote-sycl/c++/common.hpp b/modules/remote-sycl/c++/common.hpp
index 822dae5..287075f 100644
--- a/modules/remote-sycl/c++/common.hpp
+++ b/modules/remote-sycl/c++/common.hpp
@@ -5,7 +5,7 @@
#include <forstio/codec/schema_hash.hpp>
#include <forstio/codec/id_map.hpp>
-#include <AdaptiveCpp/CL/sycl.hpp>
+#include <AdaptiveCpp/sycl/sycl.hpp>
namespace saw {
namespace rmt {
diff --git a/modules/remote-sycl/c++/data.hpp b/modules/remote-sycl/c++/data.hpp
index 2d5893f..c2ee098 100644
--- a/modules/remote-sycl/c++/data.hpp
+++ b/modules/remote-sycl/c++/data.hpp
@@ -15,8 +15,8 @@ public:
using Schema = schema::Array<Sch,Dim>;
private:
// cl::sycl::buffer<data<Sch, encode::Native>> data_;
- using sycl_usm_allocator = acpp::sycl::usm_allocator<data<Sch,Encode>, sycl::usm::alloc::shared>;
- data<schema::FixedArray<schema::UInt64, sizeof...(D)>, Encode> dims_;
+ using sycl_usm_allocator = acpp::sycl::usm_allocator<data<Sch,Encode>, acpp::sycl::usm::alloc::shared>;
+ data<schema::FixedArray<schema::UInt64, Dim>, Encode> dims_;
data<schema::UInt64, Encode> size_;
std::vector<data<Sch,Encode>, sycl_usm_allocator> data_;
@@ -31,20 +31,20 @@ private:
return s;
}
public:
- data():
+ data(acpp::sycl::queue& q__):
dims_{},
size_{0u},
- data_{}
+ data_{0u,q__}
{
for(uint64_t iter = 0; iter < Dim; ++iter){
dims_.at({iter}) = 0u;
}
}
- data(const data<schema::FixedArray<schema::UInt64, sizeof...(D)>, Encode>& dims__):
+ data(const data<schema::FixedArray<schema::UInt64, Dim>, Encode>& dims__, acpp::sycl::queue& q__):
dims_{dims__},
size_{get_full_size()},
- data_{size_}
+ data_{size_,q__}
{}
auto* get_internal_data() {
@@ -62,19 +62,19 @@ public:
return size_;
}
- data<schema::FixedArray<schema::UInt64, sizeof...(D)>, Encode> dims() const {
+ data<schema::FixedArray<schema::UInt64, Dim>, Encode> dims() const {
return dims_;
}
- constexpr data<T, Encode>& at(const data<schema::FixedArray<schema::UInt64, sizeof...(D)>, Encode>& i){
- return value_.at(this->get_flat_index(i));
+ constexpr data<Sch, Encode>& at(const data<schema::FixedArray<schema::UInt64, Dim>, Encode>& i){
+ return data_.at(this->get_flat_index(i));
}
- constexpr const data<T, Encode>& at(const data<schema::FixedArray<schema::UInt64, sizeof...(D)>, Encode>& i)const{
- return value_.at(this->get_flat_index(i));
+ constexpr const data<Sch, Encode>& at(const data<schema::FixedArray<schema::UInt64, Dim>, Encode>& i)const{
+ return data_.at(this->get_flat_index(i));
}
- data<schema::UInt64,Encode> internal_flat_index(const data<schema::FixedArray<schema::UInt64, sizeof...(D)>, Encode>& i) const {
+ data<schema::UInt64,Encode> internal_flat_index(const data<schema::FixedArray<schema::UInt64, Dim>, Encode>& i) const {
return {this->get_flat_index(i)};
}
private:
@@ -85,7 +85,7 @@ private:
std::is_same_v<U,std::array<uint64_t,Dim>>,
"Unsupported type"
);
- assert(value_.size() == get_full_size());
+ assert(data_.size() == get_full_size());
uint64_t s = 0;
uint64_t stride = 1;
diff --git a/modules/remote-sycl/c++/device.hpp b/modules/remote-sycl/c++/device.hpp
index a050078..05bb17a 100644
--- a/modules/remote-sycl/c++/device.hpp
+++ b/modules/remote-sycl/c++/device.hpp
@@ -9,10 +9,10 @@ namespace saw {
template<>
class device<rmt::Sycl> final {
private:
- cl::sycl::queue cmd_queue_;
+ acpp::sycl::queue cmd_queue_;
public:
device():
- cmd_queue_{cl::sycl::default_selector_v, cl::sycl::property_list{cl::sycl::property::queue::enable_profiling()}}
+ cmd_queue_{acpp::sycl::default_selector_v, acpp::sycl::property_list{acpp::sycl::property::queue::enable_profiling()}}
{}
SAW_FORBID_COPY(device);
@@ -44,14 +44,14 @@ public:
});
cmd_queue_.wait();
*/
- cl::sycl::host_accessor result{dev_data.get_handle()};
+ acpp::sycl::host_accessor result{dev_data.get_handle()};
return result[0];
}
/**
* Get a reference to the handle
*/
- cl::sycl::queue& get_handle(){
+ acpp::sycl::queue& get_handle(){
return cmd_queue_;
}
};