diff options
| -rw-r--r-- | default.nix | 5 | ||||
| -rw-r--r-- | examples/poiseulle_particles_2d_gpu/sim.cpp | 19 | ||||
| -rw-r--r-- | lib/core/c++/chunk.hpp | 1 | ||||
| -rw-r--r-- | lib/sycl/c++/data.hpp | 18 |
4 files changed, 26 insertions, 17 deletions
diff --git a/default.nix b/default.nix index fd36323..d46eed5 100644 --- a/default.nix +++ b/default.nix @@ -102,6 +102,11 @@ in rec { inherit pname version stdenv forstio adaptive-cpp; inherit kel; }; + + s_poiseulle_particles_2d_gpu = pkgs.callPackage ./examples/s_poiseulle_particles_2d_gpu/.nix/derivation.nix { + inherit pname version stdenv forstio adaptive-cpp; + inherit kel; + }; poiseulle_3d = pkgs.callPackage ./examples/poiseulle_3d/.nix/derivation.nix { inherit pname version stdenv forstio adaptive-cpp; diff --git a/examples/poiseulle_particles_2d_gpu/sim.cpp b/examples/poiseulle_particles_2d_gpu/sim.cpp index 008d8f0..bb1fca5 100644 --- a/examples/poiseulle_particles_2d_gpu/sim.cpp +++ b/examples/poiseulle_particles_2d_gpu/sim.cpp @@ -1,5 +1,5 @@ -#include <kel/lbm/lbm.hpp> #include <kel/lbm/sycl/lbm.hpp> +#include <kel/lbm/lbm.hpp> #include <forstio/remote/filesystem/easy.hpp> #include <forstio/codec/json/json.hpp> @@ -13,16 +13,6 @@ constexpr uint64_t dim_y = 16u; namespace sch { using namespace saw::schema; -template<typename T, typename Desc> -using CellStruct = Struct< - Member<FixedArray<T,Desc::Q>, "dfs">, - Member<FixedArray<T,Desc::Q>, "dfs_old">, - Member<UInt8, "info">, - Member<Vector<T,Desc::D>, "velocity">, - Member<Vector<T,Desc::D>, "force"> ->; - - using InfoChunk = Chunk<UInt8, 0u, dim_x, dim_y>; template<typename T, typename Desc> @@ -127,14 +117,15 @@ saw::error_or<void> lbm_main(int argc, char** argv){ }; // saw::data<sch::FixedArray<sch::UInt64,Desc::D>> meta{{dim_x,dim_y}}; - saw::data<sch::ChunkStruct<T,Desc>> lbm_data{}; + auto lbm_data_ptr = saw::heap<saw::data<sch::ChunkStruct<T,Desc>>>(); device dev; + auto& sycl_q = dev.get_handle(); sycl_q.wait(); { - auto eov = setup_initial_conditions<T,Desc>(lbm_data); + auto eov = setup_initial_conditions<T,Desc>(*lbm_data_ptr); if(eov.is_error()){ return eov; } @@ -147,7 +138,7 @@ saw::error_or<void> lbm_main(int argc, char** argv){ sycl_q.wait(); std::cout<<"Hey2"<<std::endl; { - auto eov = dev.copy_to_device(lbm_data,lbm_sycl_data); + auto eov = dev.copy_to_device(*lbm_data_ptr,lbm_sycl_data); if(eov.is_error()){ return eov; } diff --git a/lib/core/c++/chunk.hpp b/lib/core/c++/chunk.hpp index 1da2c9f..5d20faa 100644 --- a/lib/core/c++/chunk.hpp +++ b/lib/core/c++/chunk.hpp @@ -1,6 +1,7 @@ #pragma once #include "common.hpp" +#include "flatten.hpp" namespace kel { namespace lbm { diff --git a/lib/sycl/c++/data.hpp b/lib/sycl/c++/data.hpp index c6ea281..cffeb38 100644 --- a/lib/sycl/c++/data.hpp +++ b/lib/sycl/c++/data.hpp @@ -33,15 +33,19 @@ public: q_{&q__}, values_{nullptr} { - values_ = acpp::sycl::malloc_device<data<Sch>>(ct_multiply<uint64_t,Dims...>::value,*q_); + std::cout<<"Hey: "<<ct_multiply<uint64_t,Dims...>::value<<std::endl; + values_ = acpp::sycl::malloc_device<data<Sch,Encode>>(ct_multiply<uint64_t,Dims...>::value,*q_); + SAW_ASSERT(values_ and q_); } ~data(){ if(not values_){ return; } + SAW_ASSERT(q_); acpp::sycl::free(values_,*q_); + values_ = nullptr; } static constexpr data<schema::FixedArray<schema::UInt64, sizeof...(Dims)>> get_dims() { @@ -169,7 +173,13 @@ struct sycl_copy_helper<sch::Struct<Members...>, Encode> final { auto host_ptr = host_member_data.flat_data(); auto sycl_ptr = sycl_member_data.flat_data(); - q.memcpy(host_ptr, sycl_ptr, sizeof(std::decay_t<decltype(host_ptr)>) * host_member_data.flat_size().get() ); + static_assert(sizeof(std::decay_t<decltype(sycl_ptr)>) == sizeof(std::decay_t<decltype(host_ptr)>), "Unequal size"); + + std::cout<<host_member_data.flat_size().get()<<" "<<std::endl; + + q.submit([&](acpp::sycl::handler& h){ + h.copy(host_ptr,sycl_ptr, host_member_data.flat_size().get()); + }).wait(); return copy_to_device_member<i+1u>(host_data,sycl_data,q); } @@ -192,7 +202,9 @@ struct sycl_copy_helper<sch::Struct<Members...>, Encode> final { auto host_ptr = host_member_data.flat_data(); auto sycl_ptr = sycl_member_data.flat_data(); - q.memcpy(sycl_ptr, host_ptr, sizeof(std::decay_t<decltype(host_ptr)>) * host_member_data.flat_size().get() ); + q.submit([&](acpp::sycl::handler& h){ + h.copy(sycl_ptr,host_ptr, host_member_data.flat_size().get()); + }).wait(); return copy_to_host_member<i+1u>(sycl_data,host_data,q); } |
