From 9e0147cb0d4a07fddfdc77063d7d83d8ad1ac0d3 Mon Sep 17 00:00:00 2001 From: "Claudius \"keldu\" Holeksa" Date: Sun, 12 Apr 2026 23:35:51 +0200 Subject: Copying particles to gpu --- examples/settling_cubes_2d_ibm_gpu/sim.cpp | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/examples/settling_cubes_2d_ibm_gpu/sim.cpp b/examples/settling_cubes_2d_ibm_gpu/sim.cpp index 3281239..3c1e96d 100644 --- a/examples/settling_cubes_2d_ibm_gpu/sim.cpp +++ b/examples/settling_cubes_2d_ibm_gpu/sim.cpp @@ -121,6 +121,7 @@ template saw::error_or step( saw::data>,encode::Sycl>& fields, saw::data>,encode::Sycl>& macros, + saw::data, encode::Sycl>& particles, saw::data t_i, device& dev ){ @@ -130,7 +131,6 @@ saw::error_or step( { } - // auto coll_ev = q.submit([&](acpp::sycl::handler& h){ saw::data> force; @@ -164,6 +164,10 @@ saw::error_or step( }).wait(); + q.submit([&](acpp::sycl::handler& h){ + h.parallel_for(acpp::sycl::range{dim_x}, [=](acpp::sycl::id idx){ + }); + }).wait(); // Step /* q.submit([&](acpp::sycl::handler& h){ @@ -210,7 +214,7 @@ saw::error_or lbm_main(int argc, char** argv){ // saw::data> meta{{dim_x,dim_y}}; auto lbm_data_ptr = saw::heap>>(); auto lbm_macro_data_ptr = saw::heap>>(); - auto lbm_particles_data = saw::data>(); + auto lbm_particle_data = saw::heap>>(); std::cout<<"Estimated Bytes: "<,sch::MacroStruct>().get()< lbm_main(int argc, char** argv){ sycl_q.wait(); { - auto eov = setup_initial_conditions(*lbm_data_ptr,*lbm_macro_data_ptr,lbm_particles_data); + auto eov = setup_initial_conditions(*lbm_data_ptr,*lbm_macro_data_ptr,lbm_particle_data); if(eov.is_error()){ return eov; } @@ -250,10 +254,12 @@ saw::error_or lbm_main(int argc, char** argv){ saw::data, encode::Sycl> lbm_sycl_data{sycl_q}; saw::data, encode::Sycl> lbm_sycl_macro_data{sycl_q}; + saw::data, encode::Sycl> lbm_sycl_particle_data{sycl_q}; sycl_q.wait(); auto lsd_view = make_chunk_struct_view(lbm_sycl_data); auto lsdm_view = make_chunk_struct_view(lbm_sycl_macro_data); + auto lsdp_view = make_chunk_struct_view(lbm_sycl_particle_data); { auto eov = dev.copy_to_device(*lbm_data_ptr,lbm_sycl_data); if(eov.is_error()){ @@ -266,6 +272,12 @@ saw::error_or lbm_main(int argc, char** argv){ return eov; } } + { + auto eov = dev.copy_to_device(*lbm_particle_data_ptr,lbm_sycl_particle_data); + if(eov.is_error()){ + return eov; + } + } sycl_q.wait(); saw::data time_steps{16u*4096ul}; @@ -274,7 +286,7 @@ saw::error_or lbm_main(int argc, char** argv){ for(saw::data i{0u}; i < time_steps and krun; ++i){ // BC + Collision { - auto eov = step(lsd_view,lsdm_view,i,dev); + auto eov = step(lsd_view,lsdm_view,lsdp_view,i,dev); if(eov.is_error()){ return eov; } -- cgit v1.2.3 From 546ee949f384c7c666372b9c1f5c064c8a8f2e89 Mon Sep 17 00:00:00 2001 From: "Claudius \"keldu\" Holeksa" Date: Sun, 12 Apr 2026 23:36:10 +0200 Subject: Preventing copied linkout --- util/podman/norce_build_and_run.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/util/podman/norce_build_and_run.sh b/util/podman/norce_build_and_run.sh index 048d2c7..42d1f24 100755 --- a/util/podman/norce_build_and_run.sh +++ b/util/podman/norce_build_and_run.sh @@ -74,10 +74,10 @@ podman run --rm -it \ nix-store --verify --check-contents || true # Build - nix-build default.nix -A release.examples --out-link result + nix-build default.nix -A release.examples --out-link /tmp/result - # --- Interactive binary selection --- - BIN_DIR=./result/bin + # --- Interactive binary selection --- + BIN_DIR=/tmp/result/bin mapfile -t BINARIES < <(ls -1 \$BIN_DIR) if (( \${#BINARIES[@]} == 0 )); then echo 'No binaries found in result/bin' -- cgit v1.2.3 From 20e46825069b3974d5cc883163d7e37b4836b2af Mon Sep 17 00:00:00 2001 From: "Claudius \"keldu\" Holeksa" Date: Mon, 13 Apr 2026 19:26:57 +0200 Subject: Progress? --- examples/settling_cubes_2d_ibm_gpu/sim.cpp | 16 +- lib/core/c++/particle/particle.hpp | 26 +-- lib/sycl/c++/data.hpp | 300 ++++++++++++++++++++++++++++- 3 files changed, 308 insertions(+), 34 deletions(-) diff --git a/examples/settling_cubes_2d_ibm_gpu/sim.cpp b/examples/settling_cubes_2d_ibm_gpu/sim.cpp index 3c1e96d..3c698bb 100644 --- a/examples/settling_cubes_2d_ibm_gpu/sim.cpp +++ b/examples/settling_cubes_2d_ibm_gpu/sim.cpp @@ -52,7 +52,7 @@ using MacroStruct = Struct< template using ParticleGroups = Tuple< ParticleGroup< - T,Desc::D,sch::ParticleCollisionSpheroid + T,Desc::D,sch::ParticleCollisionSpheroid > >; } @@ -119,11 +119,11 @@ saw::error_or setup_initial_conditions( template saw::error_or step( - saw::data>,encode::Sycl>& fields, - saw::data>,encode::Sycl>& macros, - saw::data, encode::Sycl>& particles, - saw::data t_i, - device& dev + saw::data>,encode::Sycl>& fields, + saw::data>,encode::Sycl>& macros, + saw::data>,encode::Sycl>& particles, + saw::data t_i, + device& dev ){ auto& q = dev.get_handle(); auto& info_f = fields.template get<"info">(); @@ -214,7 +214,7 @@ saw::error_or lbm_main(int argc, char** argv){ // saw::data> meta{{dim_x,dim_y}}; auto lbm_data_ptr = saw::heap>>(); auto lbm_macro_data_ptr = saw::heap>>(); - auto lbm_particle_data = saw::heap>>(); + auto lbm_particle_data_ptr = saw::heap>>(); std::cout<<"Estimated Bytes: "<,sch::MacroStruct>().get()< lbm_main(int argc, char** argv){ sycl_q.wait(); { - auto eov = setup_initial_conditions(*lbm_data_ptr,*lbm_macro_data_ptr,lbm_particle_data); + auto eov = setup_initial_conditions(*lbm_data_ptr,*lbm_macro_data_ptr,*lbm_particle_data_ptr); if(eov.is_error()){ return eov; } diff --git a/lib/core/c++/particle/particle.hpp b/lib/core/c++/particle/particle.hpp index c0d115f..e48cf08 100644 --- a/lib/core/c++/particle/particle.hpp +++ b/lib/core/c++/particle/particle.hpp @@ -55,14 +55,13 @@ using Particle = Struct< template> using ParticleGroup = Struct< Member, "mask">, - Member, - Member, "mass">, + Member,1u>, "density">, Member>, "particles"> >; } -template -saw::data>> create_spheroid_particle_group( +template +saw::data>> create_spheroid_particle_group( saw::data> rad_p, saw::data> density_p, const saw::data& mask_resolution @@ -71,23 +70,10 @@ saw::data>> create_sph auto& mask = part.template get<"mask">(); auto& collision = part.template get<"collision">(); - auto& mass = part.template get<"mass">(); + auto& density = part.template get<"density">().at({{0u}}); - if constexpr ( D == 1u ){ - saw::data> c; - c.at({}).set(2.0); - mass = rad_p * c * density_p; - } else if constexpr ( D == 2u){ - saw::data> pi; - pi.at({}).set(3.14159); - mass = rad_p * rad_p * pi * density_p; - } else if constexpr ( D == 3u ){ - saw::data> c; - c.at({}).set(3.14159 * 4.0 / 3.0); - mass = rad_p * rad_p * rad_p * c * density_p; - } else { - static_assert(D == 0u or D > 3u, "Dimensions only supported for Dim 1,2 & 3."); - } + static_assert(D == 0u or D > 3u, "Dimensions only supported for Dim 1,2 & 3."); + density = density_p; saw::data> mask_dims; for(uint64_t i = 0u; i < D; ++i){ diff --git a/lib/sycl/c++/data.hpp b/lib/sycl/c++/data.hpp index 50adb4f..661c313 100644 --- a/lib/sycl/c++/data.hpp +++ b/lib/sycl/c++/data.hpp @@ -29,13 +29,15 @@ private: SAW_FORBID_COPY(data); SAW_FORBID_MOVE(data); + public: data(acpp::sycl::queue& q__): q_{&q__}, values_{nullptr} { + SAW_ASSERT(q_); values_ = acpp::sycl::malloc_device>(ct_multiply::value,*q_); - SAW_ASSERT(values_ and q_); + SAW_ASSERT(values_); } ~data(){ @@ -63,6 +65,7 @@ public: constexpr data* flat_data() { return values_; } + }; template @@ -81,7 +84,7 @@ public: {} data(data, kel::lbm::encode::Sycl>& values__): - values_{&values__.at({})} + values_{values__.flat_data()} {} data(data* values__): @@ -105,6 +108,113 @@ public: } }; +template +class data, kel::lbm::encode::Sycl> final { +public: + using Schema = schema::Array; +private: + static_assert(Dims > 0u, "Zero Dim Arrays make no sense here. If you meant to use this for math style approaches then use Tensor instead of Array"); + + data* values_; + data,Encode> meta_; + acpp::sycl::queue* q_; + + SAW_FORBID_COPY(data); + SAW_FORBID_MOVE(data); +public: + data(const data, Encode>& meta__,acpp::sycl::queue& q__): + values_{nullptr}, + meta_{meta__}, + q_{&q__} + { + SAW_ASSERT(q_); + values_ = acpp::sycl::malloc_device>(flat_size().get(),*q_); + SAW_ASSERT(values_); + } + + ~data(){ + if(not values_){ + return; + } + SAW_ASSERT(q_); + + acpp::sycl::free(values_,*q_); + values_ = nullptr; + } + + constexpr data, Encode> meta() const { + return meta_; + } + + constexpr data& at(const data, Encode>& index){ + return values_[kel::lbm::flatten_index::apply(index,meta()).get()]; + } + + constexpr data& at(const data, Encode>& index) const{ + return values_[kel::lbm::flatten_index::apply(index,meta()).get()]; + } + + constexpr data* flat_data() { + return values_; + } + + constexpr data flat_size() const { + data mult{1u}; + + for(uint64_t i{0u}; i < Dims; ++i){ + mult = mult * meta_.at({i}); + } + + return mult; + } +}; + +template +class data>, kel::lbm::encode::Sycl> final { +public: + using Schema = schema::Ptr>; +private: + static_assert(Dims > 0u, "Zero Dim Arrays make no sense here. If you meant to use this for math style approaches then use Tensor instead of Array"); + + data* values_; + data,Encode> meta_; + + SAW_FORBID_COPY(data); + SAW_FORBID_MOVE(data); +public: + data(const data, kel::lbm::encode::Sycl>& values__): + values_{values__.flat_data()}, + meta_{values__.meta()} + { + } + + constexpr data, Encode> meta() const { + return meta_; + } + + constexpr data& at(const data, Encode>& index){ + return values_[kel::lbm::flatten_index::apply(index,meta()).get()]; + } + + constexpr data& at(const data, Encode>& index) const{ + return values_[kel::lbm::flatten_index::apply(index,meta()).get()]; + } + + constexpr data* flat_data() { + return values_; + } + + constexpr data flat_size() const { + data mult{1u}; + + for(uint64_t i{0u}; i < Dims; ++i){ + mult = mult * meta_.at({i}); + } + + return mult; + } +}; + template class data,kel::lbm::encode::Sycl> final { public: @@ -223,6 +333,64 @@ public: } }; +template +struct data, kel::lbm::encode::Sycl> final { +public: + using StorageT = std::tuple>...>; + using Schema = schema::Tuple; +private: + StorageT members_; + + /** + * A helper constructor to forward the sycl queue to the inner "default" constructors + */ + template + constexpr data(acpp::sycl::queue& q, std::index_sequence): + members_{(static_cast(Is), q)...} + {} +public: + data(acpp::sycl::queue& q__): + data{q__, std::make_index_sequence{}} + { + q__.wait(); + } + + template + auto& get(){ + return std::get(members_); + } + + template + const auto& get() const { + return std::get(members_); + } +}; + +template +struct data>, kel::lbm::encode::Sycl> final { +public: + using StorageT = std::tuple,kel::lbm::encode::Sycl>...>; + using Schema = schema::Tuple; +private: + StorageT members_; + + /** + * A helper constructor to forward the sycl queue to the inner "default" constructors + */ +public: + data() = default; + + template + auto& get(){ + return std::get(members_); + } + + template + const auto& get() const { + return std::get(members_); + } +}; + template class data, kel::lbm::encode::Sycl > final { public: @@ -235,6 +403,9 @@ private: */ StorageT members_; + /** + * A helper constructor to forward the sycl queue to the inner "default" constructors + */ template constexpr data(acpp::sycl::queue& q, std::index_sequence): members_{(static_cast(Is), q)...} @@ -246,6 +417,16 @@ public: q__.wait(); } + template + auto& get(){ + return std::get(members_); + } + + template + const auto& get() const { + return std::get(members_); + } + template auto& get(){ return std::get::value>(members_); @@ -347,6 +528,60 @@ struct sycl_copy_helper, Encode> final { } }; +template +struct sycl_copy_helper, Encode> final { + using Schema = sch::Tuple; + + template + static saw::error_or copy_to_device_member(saw::data& host_data, saw::data>& sycl_data, sycl::queue& q){ + if constexpr (i < sizeof...(Members)){ + auto& host_member_data = host_data.template get(); + auto& sycl_member_data = sycl_data.template get(); + + auto host_ptr = host_member_data.flat_data(); + auto sycl_ptr = sycl_member_data.flat_data(); + + static_assert(sizeof(std::decay_t) == sizeof(std::decay_t), "Unequal size"); + + q.submit([&](acpp::sycl::handler& h){ + h.copy(host_ptr,sycl_ptr, host_member_data.flat_size().get()); + }).wait(); + + return copy_to_device_member(host_data,sycl_data,q); + } + + return saw::make_void(); + } + + static saw::error_or copy_to_device(saw::data& host_data, saw::data>& sycl_data, sycl::queue& q){ + return copy_to_device_member<0u>(host_data, sycl_data, q); + } + + template + static saw::error_or copy_to_host_member(saw::data>& sycl_data, saw::data& host_data, sycl::queue& q){ + if constexpr (i < sizeof...(Members)){ + auto& host_member_data = host_data.template get(); + auto& sycl_member_data = sycl_data.template get(); + + auto host_ptr = host_member_data.flat_data(); + auto sycl_ptr = sycl_member_data.flat_data(); + + q.submit([&](acpp::sycl::handler& h){ + h.copy(sycl_ptr,host_ptr, host_member_data.flat_size().get()); + }).wait(); + + return copy_to_host_member(sycl_data,host_data,q); + } + + return saw::make_void(); + } + + + static saw::error_or copy_to_host(saw::data>& sycl_data, saw::data& host_data, sycl::queue& q){ + return copy_to_host_member<0u>(sycl_data, host_data, q); + } +}; + template struct sycl_copy_helper, Encode> final { using Schema = sch::FixedArray; @@ -376,12 +611,41 @@ struct sycl_copy_helper, Encode> final { } }; +template +struct sycl_copy_helper, Encode> final { + using Schema = sch::Array; + + static saw::error_or copy_to_host(saw::data>& sycl_data, saw::data& host_data, sycl::queue& q){ + auto host_ptr = host_data.flat_data(); + auto sycl_ptr = sycl_data.flat_data(); + + static_assert(sizeof(std::decay_t) == sizeof(std::decay_t), "Unequal size"); + + q.submit([&](acpp::sycl::handler& h){ + h.copy(sycl_ptr,host_ptr, host_data.flat_size()); + }).wait(); + return saw::make_void(); + } + + static saw::error_or copy_to_device(saw::data& host_data, saw::data>& sycl_data, sycl::queue& q){ + auto host_ptr = host_data.flat_data(); + auto sycl_ptr = sycl_data.flat_data(); + + static_assert(sizeof(std::decay_t) == sizeof(std::decay_t), "Unequal size"); + + q.submit([&](acpp::sycl::handler& h){ + h.copy(host_ptr,sycl_ptr, host_data.flat_size()); + }).wait(); + return saw::make_void(); + } +}; + template -struct make_chunk_struct_view_helper; +struct make_view_helper; template -struct make_chunk_struct_view_helper...>, encode::Sycl> { +struct make_view_helper...>, encode::Sycl> { private: template static void apply_i( @@ -405,12 +669,36 @@ public: return str_view; } }; + +template +struct make_view_helper, encode::Sycl> { +private: + template + static void apply_i( + saw::data, encode::Sycl>& dat, + saw::data>, encode::Sycl>& dat_view) + { + if constexpr (i < sizeof...(Sch)){ + dat_view.template get() = {dat.template get().flat_data()}; + + apply_i(dat,dat_view); + } + } +public: + static auto apply(saw::data, encode::Sycl>& dat){ + saw::data>, encode::Sycl> str_view; + + apply_i<0u>(dat,str_view); + + return str_view; + } +}; } // Ptr => Ptr>> => Ptr template -auto make_chunk_struct_view(saw::data& dat){ - return impl::make_chunk_struct_view_helper::apply(dat); +auto make_view(saw::data& dat){ + return impl::make_view_helper::apply(dat); } class device final { -- cgit v1.2.3 From ebc2e26a1b3498363bb7522c241de2925bb7f627 Mon Sep 17 00:00:00 2001 From: "Claudius \"keldu\" Holeksa" Date: Tue, 14 Apr 2026 15:33:11 +0200 Subject: Fixed compilation issues with make_view for sycl --- default.nix | 2 +- examples/settling_cubes_2d_ibm_gpu/sim.cpp | 16 +- lib/core/c++/particle/particle.hpp | 13 +- lib/sycl/c++/data.hpp | 232 ++++++++++++++++++++--------- 4 files changed, 177 insertions(+), 86 deletions(-) diff --git a/default.nix b/default.nix index 05ca001..2562f1d 100644 --- a/default.nix +++ b/default.nix @@ -71,7 +71,7 @@ let src = builtins.fetchurl { url = "https://git.keldu.de/forstio-forstio/snapshot/master.tar.gz"; - sha256 = "sha256:026scvd9015blphbj49d0cv5wfrxh1szzdv3p212wzk4iw643akj"; + sha256 = "sha256:1bpglvf0h90dqdm25kp3h1rvk3f7fyasp43xsgki09n1yp55iw9w"; }; phases = [ "unpackPhase" "installPhase" ]; diff --git a/examples/settling_cubes_2d_ibm_gpu/sim.cpp b/examples/settling_cubes_2d_ibm_gpu/sim.cpp index 3c698bb..b4f204e 100644 --- a/examples/settling_cubes_2d_ibm_gpu/sim.cpp +++ b/examples/settling_cubes_2d_ibm_gpu/sim.cpp @@ -52,7 +52,7 @@ using MacroStruct = Struct< template using ParticleGroups = Tuple< ParticleGroup< - T,Desc::D,sch::ParticleCollisionSpheroid + T,Desc::D,sch::ParticleCollisionSpheroid > >; } @@ -103,12 +103,10 @@ saw::error_or setup_initial_conditions( // Particles { - saw::data> rad_p, dense_p; - rad_p.at({}).set(2.0); + saw::data> dense_p; dense_p.at({}).set(1); auto& spheroid_group = particles.template get<0u>(); - spheroid_group = create_spheroid_particle_group( - rad_p, + spheroid_group = create_spheroid_particle_group( dense_p, {64u} ); @@ -165,7 +163,7 @@ saw::error_or step( }).wait(); q.submit([&](acpp::sycl::handler& h){ - h.parallel_for(acpp::sycl::range{dim_x}, [=](acpp::sycl::id idx){ + h.parallel_for(acpp::sycl::range<1u>{dim_x}, [=](acpp::sycl::id<1u> idx){ }); }).wait(); // Step @@ -257,9 +255,9 @@ saw::error_or lbm_main(int argc, char** argv){ saw::data, encode::Sycl> lbm_sycl_particle_data{sycl_q}; sycl_q.wait(); - auto lsd_view = make_chunk_struct_view(lbm_sycl_data); - auto lsdm_view = make_chunk_struct_view(lbm_sycl_macro_data); - auto lsdp_view = make_chunk_struct_view(lbm_sycl_particle_data); + auto lsd_view = make_view(lbm_sycl_data); + auto lsdm_view = make_view(lbm_sycl_macro_data); + auto lsdp_view = make_view(lbm_sycl_particle_data); { auto eov = dev.copy_to_device(*lbm_data_ptr,lbm_sycl_data); if(eov.is_error()){ diff --git a/lib/core/c++/particle/particle.hpp b/lib/core/c++/particle/particle.hpp index e48cf08..7af43dc 100644 --- a/lib/core/c++/particle/particle.hpp +++ b/lib/core/c++/particle/particle.hpp @@ -40,9 +40,8 @@ using ParticleRigidBody = Struct< Member::Schema, "angular_acceleration"> >; -template +template::type radius = 1.0f> using ParticleCollisionSpheroid = Struct< - Member, "radius"> >; template @@ -52,7 +51,7 @@ using Particle = Struct< // Member, "mask">, >; -template> +template> using ParticleGroup = Struct< Member, "mask">, Member,1u>, "density">, @@ -60,19 +59,17 @@ using ParticleGroup = Struct< >; } -template +template::type radius> saw::data>> create_spheroid_particle_group( - saw::data> rad_p, saw::data> density_p, const saw::data& mask_resolution ){ - saw::data>> part; + saw::data>> part; auto& mask = part.template get<"mask">(); - auto& collision = part.template get<"collision">(); auto& density = part.template get<"density">().at({{0u}}); - static_assert(D == 0u or D > 3u, "Dimensions only supported for Dim 1,2 & 3."); + static_assert(D >= 1u and D <= 3u, "Dimensions only supported for Dim 1,2 & 3."); density = density_p; saw::data> mask_dims; diff --git a/lib/sycl/c++/data.hpp b/lib/sycl/c++/data.hpp index 661c313..4e5129b 100644 --- a/lib/sycl/c++/data.hpp +++ b/lib/sycl/c++/data.hpp @@ -62,7 +62,7 @@ public: return values_[kel::lbm::flatten_index::apply(index,get_dims()).get()]; } - constexpr data* flat_data() { + constexpr data* flat_data() const { return values_; } @@ -103,7 +103,7 @@ public: return values_[kel::lbm::flatten_index::apply(index,get_dims()).get()]; } - constexpr data* flat_data() { + constexpr data* flat_data() const { return values_; } }; @@ -122,6 +122,14 @@ private: SAW_FORBID_COPY(data); SAW_FORBID_MOVE(data); public: + data(acpp::sycl::queue& q__): + values_{nullptr}, + meta_{}, + q_{&q__} + { + SAW_ASSERT(q_); + } + data(const data, Encode>& meta__,acpp::sycl::queue& q__): values_{nullptr}, meta_{meta__}, @@ -154,7 +162,7 @@ public: return values_[kel::lbm::flatten_index::apply(index,meta()).get()]; } - constexpr data* flat_data() { + constexpr data* flat_data() const { return values_; } @@ -179,9 +187,14 @@ private: data* values_; data,Encode> meta_; - SAW_FORBID_COPY(data); - SAW_FORBID_MOVE(data); public: + SAW_DEFAULT_COPY(data); + SAW_DEFAULT_MOVE(data); + + data(): + values_{nullptr} + {} + data(const data, kel::lbm::encode::Sycl>& values__): values_{values__.flat_data()}, meta_{values__.meta()} @@ -200,7 +213,7 @@ public: return values_[kel::lbm::flatten_index::apply(index,meta()).get()]; } - constexpr data* flat_data() { + constexpr data* flat_data() const { return values_; } @@ -261,7 +274,7 @@ public: return data,Encode>{{Sides...}}; } - auto flat_data(){ + auto flat_data() const { return values_.flat_data(); } @@ -288,6 +301,10 @@ public: values_{nullptr} {} + data(const data, kel::lbm::encode::Sycl>& values__): + values_{values__.flat_data()} + {} + data(data* values__): values_{values__} {} @@ -361,7 +378,7 @@ public: } template - const auto& get() const { + auto& get() const { return std::get(members_); } }; @@ -423,7 +440,7 @@ public: } template - const auto& get() const { + auto& get() const { return std::get(members_); } @@ -433,7 +450,7 @@ public: } template - const auto& get() const { + auto& get() const { return std::get::value>(members_); } }; @@ -449,17 +466,26 @@ private: * Do it here by specializing. */ StorageT members_; - public: data() = default; + template + auto& get(){ + return std::get(members_); + } + + template + auto& get() const { + return std::get(members_); + } + template auto& get(){ return std::get::value>(members_); } template - const auto& get() const { + auto& get() const { return std::get::value>(members_); } }; @@ -482,14 +508,10 @@ struct sycl_copy_helper, Encode> final { auto& host_member_data = host_data.template get(); auto& sycl_member_data = sycl_data.template get(); - auto host_ptr = host_member_data.flat_data(); - auto sycl_ptr = sycl_member_data.flat_data(); - - static_assert(sizeof(std::decay_t) == sizeof(std::decay_t), "Unequal size"); - - q.submit([&](acpp::sycl::handler& h){ - h.copy(host_ptr,sycl_ptr, host_member_data.flat_size().get()); - }).wait(); + auto eov = sycl_copy_helper::copy_to_device(host_member_data,sycl_member_data,q); + if(eov.is_error()){ + return eov; + } return copy_to_device_member(host_data,sycl_data,q); } @@ -509,12 +531,10 @@ struct sycl_copy_helper, Encode> final { auto& host_member_data = host_data.template get(); auto& sycl_member_data = sycl_data.template get(); - auto host_ptr = host_member_data.flat_data(); - auto sycl_ptr = sycl_member_data.flat_data(); - - q.submit([&](acpp::sycl::handler& h){ - h.copy(sycl_ptr,host_ptr, host_member_data.flat_size().get()); - }).wait(); + auto eov = sycl_copy_helper::copy_to_host(sycl_member_data,host_member_data,q); + if(eov.is_error()){ + return eov; + } return copy_to_host_member(sycl_data,host_data,q); } @@ -535,17 +555,14 @@ struct sycl_copy_helper, Encode> final { template static saw::error_or copy_to_device_member(saw::data& host_data, saw::data>& sycl_data, sycl::queue& q){ if constexpr (i < sizeof...(Members)){ + using M = typename saw::parameter_pack_type::type; auto& host_member_data = host_data.template get(); auto& sycl_member_data = sycl_data.template get(); - auto host_ptr = host_member_data.flat_data(); - auto sycl_ptr = sycl_member_data.flat_data(); - - static_assert(sizeof(std::decay_t) == sizeof(std::decay_t), "Unequal size"); - - q.submit([&](acpp::sycl::handler& h){ - h.copy(host_ptr,sycl_ptr, host_member_data.flat_size().get()); - }).wait(); + auto eov = sycl_copy_helper::copy_to_device(host_member_data,sycl_member_data,q); + if(eov.is_error()){ + return eov; + } return copy_to_device_member(host_data,sycl_data,q); } @@ -560,15 +577,15 @@ struct sycl_copy_helper, Encode> final { template static saw::error_or copy_to_host_member(saw::data>& sycl_data, saw::data& host_data, sycl::queue& q){ if constexpr (i < sizeof...(Members)){ + using M = typename saw::parameter_pack_type::type; + auto& host_member_data = host_data.template get(); auto& sycl_member_data = sycl_data.template get(); - auto host_ptr = host_member_data.flat_data(); - auto sycl_ptr = sycl_member_data.flat_data(); - - q.submit([&](acpp::sycl::handler& h){ - h.copy(sycl_ptr,host_ptr, host_member_data.flat_size().get()); - }).wait(); + auto eov = sycl_copy_helper::copy_to_host(sycl_member_data,host_member_data,q); + if(eov.is_error()){ + return eov; + } return copy_to_host_member(sycl_data,host_data,q); } @@ -611,6 +628,35 @@ struct sycl_copy_helper, Encode> final { } }; +template +struct sycl_copy_helper, Encode> final { + using Schema = sch::Chunk; + + static saw::error_or copy_to_host(saw::data>& sycl_data, saw::data& host_data, sycl::queue& q){ + auto host_ptr = host_data.flat_data(); + auto sycl_ptr = sycl_data.flat_data(); + + static_assert(sizeof(std::decay_t) == sizeof(std::decay_t), "Unequal size"); + + q.submit([&](acpp::sycl::handler& h){ + h.copy(sycl_ptr,host_ptr, saw::ct_multiply::value); + }).wait(); + return saw::make_void(); + } + + static saw::error_or copy_to_device(saw::data& host_data, saw::data>& sycl_data, sycl::queue& q){ + auto host_ptr = host_data.flat_data(); + auto sycl_ptr = sycl_data.flat_data(); + + static_assert(sizeof(std::decay_t) == sizeof(std::decay_t), "Unequal size"); + + q.submit([&](acpp::sycl::handler& h){ + h.copy(host_ptr,sycl_ptr, saw::ct_multiply::value); + }).wait(); + return saw::make_void(); + } +}; + template struct sycl_copy_helper, Encode> final { using Schema = sch::Array; @@ -622,7 +668,7 @@ struct sycl_copy_helper, Encode> final { static_assert(sizeof(std::decay_t) == sizeof(std::decay_t), "Unequal size"); q.submit([&](acpp::sycl::handler& h){ - h.copy(sycl_ptr,host_ptr, host_data.flat_size()); + h.copy(sycl_ptr,host_ptr, host_data.flat_size().get()); }).wait(); return saw::make_void(); } @@ -634,7 +680,7 @@ struct sycl_copy_helper, Encode> final { static_assert(sizeof(std::decay_t) == sizeof(std::decay_t), "Unequal size"); q.submit([&](acpp::sycl::handler& h){ - h.copy(host_ptr,sycl_ptr, host_data.flat_size()); + h.copy(host_ptr,sycl_ptr, host_data.flat_size().get()); }).wait(); return saw::make_void(); } @@ -644,53 +690,99 @@ struct sycl_copy_helper, Encode> final { template struct make_view_helper; +template +struct make_view_helper, encode::Sycl> { +public: +static saw::error_or apply( + saw::data, encode::Sycl>& dat, + saw::data>, encode::Sycl>& dat_view +){ + dat_view = {dat}; + return saw::make_void(); +} +}; + +template +struct make_view_helper, encode::Sycl> { +public: + static saw::error_or apply( + saw::data, encode::Sycl>& dat, + saw::data>, encode::Sycl>& dat_view + ){ + dat_view = {dat}; + return saw::make_void(); + } +}; + +template +struct make_view_helper, encode::Sycl> { +public: + static saw::error_or apply( + saw::data, encode::Sycl>& dat, + saw::data>, encode::Sycl>& dat_view + ){ + dat_view = {dat}; + return saw::make_void(); + } +}; + template struct make_view_helper...>, encode::Sycl> { private: - template - static void apply_i( +template +static saw::error_or apply_i( saw::data...>, encode::Sycl>& dat, - saw::data...>>, encode::Sycl>& dat_view) - { + saw::data...>>, encode::Sycl>& dat_view + ){ if constexpr (i < sizeof...(Sch)){ using M = typename saw::parameter_pack_type...>::type; - - dat_view.template get() = {dat.template get().flat_data()}; - apply_i(dat,dat_view); - } - } -public: - static auto apply(saw::data...>, encode::Sycl>& dat){ - saw::data...>>, encode::Sycl> str_view; + auto eov = make_view_helper>::apply(dat.template get(),dat_view.template get()); + if(eov.is_error()){ + return eov; + } - apply_i<0u>(dat,str_view); + return apply_i(dat,dat_view); + } - return str_view; + return saw::make_void(); } +public: +static saw::error_or apply( + saw::data...>, encode::Sycl>& dat, + saw::data...>>, encode::Sycl>& dat_view +){ + return apply_i<0u>(dat,dat_view); +} }; template struct make_view_helper, encode::Sycl> { private: template - static void apply_i( + static saw::error_or apply_i( saw::data, encode::Sycl>& dat, - saw::data>, encode::Sycl>& dat_view) - { + saw::data>, encode::Sycl>& dat_view + ){ if constexpr (i < sizeof...(Sch)){ - dat_view.template get() = {dat.template get().flat_data()}; + using M = typename saw::parameter_pack_type::type; + + auto eov = make_view_helper>::apply(dat.template get(), dat_view.template get()); + if(eov.is_error()){ + return eov; + } - apply_i(dat,dat_view); + return apply_i(dat,dat_view); } + + return saw::make_void(); } public: - static auto apply(saw::data, encode::Sycl>& dat){ - saw::data>, encode::Sycl> str_view; - - apply_i<0u>(dat,str_view); - - return str_view; + static saw::error_or apply( + saw::data, encode::Sycl>& dat, + saw::data>, encode::Sycl> dat_view + ){ + return apply_i<0u>(dat,dat_view); } }; } @@ -698,7 +790,11 @@ public: // Ptr => Ptr>> => Ptr template auto make_view(saw::data& dat){ - return impl::make_view_helper::apply(dat); + saw::data,Encode> dat_view; + auto eov = impl::make_view_helper::apply(dat,dat_view); + (void) eov; + + return dat_view; } class device final { -- cgit v1.2.3 From c61ba8f8eb86f66915a54551fcc39dfbeab1fad9 Mon Sep 17 00:00:00 2001 From: "Claudius \"keldu\" Holeksa" Date: Tue, 14 Apr 2026 21:09:42 +0200 Subject: Working on meta schema instantiation, because I'm stupid --- default.nix | 2 +- examples/settling_cubes_2d_ibm_gpu/sim.cpp | 14 +++- lib/core/c++/abstract/data.hpp | 26 ++++++- lib/sycl/c++/data.hpp | 112 +++++++++++++++++++++++++++-- lib/sycl/tests/data.cpp | 27 +++++++ 5 files changed, 169 insertions(+), 12 deletions(-) diff --git a/default.nix b/default.nix index 2562f1d..0a110b8 100644 --- a/default.nix +++ b/default.nix @@ -71,7 +71,7 @@ let src = builtins.fetchurl { url = "https://git.keldu.de/forstio-forstio/snapshot/master.tar.gz"; - sha256 = "sha256:1bpglvf0h90dqdm25kp3h1rvk3f7fyasp43xsgki09n1yp55iw9w"; + sha256 = "sha256:0dm4ix3a53zx2ddyalzn33lwgqi6aacpcmlqrz28x11c96s30xyh"; }; phases = [ "unpackPhase" "installPhase" ]; diff --git a/examples/settling_cubes_2d_ibm_gpu/sim.cpp b/examples/settling_cubes_2d_ibm_gpu/sim.cpp index b4f204e..e1ba012 100644 --- a/examples/settling_cubes_2d_ibm_gpu/sim.cpp +++ b/examples/settling_cubes_2d_ibm_gpu/sim.cpp @@ -253,11 +253,14 @@ saw::error_or lbm_main(int argc, char** argv){ saw::data, encode::Sycl> lbm_sycl_data{sycl_q}; saw::data, encode::Sycl> lbm_sycl_macro_data{sycl_q}; saw::data, encode::Sycl> lbm_sycl_particle_data{sycl_q}; + { + auto eov = dev.malloc_on_device(*lbm_particle_data_ptr,lbm_sycl_particle_data); + if(eov.is_error()){ + return eov; + } + } sycl_q.wait(); - auto lsd_view = make_view(lbm_sycl_data); - auto lsdm_view = make_view(lbm_sycl_macro_data); - auto lsdp_view = make_view(lbm_sycl_particle_data); { auto eov = dev.copy_to_device(*lbm_data_ptr,lbm_sycl_data); if(eov.is_error()){ @@ -277,6 +280,11 @@ saw::error_or lbm_main(int argc, char** argv){ } } sycl_q.wait(); + + auto lsd_view = make_view(lbm_sycl_data); + auto lsdm_view = make_view(lbm_sycl_macro_data); + auto lsdp_view = make_view(lbm_sycl_particle_data); + saw::data time_steps{16u*4096ul}; auto& info_f = lsd_view.template get<"info">(); diff --git a/lib/core/c++/abstract/data.hpp b/lib/core/c++/abstract/data.hpp index e8f1757..0075718 100644 --- a/lib/core/c++/abstract/data.hpp +++ b/lib/core/c++/abstract/data.hpp @@ -4,28 +4,48 @@ namespace kel { namespace sch { +struct Void {}; + struct UnsignedInteger {}; struct SignedInteger {}; struct FloatingPoint {}; template struct MixedPrecision { + using Meta = Void; using StorageType = StorageT; using InterfaceType = InterfaceT; }; template struct Primitive { - using PrimitiveType = PrimType; + using Meta = Void; + using Type = PrimType; static constexpr uint64_t Bytes = N; }; template -struct Array { - using InnerType = T; +struct FixedArray { + using Meta = Void; + using Inner = T; static constexpr std::array Dimensions{Dims...}; }; +template +struct Array { + using Meta = FixedArray; + using Inner = T; + static constexpr std::array Dimensions{Dims}; +}; + +template +struct Tuple { +}; } + +template +struct schema { + using Type = Sch; +}; } diff --git a/lib/sycl/c++/data.hpp b/lib/sycl/c++/data.hpp index 4e5129b..3ac51e0 100644 --- a/lib/sycl/c++/data.hpp +++ b/lib/sycl/c++/data.hpp @@ -29,8 +29,17 @@ private: SAW_FORBID_COPY(data); SAW_FORBID_MOVE(data); - public: + data(const data::MetaSchema>& meta__, acpp::sycl::queue& q__): + q_{&q__}, + values_{nullptr} + { + (void) meta__; + SAW_ASSERT(q_); + values_ = acpp::sycl::malloc_device>(ct_multiply::value,*q_); + SAW_ASSERT(values_); + } + data(acpp::sycl::queue& q__): q_{&q__}, values_{nullptr} @@ -65,7 +74,6 @@ public: constexpr data* flat_data() const { return values_; } - }; template @@ -122,6 +130,20 @@ private: SAW_FORBID_COPY(data); SAW_FORBID_MOVE(data); public: + data(const data::MetaSchema>& meta__, acpp::sycl::queue& q__): + q_{&q__}, + values_{nullptr} + { + SAW_ASSERT(q_); + /// TODO use meta + data m{1u}; + for(uint64_t i = 0u; i < Dims; ++i){ + m = m * meta__.at({i}); + } + values_ = acpp::sycl::malloc_device>(m.get(),*q_); + SAW_ASSERT(values_); + } + data(acpp::sycl::queue& q__): values_{nullptr}, meta_{}, @@ -198,8 +220,7 @@ public: data(const data, kel::lbm::encode::Sycl>& values__): values_{values__.flat_data()}, meta_{values__.meta()} - { - } + {} constexpr data, Encode> meta() const { return meta_; @@ -238,6 +259,10 @@ private: data> values_; public: + data(const data::MetaSchema>& meta__, acpp::sycl::queue& q__): + values_{meta__,q__} + {} + data(acpp::sycl::queue& q__): values_{q__} {} @@ -366,6 +391,11 @@ private: members_{(static_cast(Is), q)...} {} public: + data(data::MetaSchema>& meta__, acpp::sycl::queue& q__): + data{q__, std::make_index_sequence{}} + { + } + data(acpp::sycl::queue& q__): data{q__, std::make_index_sequence{}} { @@ -546,6 +576,29 @@ struct sycl_copy_helper, Encode> final { static saw::error_or copy_to_host(saw::data>& sycl_data, saw::data& host_data, sycl::queue& q){ return copy_to_host_member<0u>(sycl_data, host_data, q); } + + template + static saw::error_or malloc_on_device_member(saw::data& host_data, saw::data>& sycl_data, sycl::queue& q){ + + if constexpr (i < sizeof...(Members)){ + using M = typename saw::parameter_pack_type::type; + auto& host_member_data = host_data.template get(); + auto& sycl_member_data = sycl_data.template get(); + + auto eov = sycl_copy_helper::malloc_on_device(host_member_data,sycl_member_data,q); + if(eov.is_error()){ + return eov; + } + + return malloc_on_device_member(host_data,sycl_data,q); + } + + return saw::make_void(); + } + + static saw::error_or malloc_on_device(saw::data& host_data, saw::data>& sycl_data, sycl::queue& q){ + return malloc_on_device_member<0u>(host_data,sycl_data,q); + } }; template @@ -594,9 +647,36 @@ struct sycl_copy_helper, Encode> final { } - static saw::error_or copy_to_host(saw::data>& sycl_data, saw::data& host_data, sycl::queue& q){ + static saw::error_or copy_to_host( + saw::data& host_data, + saw::data>& sycl_data, + sycl::queue& q + ){ return copy_to_host_member<0u>(sycl_data, host_data, q); } + + template + static saw::error_or malloc_on_device_member(saw::data& host_data, saw::data>& sycl_data, sycl::queue& q){ + + if constexpr (i < sizeof...(Members)){ + using M = typename saw::parameter_pack_type::type; + auto& host_member_data = host_data.template get(); + auto& sycl_member_data = sycl_data.template get(); + + auto eov = sycl_copy_helper::malloc_on_device(host_member_data,sycl_member_data,q); + if(eov.is_error()){ + return eov; + } + + return malloc_on_device_member(host_data,sycl_data,q); + } + + return saw::make_void(); + } + + static saw::error_or malloc_on_device(saw::data& host_data, saw::data>& sycl_data, sycl::queue& q){ + return malloc_on_device_member<0u>(host_data,sycl_data,q); + } }; template @@ -626,6 +706,13 @@ struct sycl_copy_helper, Encode> final { }).wait(); return saw::make_void(); } + + static saw::error_or malloc_on_device(saw::data& host_data, saw::data>& sycl_data, sycl::queue& q){ + (void) host_data; + (void) sycl_data; + (void) q; + return saw::make_void(); + } }; template @@ -684,6 +771,11 @@ struct sycl_copy_helper, Encode> final { }).wait(); return saw::make_void(); } + + static saw::error_or malloc_on_device(saw::data& host_data, saw::data>& sycl_data, sycl::queue& q){ + sycl_data = {host_data.meta(),q}; + return saw::make_void(); + } }; @@ -817,6 +909,16 @@ public: return impl::sycl_copy_helper::copy_to_host(sycl_data, host_data, q_); } + template + saw::error_or malloc_on_device( + saw::data& host_data, + saw::data>& sycl_data + ){ + auto eov = impl::sycl_copy_helper::malloc_on_device(host_data, sycl_data, q_); + q_.wait(); + return eov; + } + auto& get_handle(){ return q_; } diff --git a/lib/sycl/tests/data.cpp b/lib/sycl/tests/data.cpp index 3073a22..6b17622 100644 --- a/lib/sycl/tests/data.cpp +++ b/lib/sycl/tests/data.cpp @@ -2,6 +2,24 @@ #include "../c++/lbm.hpp" +namespace { + +namespace sch { +using namespace kel::lbm::sch; +using TestObjSchema = Tuple< + Member, "foo">, + Member, "bar">, + Member< + Array< + Struct< + Member,"pos"> + > + >, + "baz" + > +>; +} + SAW_TEST("Sycl Data Compilation"){ acpp::sycl::queue q; saw::data< @@ -19,3 +37,12 @@ SAW_TEST("Sycl Data Compilation"){ // test_f.at({}).set(1); // SAW_EXPECT(test_f.at({}).get() == 1, "Value check failed"); } + +SAW_TEST("Sycl Data Compilation for Particle Similacrum"){ + acpp::sycl::queue q; + + saw::data< + sch::TestObjSchema + > a; +} +} -- cgit v1.2.3 From 30ff1caf073b4341fd0614e0974c67a8588c8931 Mon Sep 17 00:00:00 2001 From: "Claudius \"keldu\" Holeksa" Date: Wed, 15 Apr 2026 19:11:21 +0200 Subject: Feierabend --- .../poiseulle_particles_channel_2d.cpp | 3 + examples/settling_cubes_2d_ibm_gpu/sim.cpp | 158 +++++++++++++++++---- lib/sycl/c++/data.hpp | 56 +++++--- lib/sycl/tests/data.cpp | 8 ++ 4 files changed, 177 insertions(+), 48 deletions(-) diff --git a/examples/poiseulle_particles_channel_2d/poiseulle_particles_channel_2d.cpp b/examples/poiseulle_particles_channel_2d/poiseulle_particles_channel_2d.cpp index 7ac663f..1e6e75f 100644 --- a/examples/poiseulle_particles_channel_2d/poiseulle_particles_channel_2d.cpp +++ b/examples/poiseulle_particles_channel_2d/poiseulle_particles_channel_2d.cpp @@ -461,6 +461,8 @@ void couple_particles_to_lattice( p_acc = p_acc + p_pos_rel_vec; } + + /* So I want to include the relative velocity from both particles, but this is a bit hard considering I just assumed 0 velocity from the other party else if(n_macro_cell_particle.get() != (i.get() + 1u) and n_macro_cell_particle.get() > 0u){ // Generally compare @@ -478,6 +480,7 @@ void couple_particles_to_lattice( auto& n_info = n_cell.template get<"info">()({0u}); auto& n_macro_cell = macros.at(n_p_cell_pos); auto& n_macro_cell_particle = n_macro_cell.template get<"particle">(); + l // If neighbour is wall, then add force pushing the particle away if(n_info.get() <= 1u or (n_macro_cell_particle.get() != (i.get()+1u) and n_macro_cell_particle.get() > 0u) ) { diff --git a/examples/settling_cubes_2d_ibm_gpu/sim.cpp b/examples/settling_cubes_2d_ibm_gpu/sim.cpp index e1ba012..de40c54 100644 --- a/examples/settling_cubes_2d_ibm_gpu/sim.cpp +++ b/examples/settling_cubes_2d_ibm_gpu/sim.cpp @@ -6,6 +6,7 @@ #include #include #include +#include namespace kel { namespace lbm { @@ -49,19 +50,26 @@ using MacroStruct = Struct< Member, "force"> >; +template +using ParticleSpheroidGroup = ParticleGroup< + T, + Desc::D, + sch::ParticleCollisionSpheroid +>; + template using ParticleGroups = Tuple< - ParticleGroup< - T,Desc::D,sch::ParticleCollisionSpheroid - > + ParticleSpheroidGroup >; + + } template saw::error_or setup_initial_conditions( saw::data>& fields, saw::data>& macros, - saw::data>& particles + saw::data>& particles ){ auto& info_f = fields.template get<"info">(); // Set everything as walls @@ -105,11 +113,39 @@ saw::error_or setup_initial_conditions( { saw::data> dense_p; dense_p.at({}).set(1); - auto& spheroid_group = particles.template get<0u>(); + // auto& spheroid_group = particles.template get<0u>(); + auto& spheroid_group = particles; + spheroid_group = create_spheroid_particle_group( dense_p, {64u} ); + + { + auto& p = spheroid_group.template get<"particles">(); + + p = {{{16u}}}; + + iterator<1u>::apply( + [&](auto& index){ + // Set Pos here? + auto& p_ind = p.at(index); + + auto& p_rb = p_ind.template get<"rigid_body">(); + auto& p_pos = p_rb.template get<"position">(); + + // TODO CONTINUE HERE NEED to init pos here !!!! + + auto& p_pos_old = p_rb.template get<"position_old">(); + p_pos_old = p_pos; + }, + {}, + p.meta() + ); + } + } + // Particle in hacky flavour + { } return saw::make_void(); @@ -119,20 +155,98 @@ template saw::error_or step( saw::data>,encode::Sycl>& fields, saw::data>,encode::Sycl>& macros, - saw::data>,encode::Sycl>& particles, + saw::data>,encode::Sycl>& p_group, saw::data t_i, device& dev ){ auto& q = dev.get_handle(); auto& info_f = fields.template get<"info">(); - { - } + auto& parts = p_group.template get<"particles">(); + auto& p_mask = p_group.template get<"mask">(); + auto& vels = macros.template get<"velocity">(); + auto& forces = macros.template get<"force">(); + + auto p_meta = parts.meta(); + q.submit([&](acpp::sycl::handler& h){ + + h.parallel_for(acpp::sycl::range{dim_x,dim_y}, [=](acpp::sycl::id idx){ + saw::data> index; + for(uint64_t i = 0u; i < Desc::D; ++i){ + index.at({{i}}).set(idx[i]); + } + + // Reset the force to zero + forces.at(index) = {}; + }); + }).wait(); + + q.submit([&](acpp::sycl::handler& h){ + h.parallel_for(acpp::sycl::range<1u>{p_meta.at({0u}).get()}, [=](acpp::sycl::id<1u> idx){ + + saw::data> index; + for(uint64_t i = 0u; i < 1u; ++i){ + index.at({{i}}).set(idx[i]); + } + + auto& p = parts.at(index); + auto& p_rb = p.template get<"rigid_body">(); + saw::data> delta_t; + delta_t.at({}).set(1.0f); + + auto& p_pos = p_rb.template get<"position">(); + auto& p_rot = p_rb.template get<"rotation">(); + + iterator::apply( + [&](auto& m_ind){ + saw::data> index_shift; + for(uint64_t i{0u}; i < Desc::D; ++i){ + index_shift.at({{i}}) = m_ind.at({i}).template cast_to() - (p_mask.meta().at({i})+1u).template cast_to() * 0.5; + } + + saw::data> transformed_pos; + for(uint64_t i{0u}; i < Desc::D; ++i){ + // TODO add rotation, scaling here. + transformed_pos.at({{i}}) = index_shift.at({{i}}); + } + + // Lagrange indicator position + auto p_pos_lag = p_pos + transformed_pos; + + // Pick the closest velocity + saw::data> p_cell_pos; + saw::data> p_cell_pos_vec; + for(uint64_t i{0u}; i < Desc::D; ++i){ + p_cell_pos.at({{i}}) = (p_pos_lag.at({{i}}) + 0.5).template cast_to(); + p_cell_pos.at({{i}}).set(std::max(1ul,std::min(p_cell_pos.at({{i}}).get(), p_meta.at({{i}}).get() - 2ul))); + p_cell_pos_vec.at({{i}}) = p_cell_pos.at({{i}}); + } + + auto& u_fluid = vels.at(p_cell_pos); + + // this is our relative position to the particle + auto rel_cell_to_part_pos = p_cell_pos_vec.template cast_to() - p_pos; + + auto p_vel = (p_pos - p_rb.template get<"position_old">()) * delta_t; + auto u_solid = p_vel + saw::math::cross(p_rot,rel_cell_to_part_pos); + + + // Force + auto force = (u_solid - u_fluid) / delta_t; + + // TODO HERE ATOMIC! !!!! + forces.at(p_cell_pos) = forces.at(p_cell_pos) + force; + }, + {}, + p_mask.meta() + ); + + verlet_step_lambda(p,delta_t); + }); + }).wait(); // auto coll_ev = q.submit([&](acpp::sycl::handler& h){ - saw::data> force; - force.at({{1}}).set(-1.0); // Need nicer things to handle the flow. I see improvement here component> collision{0.8}; component> bb; @@ -157,15 +271,10 @@ saw::error_or step( default: break; } - }); - - }).wait(); - - q.submit([&](acpp::sycl::handler& h){ - h.parallel_for(acpp::sycl::range<1u>{dim_x}, [=](acpp::sycl::id<1u> idx){ }); }).wait(); + // Step /* q.submit([&](acpp::sycl::handler& h){ @@ -212,9 +321,11 @@ saw::error_or lbm_main(int argc, char** argv){ // saw::data> meta{{dim_x,dim_y}}; auto lbm_data_ptr = saw::heap>>(); auto lbm_macro_data_ptr = saw::heap>>(); - auto lbm_particle_data_ptr = saw::heap>>(); + auto lbm_particle_data_ptr = saw::heap>>(); + // auto lbm_particles_ptr = saw::heap,part_count>>>(); + // saw::data> p_mask; - std::cout<<"Estimated Bytes: "<,sch::MacroStruct>().get()<,sch::MacroStruct>().get()< lbm_main(int argc, char** argv){ saw::data, encode::Sycl> lbm_sycl_data{sycl_q}; saw::data, encode::Sycl> lbm_sycl_macro_data{sycl_q}; - saw::data, encode::Sycl> lbm_sycl_particle_data{sycl_q}; - { - auto eov = dev.malloc_on_device(*lbm_particle_data_ptr,lbm_sycl_particle_data); - if(eov.is_error()){ - return eov; - } - } - sycl_q.wait(); + saw::data, encode::Sycl> lbm_sycl_particle_data{sycl_q}; { auto eov = dev.copy_to_device(*lbm_data_ptr,lbm_sycl_data); @@ -285,7 +389,7 @@ saw::error_or lbm_main(int argc, char** argv){ auto lsdm_view = make_view(lbm_sycl_macro_data); auto lsdp_view = make_view(lbm_sycl_particle_data); - saw::data time_steps{16u*4096ul}; + saw::data time_steps{4u*4096ul}; auto& info_f = lsd_view.template get<"info">(); diff --git a/lib/sycl/c++/data.hpp b/lib/sycl/c++/data.hpp index 3ac51e0..0206833 100644 --- a/lib/sycl/c++/data.hpp +++ b/lib/sycl/c++/data.hpp @@ -131,16 +131,12 @@ private: SAW_FORBID_MOVE(data); public: data(const data::MetaSchema>& meta__, acpp::sycl::queue& q__): - q_{&q__}, - values_{nullptr} + values_{nullptr}, + meta_{meta__}, + q_{&q__} { SAW_ASSERT(q_); - /// TODO use meta - data m{1u}; - for(uint64_t i = 0u; i < Dims; ++i){ - m = m * meta__.at({i}); - } - values_ = acpp::sycl::malloc_device>(m.get(),*q_); + values_ = acpp::sycl::malloc_device>(flat_size().get(),*q_); SAW_ASSERT(values_); } @@ -152,16 +148,6 @@ public: SAW_ASSERT(q_); } - data(const data, Encode>& meta__,acpp::sycl::queue& q__): - values_{nullptr}, - meta_{meta__}, - q_{&q__} - { - SAW_ASSERT(q_); - values_ = acpp::sycl::malloc_device>(flat_size().get(),*q_); - SAW_ASSERT(values_); - } - ~data(){ if(not values_){ return; @@ -184,6 +170,19 @@ public: return values_[kel::lbm::flatten_index::apply(index,meta()).get()]; } + constexpr error_or reset_to(const data::MetaSchema>& meta_arg){ + SAW_ASSERT(q_); + meta_ = meta_arg; + + if(values_){ + acpp::sycl::free(values_,*q_); + } + values_ = acpp::sycl::malloc_device>(flat_size().get(),*q_); + SAW_ASSERT(q_); + + return make_void(); + } + constexpr data* flat_data() const { return values_; } @@ -391,10 +390,11 @@ private: members_{(static_cast(Is), q)...} {} public: + /* data(data::MetaSchema>& meta__, acpp::sycl::queue& q__): data{q__, std::make_index_sequence{}} - { - } + {} + */ data(acpp::sycl::queue& q__): data{q__, std::make_index_sequence{}} @@ -754,6 +754,7 @@ struct sycl_copy_helper, Encode> final { static_assert(sizeof(std::decay_t) == sizeof(std::decay_t), "Unequal size"); + SAW_ASSERT(host_data.flat_size() == sycl_data.flat_size()); q.submit([&](acpp::sycl::handler& h){ h.copy(sycl_ptr,host_ptr, host_data.flat_size().get()); }).wait(); @@ -761,14 +762,27 @@ struct sycl_copy_helper, Encode> final { } static saw::error_or copy_to_device(saw::data& host_data, saw::data>& sycl_data, sycl::queue& q){ + + { + auto hm = host_data.meta(); + auto sm = sycl_data.meta(); + bool equ{true}; + for(uint64_t i{0u}; i < Dims; ++i){ + equ &= (hm.at({i}).get() == sm.at({i}).get()); + } + if(not equ){ + sycl_data.reset_to(hm); + } + } + auto host_ptr = host_data.flat_data(); auto sycl_ptr = sycl_data.flat_data(); - static_assert(sizeof(std::decay_t) == sizeof(std::decay_t), "Unequal size"); q.submit([&](acpp::sycl::handler& h){ h.copy(host_ptr,sycl_ptr, host_data.flat_size().get()); }).wait(); + return saw::make_void(); } diff --git a/lib/sycl/tests/data.cpp b/lib/sycl/tests/data.cpp index 6b17622..4321a0d 100644 --- a/lib/sycl/tests/data.cpp +++ b/lib/sycl/tests/data.cpp @@ -38,6 +38,13 @@ SAW_TEST("Sycl Data Compilation"){ // SAW_EXPECT(test_f.at({}).get() == 1, "Value check failed"); } +SAW_TEST("Sycl Data Array of Struct"){ + acpp::sycl::queue q; + + saw::data, kel::lbm::encode::Sycl> a{{{2u}},q}; +} + +/* SAW_TEST("Sycl Data Compilation for Particle Similacrum"){ acpp::sycl::queue q; @@ -45,4 +52,5 @@ SAW_TEST("Sycl Data Compilation for Particle Similacrum"){ sch::TestObjSchema > a; } +*/ } -- cgit v1.2.3 From e19b4c91cfc7c370851c73314f54ef3e479b45bc Mon Sep 17 00:00:00 2001 From: "Claudius \"keldu\" Holeksa" Date: Thu, 16 Apr 2026 09:05:08 +0200 Subject: Address boundary error --- default.nix | 2 +- examples/settling_cubes_2d_ibm_gpu/sim.cpp | 6 ++---- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/default.nix b/default.nix index 0a110b8..6940267 100644 --- a/default.nix +++ b/default.nix @@ -71,7 +71,7 @@ let src = builtins.fetchurl { url = "https://git.keldu.de/forstio-forstio/snapshot/master.tar.gz"; - sha256 = "sha256:0dm4ix3a53zx2ddyalzn33lwgqi6aacpcmlqrz28x11c96s30xyh"; + sha256 = "sha256:17zsz10lj5dgqw3fmassgqlhbwgpd7zznbq8cl0sw1v816w3ca8z"; }; phases = [ "unpackPhase" "installPhase" ]; diff --git a/examples/settling_cubes_2d_ibm_gpu/sim.cpp b/examples/settling_cubes_2d_ibm_gpu/sim.cpp index de40c54..80ba90c 100644 --- a/examples/settling_cubes_2d_ibm_gpu/sim.cpp +++ b/examples/settling_cubes_2d_ibm_gpu/sim.cpp @@ -389,7 +389,7 @@ saw::error_or lbm_main(int argc, char** argv){ auto lsdm_view = make_view(lbm_sycl_macro_data); auto lsdp_view = make_view(lbm_sycl_particle_data); - saw::data time_steps{4u*4096ul}; + saw::data time_steps{16u*4096ul}; auto& info_f = lsd_view.template get<"info">(); @@ -402,8 +402,7 @@ saw::error_or lbm_main(int argc, char** argv){ } } sycl_q.wait(); - /* - { + if(i.get()%32u ==0u){ { auto eov = dev.copy_to_host(lbm_sycl_macro_data,*lbm_macro_data_ptr); if(eov.is_error()){ @@ -417,7 +416,6 @@ saw::error_or lbm_main(int argc, char** argv){ } } } - */ /* { { -- cgit v1.2.3 From d01f8faadcec62593e0af5304bcc2db3d22ed0c5 Mon Sep 17 00:00:00 2001 From: "Claudius \"keldu\" Holeksa" Date: Thu, 16 Apr 2026 16:20:23 +0200 Subject: Brain fried --- examples/settling_cubes_2d_ibm_gpu/sim.cpp | 21 ++++++++++++++------- lib/core/c++/particle.hpp | 5 +++++ lib/sycl/c++/data.hpp | 16 ++++++++++++++++ 3 files changed, 35 insertions(+), 7 deletions(-) diff --git a/examples/settling_cubes_2d_ibm_gpu/sim.cpp b/examples/settling_cubes_2d_ibm_gpu/sim.cpp index 80ba90c..dceb156 100644 --- a/examples/settling_cubes_2d_ibm_gpu/sim.cpp +++ b/examples/settling_cubes_2d_ibm_gpu/sim.cpp @@ -201,7 +201,7 @@ saw::error_or step( [&](auto& m_ind){ saw::data> index_shift; for(uint64_t i{0u}; i < Desc::D; ++i){ - index_shift.at({{i}}) = m_ind.at({i}).template cast_to() - (p_mask.meta().at({i})+1u).template cast_to() * 0.5; + index_shift.at({{i}}) = m_ind.at({i}).template cast_to() - (vels.meta().at({i})+1u).template cast_to() * 0.5; } saw::data> transformed_pos; @@ -213,12 +213,12 @@ saw::error_or step( // Lagrange indicator position auto p_pos_lag = p_pos + transformed_pos; - // Pick the closest velocity + // Pick the closest velocity and clamp it saw::data> p_cell_pos; saw::data> p_cell_pos_vec; for(uint64_t i{0u}; i < Desc::D; ++i){ p_cell_pos.at({{i}}) = (p_pos_lag.at({{i}}) + 0.5).template cast_to(); - p_cell_pos.at({{i}}).set(std::max(1ul,std::min(p_cell_pos.at({{i}}).get(), p_meta.at({{i}}).get() - 2ul))); + p_cell_pos.at({{i}}).set(std::max(0ul,std::min(p_cell_pos.at({{i}}).get(), vels.meta().at({{i}}).get() - 2ul))); p_cell_pos_vec.at({{i}}) = p_cell_pos.at({{i}}); } @@ -228,14 +228,16 @@ saw::error_or step( auto rel_cell_to_part_pos = p_cell_pos_vec.template cast_to() - p_pos; auto p_vel = (p_pos - p_rb.template get<"position_old">()) * delta_t; - auto u_solid = p_vel + saw::math::cross(p_rot,rel_cell_to_part_pos); + auto u_solid = p_vel + saw::math::cross(p_rot,rel_cell_to_part_pos); // Force auto force = (u_solid - u_fluid) / delta_t; // TODO HERE ATOMIC! !!!! forces.at(p_cell_pos) = forces.at(p_cell_pos) + force; + + // TODO APPLY FORCE TO PARTICLE }, {}, p_mask.meta() @@ -248,7 +250,7 @@ saw::error_or step( // auto coll_ev = q.submit([&](acpp::sycl::handler& h){ // Need nicer things to handle the flow. I see improvement here - component> collision{0.8}; + component> collision{0.8}; component> bb; h.parallel_for(acpp::sycl::range{dim_x,dim_y}, [=](acpp::sycl::id idx){ @@ -271,7 +273,6 @@ saw::error_or step( default: break; } - }); }).wait(); @@ -389,6 +390,12 @@ saw::error_or lbm_main(int argc, char** argv){ auto lsdm_view = make_view(lbm_sycl_macro_data); auto lsdp_view = make_view(lbm_sycl_particle_data); + { + auto eov = write_vtk_file(out_dir,"ms",0u, *lbm_macro_data_ptr); + if(eov.is_error()){ + return eov; + } + } saw::data time_steps{16u*4096ul}; auto& info_f = lsd_view.template get<"info">(); @@ -402,7 +409,7 @@ saw::error_or lbm_main(int argc, char** argv){ } } sycl_q.wait(); - if(i.get()%32u ==0u){ + if(i.get()%1u ==0u){ { auto eov = dev.copy_to_host(lbm_sycl_macro_data,*lbm_macro_data_ptr); if(eov.is_error()){ diff --git a/lib/core/c++/particle.hpp b/lib/core/c++/particle.hpp index b098ecc..691a74b 100644 --- a/lib/core/c++/particle.hpp +++ b/lib/core/c++/particle.hpp @@ -11,7 +11,12 @@ struct Particle {}; template class component { +private: + saw::data> dt_; public: + component(saw::data> dt__): + dt_{dt__} + {} template void apply(const saw::data& particles, const saw::data& macros, saw::data index, saw::data time_step) const { diff --git a/lib/sycl/c++/data.hpp b/lib/sycl/c++/data.hpp index 0206833..71627de 100644 --- a/lib/sycl/c++/data.hpp +++ b/lib/sycl/c++/data.hpp @@ -278,6 +278,10 @@ public: return data>::get_dims(); } + static constexpr auto ghost_meta() { + return data>::meta(); + } + data& at(const data>& index){ std::decay_t ind; for(uint64_t i = 0u; i < sizeof...(Sides); ++i){ @@ -298,6 +302,10 @@ public: return data,Encode>{{Sides...}}; } + static constexpr auto meta(){ + return data,Encode>{{Sides...}}; + } + auto flat_data() const { return values_.flat_data(); } @@ -345,6 +353,10 @@ public: return data>::get_dims(); } + static constexpr auto ghost_meta() { + return data>::meta(); + } + data& at(const data>& index){ std::decay_t ind; for(uint64_t i = 0u; i < sizeof...(Sides); ++i){ @@ -361,6 +373,10 @@ public: return values_.at(ind); } + static constexpr auto meta(){ + return data,Encode>{{Sides...}}; + } + static constexpr auto get_dims(){ return data,Encode>{{Sides...}}; } -- cgit v1.2.3 From 6b355333b994aee50c1a31e2911ee96ae84104ad Mon Sep 17 00:00:00 2001 From: "Claudius \"keldu\" Holeksa" Date: Fri, 17 Apr 2026 10:14:36 +0200 Subject: Renamed function --- examples/poiseulle_3d_gpu/sim.cpp | 4 ++-- examples/poiseulle_particles_2d_bgk_gpu/sim.cpp | 4 ++-- examples/poiseulle_particles_2d_hlbm_gpu/sim.cpp | 4 ++-- examples/poiseulle_particles_2d_psm_gpu/sim.cpp | 4 ++-- examples/settling_cubes_2d_ibm_gpu/sim.cpp | 2 +- 5 files changed, 9 insertions(+), 9 deletions(-) diff --git a/examples/poiseulle_3d_gpu/sim.cpp b/examples/poiseulle_3d_gpu/sim.cpp index 93df1f8..6916ef2 100644 --- a/examples/poiseulle_3d_gpu/sim.cpp +++ b/examples/poiseulle_3d_gpu/sim.cpp @@ -315,8 +315,8 @@ saw::error_or lbm_main(int argc, char** argv){ saw::data, particle_size>, encode::Sycl> lbm_sycl_particle_data{sycl_q}; sycl_q.wait(); - auto lsd_view = make_chunk_struct_view(lbm_sycl_data); - auto lsdm_view = make_chunk_struct_view(lbm_sycl_macro_data); + auto lsd_view = make_view(lbm_sycl_data); + auto lsdm_view = make_view(lbm_sycl_macro_data); { auto eov = dev.copy_to_device(*lbm_data_ptr,lbm_sycl_data); if(eov.is_error()){ diff --git a/examples/poiseulle_particles_2d_bgk_gpu/sim.cpp b/examples/poiseulle_particles_2d_bgk_gpu/sim.cpp index 7d1a624..e2052bb 100644 --- a/examples/poiseulle_particles_2d_bgk_gpu/sim.cpp +++ b/examples/poiseulle_particles_2d_bgk_gpu/sim.cpp @@ -314,8 +314,8 @@ saw::error_or lbm_main(int argc, char** argv){ saw::data, encode::Sycl> lbm_sycl_macro_data{sycl_q}; sycl_q.wait(); - auto lsd_view = make_chunk_struct_view(lbm_sycl_data); - auto lsdm_view = make_chunk_struct_view(lbm_sycl_macro_data); + auto lsd_view = make_view(lbm_sycl_data); + auto lsdm_view = make_view(lbm_sycl_macro_data); { auto eov = dev.copy_to_device(*lbm_data_ptr,lbm_sycl_data); if(eov.is_error()){ diff --git a/examples/poiseulle_particles_2d_hlbm_gpu/sim.cpp b/examples/poiseulle_particles_2d_hlbm_gpu/sim.cpp index 9eeda3c..5c97686 100644 --- a/examples/poiseulle_particles_2d_hlbm_gpu/sim.cpp +++ b/examples/poiseulle_particles_2d_hlbm_gpu/sim.cpp @@ -320,8 +320,8 @@ saw::error_or lbm_main(int argc, char** argv){ saw::data, encode::Sycl> lbm_sycl_macro_data{sycl_q}; sycl_q.wait(); - auto lsd_view = make_chunk_struct_view(lbm_sycl_data); - auto lsdm_view = make_chunk_struct_view(lbm_sycl_macro_data); + auto lsd_view = make_view(lbm_sycl_data); + auto lsdm_view = make_view(lbm_sycl_macro_data); { auto eov = dev.copy_to_device(*lbm_data_ptr,lbm_sycl_data); if(eov.is_error()){ diff --git a/examples/poiseulle_particles_2d_psm_gpu/sim.cpp b/examples/poiseulle_particles_2d_psm_gpu/sim.cpp index 7e76da6..20640c8 100644 --- a/examples/poiseulle_particles_2d_psm_gpu/sim.cpp +++ b/examples/poiseulle_particles_2d_psm_gpu/sim.cpp @@ -340,8 +340,8 @@ saw::error_or lbm_main(int argc, char** argv){ saw::data, encode::Sycl> lbm_sycl_macro_data{sycl_q}; sycl_q.wait(); - auto lsd_view = make_chunk_struct_view(lbm_sycl_data); - auto lsdm_view = make_chunk_struct_view(lbm_sycl_macro_data); + auto lsd_view = make_view(lbm_sycl_data); + auto lsdm_view = make_view(lbm_sycl_macro_data); { auto eov = dev.copy_to_device(*lbm_data_ptr,lbm_sycl_data); if(eov.is_error()){ diff --git a/examples/settling_cubes_2d_ibm_gpu/sim.cpp b/examples/settling_cubes_2d_ibm_gpu/sim.cpp index dceb156..d7b402a 100644 --- a/examples/settling_cubes_2d_ibm_gpu/sim.cpp +++ b/examples/settling_cubes_2d_ibm_gpu/sim.cpp @@ -268,7 +268,7 @@ saw::error_or step( bb.apply(fields,index,t_i); break; case 2u: - collision.apply(fields,macros,index,t_i); + // collision.apply(fields,macros,index,t_i); break; default: break; -- cgit v1.2.3 From e8193a747cac9cc71a6dec84e1f822214131388a Mon Sep 17 00:00:00 2001 From: "Claudius \"keldu\" Holeksa" Date: Tue, 21 Apr 2026 15:20:27 +0200 Subject: Debugging where copy errors occurs --- examples/poiseulle_particles_2d_bgk_gpu/sim.cpp | 14 +++++--------- examples/poiseulle_particles_2d_hlbm_gpu/sim.cpp | 4 ++-- examples/poiseulle_particles_2d_psm_gpu/sim.cpp | 7 +++++-- 3 files changed, 12 insertions(+), 13 deletions(-) diff --git a/examples/poiseulle_particles_2d_bgk_gpu/sim.cpp b/examples/poiseulle_particles_2d_bgk_gpu/sim.cpp index e2052bb..69e94c3 100644 --- a/examples/poiseulle_particles_2d_bgk_gpu/sim.cpp +++ b/examples/poiseulle_particles_2d_bgk_gpu/sim.cpp @@ -205,19 +205,17 @@ saw::error_or step( case 0u: break; case 1u: - abb.apply(fields,index,t_i); + bb.apply(fields,index,t_i); break; case 2u: collision.apply(fields,macros,index,t_i); break; case 3u: - //flow_in.apply(fields,index,t_i); - equi.apply(fields,index,t_i); + flow_in.apply(fields,index,t_i); collision.apply(fields,macros,index,t_i); break; case 4u: - // flow_out.apply(fields,index,t_i); - equi.apply(fields,index,t_i); + flow_out.apply(fields,index,t_i); collision.apply(fields,macros,index,t_i); break; default: @@ -314,8 +312,6 @@ saw::error_or lbm_main(int argc, char** argv){ saw::data, encode::Sycl> lbm_sycl_macro_data{sycl_q}; sycl_q.wait(); - auto lsd_view = make_view(lbm_sycl_data); - auto lsdm_view = make_view(lbm_sycl_macro_data); { auto eov = dev.copy_to_device(*lbm_data_ptr,lbm_sycl_data); if(eov.is_error()){ @@ -329,6 +325,8 @@ saw::error_or lbm_main(int argc, char** argv){ } } sycl_q.wait(); + auto lsd_view = make_view(lbm_sycl_data); + auto lsdm_view = make_view(lbm_sycl_macro_data); saw::data time_steps{16u*4096ul}; auto& info_f = lsd_view.template get<"info">(); @@ -342,7 +340,6 @@ saw::error_or lbm_main(int argc, char** argv){ } } sycl_q.wait(); - /* { { auto eov = dev.copy_to_host(lbm_sycl_macro_data,*lbm_macro_data_ptr); @@ -357,7 +354,6 @@ saw::error_or lbm_main(int argc, char** argv){ } } } - */ /* { { diff --git a/examples/poiseulle_particles_2d_hlbm_gpu/sim.cpp b/examples/poiseulle_particles_2d_hlbm_gpu/sim.cpp index 5c97686..df57bbc 100644 --- a/examples/poiseulle_particles_2d_hlbm_gpu/sim.cpp +++ b/examples/poiseulle_particles_2d_hlbm_gpu/sim.cpp @@ -320,8 +320,6 @@ saw::error_or lbm_main(int argc, char** argv){ saw::data, encode::Sycl> lbm_sycl_macro_data{sycl_q}; sycl_q.wait(); - auto lsd_view = make_view(lbm_sycl_data); - auto lsdm_view = make_view(lbm_sycl_macro_data); { auto eov = dev.copy_to_device(*lbm_data_ptr,lbm_sycl_data); if(eov.is_error()){ @@ -335,6 +333,8 @@ saw::error_or lbm_main(int argc, char** argv){ } } sycl_q.wait(); + auto lsd_view = make_view(lbm_sycl_data); + auto lsdm_view = make_view(lbm_sycl_macro_data); saw::data time_steps{16u*4096ul}; auto& info_f = lsd_view.template get<"info">(); diff --git a/examples/poiseulle_particles_2d_psm_gpu/sim.cpp b/examples/poiseulle_particles_2d_psm_gpu/sim.cpp index 20640c8..7768ca4 100644 --- a/examples/poiseulle_particles_2d_psm_gpu/sim.cpp +++ b/examples/poiseulle_particles_2d_psm_gpu/sim.cpp @@ -340,8 +340,6 @@ saw::error_or lbm_main(int argc, char** argv){ saw::data, encode::Sycl> lbm_sycl_macro_data{sycl_q}; sycl_q.wait(); - auto lsd_view = make_view(lbm_sycl_data); - auto lsdm_view = make_view(lbm_sycl_macro_data); { auto eov = dev.copy_to_device(*lbm_data_ptr,lbm_sycl_data); if(eov.is_error()){ @@ -355,6 +353,9 @@ saw::error_or lbm_main(int argc, char** argv){ } } sycl_q.wait(); + auto lsd_view = make_view(lbm_sycl_data); + auto lsdm_view = make_view(lbm_sycl_macro_data); + saw::data time_steps{16u*4096ul}; auto& info_f = lsd_view.template get<"info">(); @@ -368,6 +369,7 @@ saw::error_or lbm_main(int argc, char** argv){ } } sycl_q.wait(); + /* if(i.get() % 32u == 0u){ { auto eov = dev.copy_to_host(lbm_sycl_macro_data,*lbm_macro_data_ptr); @@ -382,6 +384,7 @@ saw::error_or lbm_main(int argc, char** argv){ } } } + */ /* { { -- cgit v1.2.3 From 45ebf7411d687ab5530431ab1bcc74edb0499c69 Mon Sep 17 00:00:00 2001 From: "Claudius \"keldu\" Holeksa" Date: Wed, 22 Apr 2026 16:48:50 +0200 Subject: Working on equalizing poiseulle_particle channel methods --- examples/poiseulle_particles_2d_bgk_gpu/sim.cpp | 13 ++++++++----- examples/poiseulle_particles_2d_hlbm_gpu/sim.cpp | 16 ++++++++-------- examples/poiseulle_particles_2d_psm_gpu/sim.cpp | 12 ++++++------ lib/core/c++/write_vtk.hpp | 4 ++-- lib/sycl/c++/data.hpp | 10 +++++++--- 5 files changed, 31 insertions(+), 24 deletions(-) diff --git a/examples/poiseulle_particles_2d_bgk_gpu/sim.cpp b/examples/poiseulle_particles_2d_bgk_gpu/sim.cpp index 69e94c3..b8ac35e 100644 --- a/examples/poiseulle_particles_2d_bgk_gpu/sim.cpp +++ b/examples/poiseulle_particles_2d_bgk_gpu/sim.cpp @@ -183,11 +183,11 @@ saw::error_or step( component,encode::Sycl> flow_in{ [&](){ - uint64_t target_t_i = 64u; + uint64_t target_t_i = 32u; if(t_i.get() < target_t_i){ - return 1.0 + (0.0015 / target_t_i) * t_i.get(); + return 1.0 + (0.0002 / target_t_i) * t_i.get(); } - return 1.0015; + return 1.0002; }() }; component,encode::Sycl> flow_out{1.0}; @@ -212,6 +212,7 @@ saw::error_or step( break; case 3u: flow_in.apply(fields,index,t_i); + //equi.apply(fields,index,t_i); collision.apply(fields,macros,index,t_i); break; case 4u: @@ -266,7 +267,7 @@ saw::error_or lbm_main(int argc, char** argv){ {{1.0}} }; - print_lbm_meta(conv,{0.05},{0.01},{0.4 * dim_y}); + print_lbm_meta(conv,{0.1},{1e-4},{0.4 * dim_y}); // saw::data> meta{{dim_x,dim_y}}; auto lbm_data_ptr = saw::heap>>(); @@ -340,7 +341,8 @@ saw::error_or lbm_main(int argc, char** argv){ } } sycl_q.wait(); - { + /* + if(i.get() % 128u == 0u){ { auto eov = dev.copy_to_host(lbm_sycl_macro_data,*lbm_macro_data_ptr); if(eov.is_error()){ @@ -354,6 +356,7 @@ saw::error_or lbm_main(int argc, char** argv){ } } } + */ /* { { diff --git a/examples/poiseulle_particles_2d_hlbm_gpu/sim.cpp b/examples/poiseulle_particles_2d_hlbm_gpu/sim.cpp index df57bbc..7cfd567 100644 --- a/examples/poiseulle_particles_2d_hlbm_gpu/sim.cpp +++ b/examples/poiseulle_particles_2d_hlbm_gpu/sim.cpp @@ -191,9 +191,9 @@ saw::error_or step( [&](){ uint64_t target_t_i = 64u; if(t_i.get() < target_t_i){ - return 1.0 + (0.0015 / target_t_i) * t_i.get(); + return 1.0 + (0.0002 / target_t_i) * t_i.get(); } - return 1.0015; + return 1.0002; }() }; component,encode::Sycl> flow_out{1.0}; @@ -211,19 +211,19 @@ saw::error_or step( case 0u: break; case 1u: - abb.apply(fields,index,t_i); + bb.apply(fields,index,t_i); break; case 2u: collision.apply(fields,macros,index,t_i); break; case 3u: - //flow_in.apply(fields,index,t_i); - equi.apply(fields,index,t_i); + flow_in.apply(fields,index,t_i); + // equi.apply(fields,index,t_i); collision.apply(fields,macros,index,t_i); break; case 4u: - // flow_out.apply(fields,index,t_i); - equi.apply(fields,index,t_i); + flow_out.apply(fields,index,t_i); + // equi.apply(fields,index,t_i); collision.apply(fields,macros,index,t_i); break; default: @@ -274,7 +274,7 @@ saw::error_or lbm_main(int argc, char** argv){ {{1.0}} }; - print_lbm_meta(conv,{0.05},{0.01},{0.4 * dim_y}); + print_lbm_meta(conv,{0.1},{1e-4},{0.4 * dim_y}); // saw::data> meta{{dim_x,dim_y}}; auto lbm_data_ptr = saw::heap>>(); diff --git a/examples/poiseulle_particles_2d_psm_gpu/sim.cpp b/examples/poiseulle_particles_2d_psm_gpu/sim.cpp index 7768ca4..75d1a14 100644 --- a/examples/poiseulle_particles_2d_psm_gpu/sim.cpp +++ b/examples/poiseulle_particles_2d_psm_gpu/sim.cpp @@ -207,9 +207,9 @@ saw::error_or step( [&](){ uint64_t target_t_i = 64u; if(t_i.get() < target_t_i){ - return 1.0 + (0.0015 / target_t_i) * t_i.get(); + return 1.0 + (0.0002 / target_t_i) * t_i.get(); } - return 1.0015; + return 1.0002; }() }; component,encode::Sycl> flow_out{1.0}; @@ -227,7 +227,7 @@ saw::error_or step( case 0u: break; case 1u: - abb.apply(fields,index,t_i); + bb.apply(fields,index,t_i); break; case 2u: collision.apply(fields,macros,index,t_i); @@ -289,12 +289,12 @@ saw::error_or lbm_main(int argc, char** argv){ converter conv { // delta_x - {{1e-3}}, + {{1.0}}, // delta_t - {{1e-06}} + {{1.0}} }; - print_lbm_meta(conv,{0.1},{1e-2},{0.4 * dim_y *1e-3}); + print_lbm_meta(conv,{0.1},{1e-4},{0.4 * dim_y}); // saw::data> meta{{dim_x,dim_y}}; auto lbm_data_ptr = saw::heap>>(); diff --git a/lib/core/c++/write_vtk.hpp b/lib/core/c++/write_vtk.hpp index f925361..e852172 100644 --- a/lib/core/c++/write_vtk.hpp +++ b/lib/core/c++/write_vtk.hpp @@ -136,7 +136,7 @@ struct lbm_vtk_writer> { template struct lbm_vtk_writer> { static saw::error_or apply_header(std::ostream& vtk_file, std::string_view name){ - vtk_file<<"SCALARS "<, if constexpr (sizeof...(MemberT) > 0u){ // POINT DATA { - vtk_file << "POINT_DATA " << pd_size.get() <<"\n"; + vtk_file << "\nPOINT_DATA " << pd_size.get() <<"\n"; } // HEADER TO BODY diff --git a/lib/sycl/c++/data.hpp b/lib/sycl/c++/data.hpp index 71627de..9f43848 100644 --- a/lib/sycl/c++/data.hpp +++ b/lib/sycl/c++/data.hpp @@ -381,7 +381,7 @@ public: return data,Encode>{{Sides...}}; } - auto flat_data(){ + auto flat_data() const { return values_.flat_data(); } @@ -741,8 +741,10 @@ struct sycl_copy_helper, Encode> final { static_assert(sizeof(std::decay_t) == sizeof(std::decay_t), "Unequal size"); + auto flat_size = host_data.flat_size(); + q.submit([&](acpp::sycl::handler& h){ - h.copy(sycl_ptr,host_ptr, saw::ct_multiply::value); + h.copy(sycl_ptr,host_ptr, flat_size.get()); }).wait(); return saw::make_void(); } @@ -753,8 +755,10 @@ struct sycl_copy_helper, Encode> final { static_assert(sizeof(std::decay_t) == sizeof(std::decay_t), "Unequal size"); + auto flat_size = host_data.flat_size(); + q.submit([&](acpp::sycl::handler& h){ - h.copy(host_ptr,sycl_ptr, saw::ct_multiply::value); + h.copy(host_ptr,sycl_ptr, flat_size.get()); }).wait(); return saw::make_void(); } -- cgit v1.2.3